aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2016-06-18 20:25:08 -0400
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2016-06-18 20:25:08 -0400
commit6ea24cf79e055f0a62a64baa8587e2254a493c7b (patch)
treec5cd6113ed93854b1bc30cd471c366f080c4be2f /net
parent540c26087bfbad6ea72758b76b16ae6282a73fea (diff)
parent488326947cd1f038da8d2c9068a0d07b913b7983 (diff)
Merge branch 'cec-defines' into for-linus
Let's bring in HDMI CEC defines to ease merging CEC support in the next merge window.
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/core.c39
-rw-r--r--net/6lowpan/debugfs.c247
-rw-r--r--net/6lowpan/iphc.c413
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/8021q/vlan_dev.c10
-rw-r--r--net/8021q/vlanproc.c3
-rw-r--r--net/8021q/vlanproc.h4
-rw-r--r--net/9p/trans_rdma.c86
-rw-r--r--net/Kconfig24
-rw-r--r--net/Makefile1
-rw-r--r--net/ax25/ax25_ip.c15
-rw-r--r--net/batman-adv/Kconfig16
-rw-r--r--net/batman-adv/Makefile5
-rw-r--r--net/batman-adv/bat_algo.h30
-rw-r--r--net/batman-adv/bat_iv_ogm.c115
-rw-r--r--net/batman-adv/bat_v.c359
-rw-r--r--net/batman-adv/bat_v_elp.c515
-rw-r--r--net/batman-adv/bat_v_elp.h33
-rw-r--r--net/batman-adv/bat_v_ogm.c833
-rw-r--r--net/batman-adv/bat_v_ogm.h36
-rw-r--r--net/batman-adv/bitarray.c14
-rw-r--r--net/batman-adv/bitarray.h14
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c329
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c8
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c117
-rw-r--r--net/batman-adv/distributed-arp-table.h2
-rw-r--r--net/batman-adv/fragmentation.c34
-rw-r--r--net/batman-adv/fragmentation.h4
-rw-r--r--net/batman-adv/gateway_client.c127
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c8
-rw-r--r--net/batman-adv/gateway_common.h4
-rw-r--r--net/batman-adv/hard-interface.c60
-rw-r--r--net/batman-adv/hard-interface.h18
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h24
-rw-r--r--net/batman-adv/icmp_socket.c10
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/main.c112
-rw-r--r--net/batman-adv/main.h39
-rw-r--r--net/batman-adv/multicast.c44
-rw-r--r--net/batman-adv/multicast.h4
-rw-r--r--net/batman-adv/network-coding.c164
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c257
-rw-r--r--net/batman-adv/originator.h18
-rw-r--r--net/batman-adv/packet.h68
-rw-r--r--net/batman-adv/routing.c121
-rw-r--r--net/batman-adv/routing.h5
-rw-r--r--net/batman-adv/send.c104
-rw-r--r--net/batman-adv/send.h16
-rw-r--r--net/batman-adv/soft-interface.c81
-rw-r--r--net/batman-adv/soft-interface.h4
-rw-r--r--net/batman-adv/sysfs.c162
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/translation-table.c359
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/types.h167
-rw-r--r--net/bluetooth/Kconfig9
-rw-r--r--net/bluetooth/Makefile1
-rw-r--r--net/bluetooth/hci_conn.c17
-rw-r--r--net/bluetooth/hci_core.c7
-rw-r--r--net/bluetooth/hci_request.c56
-rw-r--r--net/bluetooth/hci_request.h2
-rw-r--r--net/bluetooth/leds.c74
-rw-r--r--net/bluetooth/leds.h16
-rw-r--r--net/bluetooth/mgmt.c26
-rw-r--r--net/bluetooth/smp.c135
-rw-r--r--net/bridge/br_forward.c1
-rw-r--r--net/bridge/br_if.c59
-rw-r--r--net/bridge/br_input.c16
-rw-r--r--net/bridge/br_ioctl.c5
-rw-r--r--net/bridge/br_mdb.c174
-rw-r--r--net/bridge/br_multicast.c111
-rw-r--r--net/bridge/br_netfilter_hooks.c68
-rw-r--r--net/bridge/br_netlink.c1
-rw-r--r--net/bridge/br_private.h10
-rw-r--r--net/bridge/br_stp.c27
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/bridge/br_stp_timer.c1
-rw-r--r--net/bridge/br_vlan.c11
-rw-r--r--net/bridge/netfilter/ebtables.c10
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c24
-rw-r--r--net/caif/cfpkt_skbuff.c2
-rw-r--r--net/ceph/auth.c8
-rw-r--r--net/ceph/auth_none.c71
-rw-r--r--net/ceph/auth_none.h3
-rw-r--r--net/ceph/auth_x.c21
-rw-r--r--net/ceph/auth_x.h1
-rw-r--r--net/ceph/ceph_common.c4
-rw-r--r--net/ceph/crypto.c101
-rw-r--r--net/ceph/debugfs.c17
-rw-r--r--net/ceph/messenger.c35
-rw-r--r--net/ceph/mon_client.c457
-rw-r--r--net/ceph/osd_client.c115
-rw-r--r--net/ceph/pagelist.c4
-rw-r--r--net/ceph/pagevec.c32
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/dev.c40
-rw-r--r--net/core/devlink.c738
-rw-r--r--net/core/dst.c10
-rw-r--r--net/core/dst_cache.c168
-rw-r--r--net/core/ethtool.c638
-rw-r--r--net/core/filter.c290
-rw-r--r--net/core/flow.c14
-rw-r--r--net/core/flow_dissector.c58
-rw-r--r--net/core/gen_estimator.c2
-rw-r--r--net/core/gen_stats.c1
-rw-r--r--net/core/hwbm.c87
-rw-r--r--net/core/lwtunnel.c37
-rw-r--r--net/core/net-sysfs.c18
-rw-r--r--net/core/netclassid_cgroup.c1
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/rtnetlink.c138
-rw-r--r--net/core/skbuff.c172
-rw-r--r--net/core/sock.c16
-rw-r--r--net/dccp/ipv4.c4
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/decnet/dn_route.c9
-rw-r--r--net/dsa/dsa.c43
-rw-r--r--net/dsa/slave.c213
-rw-r--r--net/ethernet/eth.c3
-rw-r--r--net/ieee802154/6lowpan/core.c7
-rw-r--r--net/ieee802154/socket.c17
-rw-r--r--net/ipv4/Kconfig9
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c69
-rw-r--r--net/ipv4/arp.c41
-rw-r--r--net/ipv4/devinet.c70
-rw-r--r--net/ipv4/fib_frontend.c26
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fou.c58
-rw-r--r--net/ipv4/gre_offload.c117
-rw-r--r--net/ipv4/icmp.c5
-rw-r--r--net/ipv4/igmp.c78
-rw-r--r--net/ipv4/inet_connection_sock.c254
-rw-r--r--net/ipv4/inet_diag.c7
-rw-r--r--net/ipv4/inet_hashtables.c239
-rw-r--r--net/ipv4/inet_lro.c374
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_fragment.c29
-rw-r--r--net/ipv4/ip_gre.c84
-rw-r--r--net/ipv4/ip_input.c30
-rw-r--r--net/ipv4/ip_options.c14
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_sockglue.c10
-rw-r--r--net/ipv4/ip_tunnel.c78
-rw-r--r--net/ipv4/ip_tunnel_core.c43
-rw-r--r--net/ipv4/ip_vti.c18
-rw-r--r--net/ipv4/ipip.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c109
-rw-r--r--net/ipv4/netfilter/arptable_filter.c44
-rw-r--r--net/ipv4/netfilter/ip_tables.c111
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c55
-rw-r--r--net/ipv4/netfilter/iptable_filter.c44
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c41
-rw-r--r--net/ipv4/netfilter/iptable_nat.c41
-rw-r--r--net/ipv4/netfilter/iptable_raw.c38
-rw-r--r--net/ipv4/netfilter/iptable_security.c44
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c30
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c12
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c7
-rw-r--r--net/ipv4/ping.c11
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c19
-rw-r--r--net/ipv4/syncookies.c7
-rw-r--r--net/ipv4/sysctl_net_ipv4.c236
-rw-r--r--net/ipv4/tcp.c84
-rw-r--r--net/ipv4/tcp_fastopen.c79
-rw-r--r--net/ipv4/tcp_input.c180
-rw-r--r--net/ipv4/tcp_ipv4.c48
-rw-r--r--net/ipv4/tcp_metrics.c3
-rw-r--r--net/ipv4/tcp_minisocks.c5
-rw-r--r--net/ipv4/tcp_offload.c8
-rw-r--r--net/ipv4/tcp_output.c32
-rw-r--r--net/ipv4/tcp_probe.c8
-rw-r--r--net/ipv4/tcp_timer.c23
-rw-r--r--net/ipv4/udp.c53
-rw-r--r--net/ipv4/udp_offload.c122
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c214
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/datagram.c169
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ila/ila_common.c1
-rw-r--r--net/ipv6/ila/ila_lwt.c3
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/inet6_hashtables.c78
-rw-r--r--net/ipv6/ip6_checksum.c26
-rw-r--r--net/ipv6/ip6_fib.c91
-rw-r--r--net/ipv6/ip6_gre.c12
-rw-r--r--net/ipv6/ip6_input.c12
-rw-r--r--net/ipv6/ip6_offload.c15
-rw-r--r--net/ipv6/ip6_output.c9
-rw-r--r--net/ipv6/ip6_tunnel.c105
-rw-r--r--net/ipv6/ip6_udp_tunnel.c6
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ndisc.c9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c113
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c47
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c46
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c41
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c46
-rw-r--r--net/ipv6/netfilter/ip6table_security.c44
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c30
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c7
-rw-r--r--net/ipv6/ping.c59
-rw-r--r--net/ipv6/reassembly.c6
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/sit.c29
-rw-r--r--net/ipv6/syncookies.c5
-rw-r--r--net/ipv6/tcp_ipv6.c42
-rw-r--r--net/ipv6/udp.c69
-rw-r--r--net/ipv6/udp_offload.c8
-rw-r--r--net/irda/ircomm/ircomm_tty.c15
-rw-r--r--net/irda/ircomm/ircomm_tty_ioctl.c13
-rw-r--r--net/kcm/Kconfig10
-rw-r--r--net/kcm/Makefile3
-rw-r--r--net/kcm/kcmproc.c426
-rw-r--r--net/kcm/kcmsock.c2409
-rw-r--r--net/l2tp/l2tp_core.c4
-rw-r--r--net/l2tp/l2tp_ip.c8
-rw-r--r--net/l2tp/l2tp_ip6.c11
-rw-r--r--net/l3mdev/l3mdev.c11
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/mac80211/agg-rx.c50
-rw-r--r--net/mac80211/agg-tx.c53
-rw-r--r--net/mac80211/cfg.c34
-rw-r--r--net/mac80211/chan.c6
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_key.c5
-rw-r--r--net/mac80211/driver-ops.c10
-rw-r--r--net/mac80211/driver-ops.h4
-rw-r--r--net/mac80211/ht.c5
-rw-r--r--net/mac80211/ibss.c32
-rw-r--r--net/mac80211/ieee80211_i.h41
-rw-r--r--net/mac80211/iface.c18
-rw-r--r--net/mac80211/key.c86
-rw-r--r--net/mac80211/key.h10
-rw-r--r--net/mac80211/mesh.c9
-rw-r--r--net/mac80211/mesh.h3
-rw-r--r--net/mac80211/mesh_hwmp.c8
-rw-r--r--net/mac80211/mesh_pathtbl.c111
-rw-r--r--net/mac80211/mesh_plink.c10
-rw-r--r--net/mac80211/mlme.c79
-rw-r--r--net/mac80211/rx.c141
-rw-r--r--net/mac80211/sta_info.c49
-rw-r--r--net/mac80211/sta_info.h25
-rw-r--r--net/mac80211/status.c2
-rw-r--r--net/mac80211/tdls.c43
-rw-r--r--net/mac80211/tkip.c36
-rw-r--r--net/mac80211/tkip.h2
-rw-r--r--net/mac80211/trace.h43
-rw-r--r--net/mac80211/tx.c113
-rw-r--r--net/mac80211/util.c116
-rw-r--r--net/mac80211/vht.c87
-rw-r--r--net/mac80211/wpa.c11
-rw-r--r--net/mac802154/llsec.c41
-rw-r--r--net/mac802154/llsec.h3
-rw-r--r--net/mac802154/main.c2
-rw-r--r--net/mpls/af_mpls.c3
-rw-r--r--net/mpls/mpls_iptunnel.c1
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c36
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h2
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c3
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c57
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c38
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c12
-rw-r--r--net/netfilter/nf_conntrack_core.c12
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c4
-rw-r--r--net/netfilter/nf_dup_netdev.c1
-rw-r--r--net/netfilter/nfnetlink.c7
-rw-r--r--net/netfilter/nfnetlink_acct.c5
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c13
-rw-r--r--net/netfilter/nft_compat.c6
-rw-r--r--net/netfilter/nft_masq.c51
-rw-r--r--net/netfilter/nft_meta.c11
-rw-r--r--net/netfilter/x_tables.c68
-rw-r--r--net/netfilter/xt_IDLETIMER.c1
-rw-r--r--net/netfilter/xt_TPROXY.c31
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netfilter/xt_socket.c28
-rw-r--r--net/netlabel/netlabel_domainhash.c4
-rw-r--r--net/netlabel/netlabel_unlabeled.c6
-rw-r--r--net/netlink/Kconfig9
-rw-r--r--net/netlink/af_netlink.c786
-rw-r--r--net/netlink/af_netlink.h15
-rw-r--r--net/netlink/diag.c39
-rw-r--r--net/netlink/genetlink.c25
-rw-r--r--net/nfc/llcp_commands.c4
-rw-r--r--net/nfc/llcp_sock.c6
-rw-r--r--net/nfc/nci/uart.c9
-rw-r--r--net/openvswitch/Kconfig6
-rw-r--r--net/openvswitch/actions.c12
-rw-r--r--net/openvswitch/conntrack.c676
-rw-r--r--net/openvswitch/conntrack.h3
-rw-r--r--net/openvswitch/datapath.c108
-rw-r--r--net/openvswitch/datapath.h4
-rw-r--r--net/openvswitch/flow.h2
-rw-r--r--net/openvswitch/flow_netlink.c9
-rw-r--r--net/openvswitch/vport-geneve.c2
-rw-r--r--net/openvswitch/vport-internal_dev.c10
-rw-r--r--net/openvswitch/vport-netdev.c2
-rw-r--r--net/openvswitch/vport.h7
-rw-r--r--net/packet/af_packet.c475
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/rds/Kconfig7
-rw-r--r--net/rds/Makefile4
-rw-r--r--net/rds/af_rds.c26
-rw-r--r--net/rds/cong.c4
-rw-r--r--net/rds/ib.c47
-rw-r--r--net/rds/ib.h37
-rw-r--r--net/rds/ib_cm.c61
-rw-r--r--net/rds/ib_fmr.c248
-rw-r--r--net/rds/ib_frmr.c376
-rw-r--r--net/rds/ib_mr.h148
-rw-r--r--net/rds/ib_rdma.c495
-rw-r--r--net/rds/ib_recv.c2
-rw-r--r--net/rds/ib_send.c6
-rw-r--r--net/rds/ib_stats.c2
-rw-r--r--net/rds/iw.c312
-rw-r--r--net/rds/iw.h398
-rw-r--r--net/rds/iw_cm.c769
-rw-r--r--net/rds/iw_rdma.c837
-rw-r--r--net/rds/iw_recv.c904
-rw-r--r--net/rds/iw_ring.c169
-rw-r--r--net/rds/iw_send.c981
-rw-r--r--net/rds/iw_stats.c95
-rw-r--r--net/rds/iw_sysctl.c123
-rw-r--r--net/rds/page.c8
-rw-r--r--net/rds/rdma_transport.c21
-rw-r--r--net/rds/rdma_transport.h5
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/recv.c20
-rw-r--r--net/rds/tcp.c149
-rw-r--r--net/rds/tcp.h4
-rw-r--r--net/rds/tcp_connect.c8
-rw-r--r--net/rds/tcp_listen.c54
-rw-r--r--net/rfkill/Kconfig3
-rw-r--r--net/rfkill/core.c172
-rw-r--r--net/rfkill/rfkill-gpio.c24
-rw-r--r--net/rxrpc/af_rxrpc.c39
-rw-r--r--net/rxrpc/ar-accept.c56
-rw-r--r--net/rxrpc/ar-ack.c225
-rw-r--r--net/rxrpc/ar-call.c88
-rw-r--r--net/rxrpc/ar-connection.c85
-rw-r--r--net/rxrpc/ar-connevent.c79
-rw-r--r--net/rxrpc/ar-error.c13
-rw-r--r--net/rxrpc/ar-input.c118
-rw-r--r--net/rxrpc/ar-internal.h220
-rw-r--r--net/rxrpc/ar-key.c12
-rw-r--r--net/rxrpc/ar-local.c29
-rw-r--r--net/rxrpc/ar-output.c75
-rw-r--r--net/rxrpc/ar-peer.c2
-rw-r--r--net/rxrpc/ar-proc.c10
-rw-r--r--net/rxrpc/ar-recvmsg.c20
-rw-r--r--net/rxrpc/ar-security.c6
-rw-r--r--net/rxrpc/ar-skbuff.c7
-rw-r--r--net/rxrpc/ar-transport.c3
-rw-r--r--net/rxrpc/rxkad.c337
-rw-r--r--net/rxrpc/sysctl.c34
-rw-r--r--net/sched/Kconfig22
-rw-r--r--net/sched/Makefile3
-rw-r--r--net/sched/act_api.c137
-rw-r--r--net/sched/act_bpf.c52
-rw-r--r--net/sched/act_connmark.c54
-rw-r--r--net/sched/act_csum.c67
-rw-r--r--net/sched/act_gact.c55
-rw-r--r--net/sched/act_ife.c876
-rw-r--r--net/sched/act_ipt.c146
-rw-r--r--net/sched/act_meta_mark.c79
-rw-r--r--net/sched/act_meta_skbprio.c76
-rw-r--r--net/sched/act_mirred.c72
-rw-r--r--net/sched/act_nat.c72
-rw-r--r--net/sched/act_pedit.c54
-rw-r--r--net/sched/act_police.c52
-rw-r--r--net/sched/act_simple.c71
-rw-r--r--net/sched/act_skbedit.c70
-rw-r--r--net/sched/act_vlan.c74
-rw-r--r--net/sched/cls_bpf.c13
-rw-r--r--net/sched/cls_flower.c64
-rw-r--r--net/sched/cls_u32.c118
-rw-r--r--net/sched/sch_api.c10
-rw-r--r--net/sched/sch_cbq.c12
-rw-r--r--net/sched/sch_choke.c6
-rw-r--r--net/sched/sch_codel.c10
-rw-r--r--net/sched/sch_drr.c9
-rw-r--r--net/sched/sch_dsmark.c13
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_fq_codel.c17
-rw-r--r--net/sched/sch_generic.c6
-rw-r--r--net/sched/sch_hfsc.c9
-rw-r--r--net/sched/sch_hhf.c10
-rw-r--r--net/sched/sch_htb.c24
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c11
-rw-r--r--net/sched/sch_multiq.c16
-rw-r--r--net/sched/sch_netem.c74
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_prio.c15
-rw-r--r--net/sched/sch_qfq.c9
-rw-r--r--net/sched/sch_red.c10
-rw-r--r--net/sched/sch_sfb.c10
-rw-r--r--net/sched/sch_sfq.c16
-rw-r--r--net/sched/sch_tbf.c15
-rw-r--r--net/sctp/associola.c7
-rw-r--r--net/sctp/auth.c36
-rw-r--r--net/sctp/bind_addr.c14
-rw-r--r--net/sctp/chunk.c19
-rw-r--r--net/sctp/endpointola.c1
-rw-r--r--net/sctp/input.c9
-rw-r--r--net/sctp/output.c15
-rw-r--r--net/sctp/outqueue.c51
-rw-r--r--net/sctp/probe.c10
-rw-r--r--net/sctp/proc.c2
-rw-r--r--net/sctp/protocol.c1
-rw-r--r--net/sctp/sm_make_chunk.c137
-rw-r--r--net/sctp/sm_sideeffect.c65
-rw-r--r--net/sctp/socket.c23
-rw-r--r--net/sctp/transport.c25
-rw-r--r--net/socket.c64
-rw-r--r--net/sunrpc/Makefile3
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c356
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c90
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c22
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c28
-rw-r--r--net/sunrpc/auth_null.c4
-rw-r--r--net/sunrpc/auth_unix.c6
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/clnt.c328
-rw-r--r--net/sunrpc/rpc_pipe.c4
-rw-r--r--net/sunrpc/rpcb_clnt.c10
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/socklib.c6
-rw-r--r--net/sunrpc/xdr.c50
-rw-r--r--net/sunrpc/xprt.c42
-rw-r--r--net/sunrpc/xprtmultipath.c475
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c28
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c143
-rw-r--r--net/sunrpc/xprtrdma/physical_ops.c1
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c108
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c17
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c64
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c60
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c196
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c445
-rw-r--r--net/sunrpc/xprtrdma/verbs.c204
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h14
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/switchdev/switchdev.c8
-rw-r--r--net/tipc/bcast.c5
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c18
-rw-r--r--net/tipc/core.c1
-rw-r--r--net/tipc/core.h3
-rw-r--r--net/tipc/link.c167
-rw-r--r--net/tipc/link.h7
-rw-r--r--net/tipc/name_distr.c35
-rw-r--r--net/tipc/name_table.c20
-rw-r--r--net/tipc/net.c7
-rw-r--r--net/tipc/netlink.c69
-rw-r--r--net/tipc/netlink.h11
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/tipc/node.c135
-rw-r--r--net/tipc/server.c4
-rw-r--r--net/tipc/socket.c9
-rw-r--r--net/tipc/subscr.c132
-rw-r--r--net/tipc/subscr.h11
-rw-r--r--net/tipc/udp_media.c44
-rw-r--r--net/unix/af_unix.c5
-rw-r--r--net/vmw_vsock/af_vsock.c176
-rw-r--r--net/vmw_vsock/vmci_transport.c9
-rw-r--r--net/wireless/Kconfig25
-rw-r--r--net/wireless/core.c10
-rw-r--r--net/wireless/lib80211_crypt_tkip.c99
-rw-r--r--net/wireless/lib80211_crypt_wep.c46
-rw-r--r--net/wireless/mlme.c3
-rw-r--r--net/wireless/nl80211.c31
-rw-r--r--net/wireless/radiotap.c1
-rw-r--r--net/wireless/reg.c122
-rw-r--r--net/wireless/sme.c9
-rw-r--r--net/wireless/util.c277
-rw-r--r--net/x25/x25_facilities.c1
-rw-r--r--net/xfrm/xfrm_algo.c7
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_output.c3
-rw-r--r--net/xfrm/xfrm_user.c2
500 files changed, 22349 insertions, 14088 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index faf65baed617..34e44c0c0836 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -20,7 +20,7 @@
20int lowpan_register_netdevice(struct net_device *dev, 20int lowpan_register_netdevice(struct net_device *dev,
21 enum lowpan_lltypes lltype) 21 enum lowpan_lltypes lltype)
22{ 22{
23 int ret; 23 int i, ret;
24 24
25 dev->addr_len = EUI64_ADDR_LEN; 25 dev->addr_len = EUI64_ADDR_LEN;
26 dev->type = ARPHRD_6LOWPAN; 26 dev->type = ARPHRD_6LOWPAN;
@@ -29,6 +29,10 @@ int lowpan_register_netdevice(struct net_device *dev,
29 29
30 lowpan_priv(dev)->lltype = lltype; 30 lowpan_priv(dev)->lltype = lltype;
31 31
32 spin_lock_init(&lowpan_priv(dev)->ctx.lock);
33 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
34 lowpan_priv(dev)->ctx.table[i].id = i;
35
32 ret = register_netdevice(dev); 36 ret = register_netdevice(dev);
33 if (ret < 0) 37 if (ret < 0)
34 return ret; 38 return ret;
@@ -68,6 +72,32 @@ void lowpan_unregister_netdev(struct net_device *dev)
68} 72}
69EXPORT_SYMBOL(lowpan_unregister_netdev); 73EXPORT_SYMBOL(lowpan_unregister_netdev);
70 74
75static int lowpan_event(struct notifier_block *unused,
76 unsigned long event, void *ptr)
77{
78 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
79 int i;
80
81 if (dev->type != ARPHRD_6LOWPAN)
82 return NOTIFY_DONE;
83
84 switch (event) {
85 case NETDEV_DOWN:
86 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
87 clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE,
88 &lowpan_priv(dev)->ctx.table[i].flags);
89 break;
90 default:
91 return NOTIFY_DONE;
92 }
93
94 return NOTIFY_OK;
95}
96
97static struct notifier_block lowpan_notifier = {
98 .notifier_call = lowpan_event,
99};
100
71static int __init lowpan_module_init(void) 101static int __init lowpan_module_init(void)
72{ 102{
73 int ret; 103 int ret;
@@ -76,6 +106,12 @@ static int __init lowpan_module_init(void)
76 if (ret < 0) 106 if (ret < 0)
77 return ret; 107 return ret;
78 108
109 ret = register_netdevice_notifier(&lowpan_notifier);
110 if (ret < 0) {
111 lowpan_debugfs_exit();
112 return ret;
113 }
114
79 request_module_nowait("ipv6"); 115 request_module_nowait("ipv6");
80 116
81 request_module_nowait("nhc_dest"); 117 request_module_nowait("nhc_dest");
@@ -92,6 +128,7 @@ static int __init lowpan_module_init(void)
92static void __exit lowpan_module_exit(void) 128static void __exit lowpan_module_exit(void)
93{ 129{
94 lowpan_debugfs_exit(); 130 lowpan_debugfs_exit();
131 unregister_netdevice_notifier(&lowpan_notifier);
95} 132}
96 133
97module_init(lowpan_module_init); 134module_init(lowpan_module_init);
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 88eef84df0fc..0793a8157472 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -16,19 +16,266 @@
16 16
17#include "6lowpan_i.h" 17#include "6lowpan_i.h"
18 18
19#define LOWPAN_DEBUGFS_CTX_PFX_NUM_ARGS 8
20
19static struct dentry *lowpan_debugfs; 21static struct dentry *lowpan_debugfs;
20 22
23static int lowpan_ctx_flag_active_set(void *data, u64 val)
24{
25 struct lowpan_iphc_ctx *ctx = data;
26
27 if (val != 0 && val != 1)
28 return -EINVAL;
29
30 if (val)
31 set_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE, &ctx->flags);
32 else
33 clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE, &ctx->flags);
34
35 return 0;
36}
37
38static int lowpan_ctx_flag_active_get(void *data, u64 *val)
39{
40 *val = lowpan_iphc_ctx_is_active(data);
41 return 0;
42}
43
44DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_flag_active_fops,
45 lowpan_ctx_flag_active_get,
46 lowpan_ctx_flag_active_set, "%llu\n");
47
48static int lowpan_ctx_flag_c_set(void *data, u64 val)
49{
50 struct lowpan_iphc_ctx *ctx = data;
51
52 if (val != 0 && val != 1)
53 return -EINVAL;
54
55 if (val)
56 set_bit(LOWPAN_IPHC_CTX_FLAG_COMPRESSION, &ctx->flags);
57 else
58 clear_bit(LOWPAN_IPHC_CTX_FLAG_COMPRESSION, &ctx->flags);
59
60 return 0;
61}
62
63static int lowpan_ctx_flag_c_get(void *data, u64 *val)
64{
65 *val = lowpan_iphc_ctx_is_compression(data);
66 return 0;
67}
68
69DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_flag_c_fops, lowpan_ctx_flag_c_get,
70 lowpan_ctx_flag_c_set, "%llu\n");
71
72static int lowpan_ctx_plen_set(void *data, u64 val)
73{
74 struct lowpan_iphc_ctx *ctx = data;
75 struct lowpan_iphc_ctx_table *t =
76 container_of(ctx, struct lowpan_iphc_ctx_table, table[ctx->id]);
77
78 if (val > 128)
79 return -EINVAL;
80
81 spin_lock_bh(&t->lock);
82 ctx->plen = val;
83 spin_unlock_bh(&t->lock);
84
85 return 0;
86}
87
88static int lowpan_ctx_plen_get(void *data, u64 *val)
89{
90 struct lowpan_iphc_ctx *ctx = data;
91 struct lowpan_iphc_ctx_table *t =
92 container_of(ctx, struct lowpan_iphc_ctx_table, table[ctx->id]);
93
94 spin_lock_bh(&t->lock);
95 *val = ctx->plen;
96 spin_unlock_bh(&t->lock);
97 return 0;
98}
99
100DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_plen_fops, lowpan_ctx_plen_get,
101 lowpan_ctx_plen_set, "%llu\n");
102
103static int lowpan_ctx_pfx_show(struct seq_file *file, void *offset)
104{
105 struct lowpan_iphc_ctx *ctx = file->private;
106 struct lowpan_iphc_ctx_table *t =
107 container_of(ctx, struct lowpan_iphc_ctx_table, table[ctx->id]);
108
109 spin_lock_bh(&t->lock);
110 seq_printf(file, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
111 be16_to_cpu(ctx->pfx.s6_addr16[0]),
112 be16_to_cpu(ctx->pfx.s6_addr16[1]),
113 be16_to_cpu(ctx->pfx.s6_addr16[2]),
114 be16_to_cpu(ctx->pfx.s6_addr16[3]),
115 be16_to_cpu(ctx->pfx.s6_addr16[4]),
116 be16_to_cpu(ctx->pfx.s6_addr16[5]),
117 be16_to_cpu(ctx->pfx.s6_addr16[6]),
118 be16_to_cpu(ctx->pfx.s6_addr16[7]));
119 spin_unlock_bh(&t->lock);
120
121 return 0;
122}
123
124static int lowpan_ctx_pfx_open(struct inode *inode, struct file *file)
125{
126 return single_open(file, lowpan_ctx_pfx_show, inode->i_private);
127}
128
129static ssize_t lowpan_ctx_pfx_write(struct file *fp,
130 const char __user *user_buf, size_t count,
131 loff_t *ppos)
132{
133 char buf[128] = {};
134 struct seq_file *file = fp->private_data;
135 struct lowpan_iphc_ctx *ctx = file->private;
136 struct lowpan_iphc_ctx_table *t =
137 container_of(ctx, struct lowpan_iphc_ctx_table, table[ctx->id]);
138 int status = count, n, i;
139 unsigned int addr[8];
140
141 if (copy_from_user(&buf, user_buf, min_t(size_t, sizeof(buf) - 1,
142 count))) {
143 status = -EFAULT;
144 goto out;
145 }
146
147 n = sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
148 &addr[0], &addr[1], &addr[2], &addr[3], &addr[4],
149 &addr[5], &addr[6], &addr[7]);
150 if (n != LOWPAN_DEBUGFS_CTX_PFX_NUM_ARGS) {
151 status = -EINVAL;
152 goto out;
153 }
154
155 spin_lock_bh(&t->lock);
156 for (i = 0; i < 8; i++)
157 ctx->pfx.s6_addr16[i] = cpu_to_be16(addr[i] & 0xffff);
158 spin_unlock_bh(&t->lock);
159
160out:
161 return status;
162}
163
164static const struct file_operations lowpan_ctx_pfx_fops = {
165 .open = lowpan_ctx_pfx_open,
166 .read = seq_read,
167 .write = lowpan_ctx_pfx_write,
168 .llseek = seq_lseek,
169 .release = single_release,
170};
171
172static int lowpan_dev_debugfs_ctx_init(struct net_device *dev,
173 struct dentry *ctx, u8 id)
174{
175 struct lowpan_priv *lpriv = lowpan_priv(dev);
176 struct dentry *dentry, *root;
177 char buf[32];
178
179 WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE);
180
181 sprintf(buf, "%d", id);
182
183 root = debugfs_create_dir(buf, ctx);
184 if (!root)
185 return -EINVAL;
186
187 dentry = debugfs_create_file("active", 0644, root,
188 &lpriv->ctx.table[id],
189 &lowpan_ctx_flag_active_fops);
190 if (!dentry)
191 return -EINVAL;
192
193 dentry = debugfs_create_file("compression", 0644, root,
194 &lpriv->ctx.table[id],
195 &lowpan_ctx_flag_c_fops);
196 if (!dentry)
197 return -EINVAL;
198
199 dentry = debugfs_create_file("prefix", 0644, root,
200 &lpriv->ctx.table[id],
201 &lowpan_ctx_pfx_fops);
202 if (!dentry)
203 return -EINVAL;
204
205 dentry = debugfs_create_file("prefix_len", 0644, root,
206 &lpriv->ctx.table[id],
207 &lowpan_ctx_plen_fops);
208 if (!dentry)
209 return -EINVAL;
210
211 return 0;
212}
213
214static int lowpan_context_show(struct seq_file *file, void *offset)
215{
216 struct lowpan_iphc_ctx_table *t = file->private;
217 int i;
218
219 seq_printf(file, "%3s|%-43s|%c\n", "cid", "prefix", 'C');
220 seq_puts(file, "-------------------------------------------------\n");
221
222 spin_lock_bh(&t->lock);
223 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
224 if (!lowpan_iphc_ctx_is_active(&t->table[i]))
225 continue;
226
227 seq_printf(file, "%3d|%39pI6c/%-3d|%d\n", t->table[i].id,
228 &t->table[i].pfx, t->table[i].plen,
229 lowpan_iphc_ctx_is_compression(&t->table[i]));
230 }
231 spin_unlock_bh(&t->lock);
232
233 return 0;
234}
235
236static int lowpan_context_open(struct inode *inode, struct file *file)
237{
238 return single_open(file, lowpan_context_show, inode->i_private);
239}
240
241static const struct file_operations lowpan_context_fops = {
242 .open = lowpan_context_open,
243 .read = seq_read,
244 .llseek = seq_lseek,
245 .release = single_release,
246};
247
21int lowpan_dev_debugfs_init(struct net_device *dev) 248int lowpan_dev_debugfs_init(struct net_device *dev)
22{ 249{
23 struct lowpan_priv *lpriv = lowpan_priv(dev); 250 struct lowpan_priv *lpriv = lowpan_priv(dev);
251 struct dentry *contexts, *dentry;
252 int ret, i;
24 253
25 /* creating the root */ 254 /* creating the root */
26 lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); 255 lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs);
27 if (!lpriv->iface_debugfs) 256 if (!lpriv->iface_debugfs)
28 goto fail; 257 goto fail;
29 258
259 contexts = debugfs_create_dir("contexts", lpriv->iface_debugfs);
260 if (!contexts)
261 goto remove_root;
262
263 dentry = debugfs_create_file("show", 0644, contexts,
264 &lowpan_priv(dev)->ctx,
265 &lowpan_context_fops);
266 if (!dentry)
267 goto remove_root;
268
269 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
270 ret = lowpan_dev_debugfs_ctx_init(dev, contexts, i);
271 if (ret < 0)
272 goto remove_root;
273 }
274
30 return 0; 275 return 0;
31 276
277remove_root:
278 lowpan_dev_debugfs_exit(dev);
32fail: 279fail:
33 return -EINVAL; 280 return -EINVAL;
34} 281}
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 346b5c1a9185..99bb22aea346 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -56,6 +56,7 @@
56/* special link-layer handling */ 56/* special link-layer handling */
57#include <net/mac802154.h> 57#include <net/mac802154.h>
58 58
59#include "6lowpan_i.h"
59#include "nhc.h" 60#include "nhc.h"
60 61
61/* Values of fields within the IPHC encoding first byte */ 62/* Values of fields within the IPHC encoding first byte */
@@ -147,6 +148,9 @@
147 (((a)->s6_addr16[6]) == 0) && \ 148 (((a)->s6_addr16[6]) == 0) && \
148 (((a)->s6_addr[14]) == 0)) 149 (((a)->s6_addr[14]) == 0))
149 150
151#define LOWPAN_IPHC_CID_DCI(cid) (cid & 0x0f)
152#define LOWPAN_IPHC_CID_SCI(cid) ((cid & 0xf0) >> 4)
153
150static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, 154static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr,
151 const void *lladdr) 155 const void *lladdr)
152{ 156{
@@ -195,6 +199,98 @@ static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
195 } 199 }
196} 200}
197 201
202static struct lowpan_iphc_ctx *
203lowpan_iphc_ctx_get_by_id(const struct net_device *dev, u8 id)
204{
205 struct lowpan_iphc_ctx *ret = &lowpan_priv(dev)->ctx.table[id];
206
207 if (!lowpan_iphc_ctx_is_active(ret))
208 return NULL;
209
210 return ret;
211}
212
213static struct lowpan_iphc_ctx *
214lowpan_iphc_ctx_get_by_addr(const struct net_device *dev,
215 const struct in6_addr *addr)
216{
217 struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table;
218 struct lowpan_iphc_ctx *ret = NULL;
219 struct in6_addr addr_pfx;
220 u8 addr_plen;
221 int i;
222
223 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
224 /* Check if context is valid. A context that is not valid
225 * MUST NOT be used for compression.
226 */
227 if (!lowpan_iphc_ctx_is_active(&table[i]) ||
228 !lowpan_iphc_ctx_is_compression(&table[i]))
229 continue;
230
231 ipv6_addr_prefix(&addr_pfx, addr, table[i].plen);
232
233 /* if prefix len < 64, the remaining bits until 64th bit is
234 * zero. Otherwise we use table[i]->plen.
235 */
236 if (table[i].plen < 64)
237 addr_plen = 64;
238 else
239 addr_plen = table[i].plen;
240
241 if (ipv6_prefix_equal(&addr_pfx, &table[i].pfx, addr_plen)) {
242 /* remember first match */
243 if (!ret) {
244 ret = &table[i];
245 continue;
246 }
247
248 /* get the context with longest prefix len */
249 if (table[i].plen > ret->plen)
250 ret = &table[i];
251 }
252 }
253
254 return ret;
255}
256
257static struct lowpan_iphc_ctx *
258lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev,
259 const struct in6_addr *addr)
260{
261 struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table;
262 struct lowpan_iphc_ctx *ret = NULL;
263 struct in6_addr addr_mcast, network_pfx = {};
264 int i;
265
266 /* init mcast address with */
267 memcpy(&addr_mcast, addr, sizeof(*addr));
268
269 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
270 /* Check if context is valid. A context that is not valid
271 * MUST NOT be used for compression.
272 */
273 if (!lowpan_iphc_ctx_is_active(&table[i]) ||
274 !lowpan_iphc_ctx_is_compression(&table[i]))
275 continue;
276
277 /* setting plen */
278 addr_mcast.s6_addr[3] = table[i].plen;
279 /* get network prefix to copy into multicast address */
280 ipv6_addr_prefix(&network_pfx, &table[i].pfx,
281 table[i].plen);
282 /* setting network prefix */
283 memcpy(&addr_mcast.s6_addr[4], &network_pfx, 8);
284
285 if (ipv6_addr_equal(addr, &addr_mcast)) {
286 ret = &table[i];
287 break;
288 }
289 }
290
291 return ret;
292}
293
198/* Uncompress address function for source and 294/* Uncompress address function for source and
199 * destination address(non-multicast). 295 * destination address(non-multicast).
200 * 296 *
@@ -259,30 +355,59 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
259/* Uncompress address function for source context 355/* Uncompress address function for source context
260 * based address(non-multicast). 356 * based address(non-multicast).
261 */ 357 */
262static int uncompress_context_based_src_addr(struct sk_buff *skb, 358static int uncompress_ctx_addr(struct sk_buff *skb,
263 struct in6_addr *ipaddr, 359 const struct net_device *dev,
264 u8 address_mode) 360 const struct lowpan_iphc_ctx *ctx,
361 struct in6_addr *ipaddr, u8 address_mode,
362 const void *lladdr)
265{ 363{
364 bool fail;
365
266 switch (address_mode) { 366 switch (address_mode) {
267 case LOWPAN_IPHC_SAM_00: 367 /* SAM and DAM are the same here */
268 /* unspec address :: 368 case LOWPAN_IPHC_DAM_00:
369 fail = false;
370 /* SAM_00 -> unspec address ::
269 * Do nothing, address is already :: 371 * Do nothing, address is already ::
372 *
373 * DAM 00 -> reserved should never occur.
270 */ 374 */
271 break; 375 break;
272 case LOWPAN_IPHC_SAM_01: 376 case LOWPAN_IPHC_SAM_01:
273 /* TODO */ 377 case LOWPAN_IPHC_DAM_01:
378 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
379 ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
380 break;
274 case LOWPAN_IPHC_SAM_10: 381 case LOWPAN_IPHC_SAM_10:
275 /* TODO */ 382 case LOWPAN_IPHC_DAM_10:
383 ipaddr->s6_addr[11] = 0xFF;
384 ipaddr->s6_addr[12] = 0xFE;
385 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
386 ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
387 break;
276 case LOWPAN_IPHC_SAM_11: 388 case LOWPAN_IPHC_SAM_11:
277 /* TODO */ 389 case LOWPAN_IPHC_DAM_11:
278 netdev_warn(skb->dev, "SAM value 0x%x not supported\n", 390 fail = false;
279 address_mode); 391 switch (lowpan_priv(dev)->lltype) {
280 return -EINVAL; 392 case LOWPAN_LLTYPE_IEEE802154:
393 iphc_uncompress_802154_lladdr(ipaddr, lladdr);
394 break;
395 default:
396 iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
397 break;
398 }
399 ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
400 break;
281 default: 401 default:
282 pr_debug("Invalid sam value: 0x%x\n", address_mode); 402 pr_debug("Invalid sam value: 0x%x\n", address_mode);
283 return -EINVAL; 403 return -EINVAL;
284 } 404 }
285 405
406 if (fail) {
407 pr_debug("Failed to fetch skb data\n");
408 return -EIO;
409 }
410
286 raw_dump_inline(NULL, 411 raw_dump_inline(NULL,
287 "Reconstructed context based ipv6 src addr is", 412 "Reconstructed context based ipv6 src addr is",
288 ipaddr->s6_addr, 16); 413 ipaddr->s6_addr, 16);
@@ -346,6 +471,30 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
346 return 0; 471 return 0;
347} 472}
348 473
474static int lowpan_uncompress_multicast_ctx_daddr(struct sk_buff *skb,
475 struct lowpan_iphc_ctx *ctx,
476 struct in6_addr *ipaddr,
477 u8 address_mode)
478{
479 struct in6_addr network_pfx = {};
480 bool fail;
481
482 ipaddr->s6_addr[0] = 0xFF;
483 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 2);
484 fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[12], 4);
485 if (fail)
486 return -EIO;
487
488 /* take prefix_len and network prefix from the context */
489 ipaddr->s6_addr[3] = ctx->plen;
490 /* get network prefix to copy into multicast address */
491 ipv6_addr_prefix(&network_pfx, &ctx->pfx, ctx->plen);
492 /* setting network prefix */
493 memcpy(&ipaddr->s6_addr[4], &network_pfx, 8);
494
495 return 0;
496}
497
349/* get the ecn values from iphc tf format and set it to ipv6hdr */ 498/* get the ecn values from iphc tf format and set it to ipv6hdr */
350static inline void lowpan_iphc_tf_set_ecn(struct ipv6hdr *hdr, const u8 *tf) 499static inline void lowpan_iphc_tf_set_ecn(struct ipv6hdr *hdr, const u8 *tf)
351{ 500{
@@ -459,7 +608,8 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
459 const void *daddr, const void *saddr) 608 const void *daddr, const void *saddr)
460{ 609{
461 struct ipv6hdr hdr = {}; 610 struct ipv6hdr hdr = {};
462 u8 iphc0, iphc1; 611 struct lowpan_iphc_ctx *ci;
612 u8 iphc0, iphc1, cid = 0;
463 int err; 613 int err;
464 614
465 raw_dump_table(__func__, "raw skb data dump uncompressed", 615 raw_dump_table(__func__, "raw skb data dump uncompressed",
@@ -469,12 +619,14 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
469 lowpan_fetch_skb(skb, &iphc1, sizeof(iphc1))) 619 lowpan_fetch_skb(skb, &iphc1, sizeof(iphc1)))
470 return -EINVAL; 620 return -EINVAL;
471 621
472 /* another if the CID flag is set */
473 if (iphc1 & LOWPAN_IPHC_CID)
474 return -ENOTSUPP;
475
476 hdr.version = 6; 622 hdr.version = 6;
477 623
624 /* default CID = 0, another if the CID flag is set */
625 if (iphc1 & LOWPAN_IPHC_CID) {
626 if (lowpan_fetch_skb(skb, &cid, sizeof(cid)))
627 return -EINVAL;
628 }
629
478 err = lowpan_iphc_tf_decompress(skb, &hdr, 630 err = lowpan_iphc_tf_decompress(skb, &hdr,
479 iphc0 & LOWPAN_IPHC_TF_MASK); 631 iphc0 & LOWPAN_IPHC_TF_MASK);
480 if (err < 0) 632 if (err < 0)
@@ -500,10 +652,17 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
500 } 652 }
501 653
502 if (iphc1 & LOWPAN_IPHC_SAC) { 654 if (iphc1 & LOWPAN_IPHC_SAC) {
503 /* Source address context based uncompression */ 655 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
656 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_SCI(cid));
657 if (!ci) {
658 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
659 return -EINVAL;
660 }
661
504 pr_debug("SAC bit is set. Handle context based source address.\n"); 662 pr_debug("SAC bit is set. Handle context based source address.\n");
505 err = uncompress_context_based_src_addr(skb, &hdr.saddr, 663 err = uncompress_ctx_addr(skb, dev, ci, &hdr.saddr,
506 iphc1 & LOWPAN_IPHC_SAM_MASK); 664 iphc1 & LOWPAN_IPHC_SAM_MASK, saddr);
665 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
507 } else { 666 } else {
508 /* Source address uncompression */ 667 /* Source address uncompression */
509 pr_debug("source address stateless compression\n"); 668 pr_debug("source address stateless compression\n");
@@ -515,27 +674,52 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
515 if (err) 674 if (err)
516 return -EINVAL; 675 return -EINVAL;
517 676
518 /* check for Multicast Compression */ 677 switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) {
519 if (iphc1 & LOWPAN_IPHC_M) { 678 case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC:
520 if (iphc1 & LOWPAN_IPHC_DAC) { 679 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
521 pr_debug("dest: context-based mcast compression\n"); 680 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
522 /* TODO: implement this */ 681 if (!ci) {
523 } else { 682 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
524 err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr, 683 return -EINVAL;
525 iphc1 & LOWPAN_IPHC_DAM_MASK); 684 }
526 685
527 if (err) 686 /* multicast with context */
528 return -EINVAL; 687 pr_debug("dest: context-based mcast compression\n");
688 err = lowpan_uncompress_multicast_ctx_daddr(skb, ci,
689 &hdr.daddr,
690 iphc1 & LOWPAN_IPHC_DAM_MASK);
691 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
692 break;
693 case LOWPAN_IPHC_M:
694 /* multicast */
695 err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr,
696 iphc1 & LOWPAN_IPHC_DAM_MASK);
697 break;
698 case LOWPAN_IPHC_DAC:
699 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
700 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
701 if (!ci) {
702 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
703 return -EINVAL;
529 } 704 }
530 } else { 705
706 /* Destination address context based uncompression */
707 pr_debug("DAC bit is set. Handle context based destination address.\n");
708 err = uncompress_ctx_addr(skb, dev, ci, &hdr.daddr,
709 iphc1 & LOWPAN_IPHC_DAM_MASK, daddr);
710 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
711 break;
712 default:
531 err = uncompress_addr(skb, dev, &hdr.daddr, 713 err = uncompress_addr(skb, dev, &hdr.daddr,
532 iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); 714 iphc1 & LOWPAN_IPHC_DAM_MASK, daddr);
533 pr_debug("dest: stateless compression mode %d dest %pI6c\n", 715 pr_debug("dest: stateless compression mode %d dest %pI6c\n",
534 iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr); 716 iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr);
535 if (err) 717 break;
536 return -EINVAL;
537 } 718 }
538 719
720 if (err)
721 return -EINVAL;
722
539 /* Next header data uncompression */ 723 /* Next header data uncompression */
540 if (iphc0 & LOWPAN_IPHC_NH) { 724 if (iphc0 & LOWPAN_IPHC_NH) {
541 err = lowpan_nhc_do_uncompression(skb, dev, &hdr); 725 err = lowpan_nhc_do_uncompression(skb, dev, &hdr);
@@ -585,6 +769,58 @@ static const u8 lowpan_iphc_dam_to_sam_value[] = {
585 [LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11, 769 [LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11,
586}; 770};
587 771
772static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct in6_addr *ipaddr,
773 const struct lowpan_iphc_ctx *ctx,
774 const unsigned char *lladdr, bool sam)
775{
776 struct in6_addr tmp = {};
777 u8 dam;
778
779 /* check for SAM/DAM = 11 */
780 memcpy(&tmp.s6_addr[8], lladdr, 8);
781 /* second bit-flip (Universe/Local) is done according RFC2464 */
782 tmp.s6_addr[8] ^= 0x02;
783 /* context information are always used */
784 ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
785 if (ipv6_addr_equal(&tmp, ipaddr)) {
786 dam = LOWPAN_IPHC_DAM_11;
787 goto out;
788 }
789
790 memset(&tmp, 0, sizeof(tmp));
791 /* check for SAM/DAM = 10 */
792 tmp.s6_addr[11] = 0xFF;
793 tmp.s6_addr[12] = 0xFE;
794 memcpy(&tmp.s6_addr[14], &ipaddr->s6_addr[14], 2);
795 /* context information are always used */
796 ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
797 if (ipv6_addr_equal(&tmp, ipaddr)) {
798 lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[14], 2);
799 dam = LOWPAN_IPHC_DAM_10;
800 goto out;
801 }
802
803 memset(&tmp, 0, sizeof(tmp));
804 /* check for SAM/DAM = 01, should always match */
805 memcpy(&tmp.s6_addr[8], &ipaddr->s6_addr[8], 8);
806 /* context information are always used */
807 ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
808 if (ipv6_addr_equal(&tmp, ipaddr)) {
809 lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[8], 8);
810 dam = LOWPAN_IPHC_DAM_01;
811 goto out;
812 }
813
814 WARN_ONCE(1, "context found but no address mode matched\n");
815 return LOWPAN_IPHC_DAM_00;
816out:
817
818 if (sam)
819 return lowpan_iphc_dam_to_sam_value[dam];
820 else
821 return dam;
822}
823
588static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr, 824static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr,
589 const unsigned char *lladdr, bool sam) 825 const unsigned char *lladdr, bool sam)
590{ 826{
@@ -708,6 +944,21 @@ static u8 lowpan_iphc_tf_compress(u8 **hc_ptr, const struct ipv6hdr *hdr)
708 return val; 944 return val;
709} 945}
710 946
947static u8 lowpan_iphc_mcast_ctx_addr_compress(u8 **hc_ptr,
948 const struct lowpan_iphc_ctx *ctx,
949 const struct in6_addr *ipaddr)
950{
951 u8 data[6];
952
953 /* flags/scope, reserved (RIID) */
954 memcpy(data, &ipaddr->s6_addr[1], 2);
955 /* group ID */
956 memcpy(&data[1], &ipaddr->s6_addr[11], 4);
957 lowpan_push_hc_data(hc_ptr, data, 6);
958
959 return LOWPAN_IPHC_DAM_00;
960}
961
711static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr, 962static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr,
712 const struct in6_addr *ipaddr) 963 const struct in6_addr *ipaddr)
713{ 964{
@@ -742,10 +993,11 @@ static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr,
742int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, 993int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
743 const void *daddr, const void *saddr) 994 const void *daddr, const void *saddr)
744{ 995{
745 u8 iphc0, iphc1, *hc_ptr; 996 u8 iphc0, iphc1, *hc_ptr, cid = 0;
746 struct ipv6hdr *hdr; 997 struct ipv6hdr *hdr;
747 u8 head[LOWPAN_IPHC_MAX_HC_BUF_LEN] = {}; 998 u8 head[LOWPAN_IPHC_MAX_HC_BUF_LEN] = {};
748 int ret, addr_type; 999 struct lowpan_iphc_ctx *dci, *sci, dci_entry, sci_entry;
1000 int ret, ipv6_daddr_type, ipv6_saddr_type;
749 1001
750 if (skb->protocol != htons(ETH_P_IPV6)) 1002 if (skb->protocol != htons(ETH_P_IPV6))
751 return -EINVAL; 1003 return -EINVAL;
@@ -769,14 +1021,38 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
769 iphc0 = LOWPAN_DISPATCH_IPHC; 1021 iphc0 = LOWPAN_DISPATCH_IPHC;
770 iphc1 = 0; 1022 iphc1 = 0;
771 1023
772 /* TODO: context lookup */
773
774 raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN); 1024 raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN);
775 raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN); 1025 raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN);
776 1026
777 raw_dump_table(__func__, "sending raw skb network uncompressed packet", 1027 raw_dump_table(__func__, "sending raw skb network uncompressed packet",
778 skb->data, skb->len); 1028 skb->data, skb->len);
779 1029
1030 ipv6_daddr_type = ipv6_addr_type(&hdr->daddr);
1031 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
1032 if (ipv6_daddr_type & IPV6_ADDR_MULTICAST)
1033 dci = lowpan_iphc_ctx_get_by_mcast_addr(dev, &hdr->daddr);
1034 else
1035 dci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->daddr);
1036 if (dci) {
1037 memcpy(&dci_entry, dci, sizeof(*dci));
1038 cid |= dci->id;
1039 }
1040 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
1041
1042 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
1043 sci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->saddr);
1044 if (sci) {
1045 memcpy(&sci_entry, sci, sizeof(*sci));
1046 cid |= (sci->id << 4);
1047 }
1048 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
1049
1050 /* if cid is zero it will be compressed */
1051 if (cid) {
1052 iphc1 |= LOWPAN_IPHC_CID;
1053 lowpan_push_hc_data(&hc_ptr, &cid, sizeof(cid));
1054 }
1055
780 /* Traffic Class, Flow Label compression */ 1056 /* Traffic Class, Flow Label compression */
781 iphc0 |= lowpan_iphc_tf_compress(&hc_ptr, hdr); 1057 iphc0 |= lowpan_iphc_tf_compress(&hc_ptr, hdr);
782 1058
@@ -813,39 +1089,64 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
813 sizeof(hdr->hop_limit)); 1089 sizeof(hdr->hop_limit));
814 } 1090 }
815 1091
816 addr_type = ipv6_addr_type(&hdr->saddr); 1092 ipv6_saddr_type = ipv6_addr_type(&hdr->saddr);
817 /* source address compression */ 1093 /* source address compression */
818 if (addr_type == IPV6_ADDR_ANY) { 1094 if (ipv6_saddr_type == IPV6_ADDR_ANY) {
819 pr_debug("source address is unspecified, setting SAC\n"); 1095 pr_debug("source address is unspecified, setting SAC\n");
820 iphc1 |= LOWPAN_IPHC_SAC; 1096 iphc1 |= LOWPAN_IPHC_SAC;
821 } else { 1097 } else {
822 if (addr_type & IPV6_ADDR_LINKLOCAL) { 1098 if (sci) {
823 iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->saddr, 1099 iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->saddr,
824 saddr, true); 1100 &sci_entry, saddr,
825 pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n", 1101 true);
826 &hdr->saddr, iphc1); 1102 iphc1 |= LOWPAN_IPHC_SAC;
827 } else { 1103 } else {
828 pr_debug("send the full source address\n"); 1104 if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL) {
829 lowpan_push_hc_data(&hc_ptr, hdr->saddr.s6_addr, 16); 1105 iphc1 |= lowpan_compress_addr_64(&hc_ptr,
1106 &hdr->saddr,
1107 saddr, true);
1108 pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n",
1109 &hdr->saddr, iphc1);
1110 } else {
1111 pr_debug("send the full source address\n");
1112 lowpan_push_hc_data(&hc_ptr,
1113 hdr->saddr.s6_addr, 16);
1114 }
830 } 1115 }
831 } 1116 }
832 1117
833 addr_type = ipv6_addr_type(&hdr->daddr);
834 /* destination address compression */ 1118 /* destination address compression */
835 if (addr_type & IPV6_ADDR_MULTICAST) { 1119 if (ipv6_daddr_type & IPV6_ADDR_MULTICAST) {
836 pr_debug("destination address is multicast: "); 1120 pr_debug("destination address is multicast: ");
837 iphc1 |= LOWPAN_IPHC_M; 1121 iphc1 |= LOWPAN_IPHC_M;
838 iphc1 |= lowpan_iphc_mcast_addr_compress(&hc_ptr, &hdr->daddr); 1122 if (dci) {
1123 iphc1 |= lowpan_iphc_mcast_ctx_addr_compress(&hc_ptr,
1124 &dci_entry,
1125 &hdr->daddr);
1126 iphc1 |= LOWPAN_IPHC_DAC;
1127 } else {
1128 iphc1 |= lowpan_iphc_mcast_addr_compress(&hc_ptr,
1129 &hdr->daddr);
1130 }
839 } else { 1131 } else {
840 if (addr_type & IPV6_ADDR_LINKLOCAL) { 1132 if (dci) {
841 /* TODO: context lookup */ 1133 iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->daddr,
842 iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->daddr, 1134 &dci_entry, daddr,
843 daddr, false); 1135 false);
844 pr_debug("dest address unicast link-local %pI6c " 1136 iphc1 |= LOWPAN_IPHC_DAC;
845 "iphc1 0x%02x\n", &hdr->daddr, iphc1);
846 } else { 1137 } else {
847 pr_debug("dest address unicast %pI6c\n", &hdr->daddr); 1138 if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL) {
848 lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16); 1139 iphc1 |= lowpan_compress_addr_64(&hc_ptr,
1140 &hdr->daddr,
1141 daddr, false);
1142 pr_debug("dest address unicast link-local %pI6c iphc1 0x%02x\n",
1143 &hdr->daddr, iphc1);
1144 } else {
1145 pr_debug("dest address unicast %pI6c\n",
1146 &hdr->daddr);
1147 lowpan_push_hc_data(&hc_ptr,
1148 hdr->daddr.s6_addr, 16);
1149 }
849 } 1150 }
850 } 1151 }
851 1152
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index d2cd9de4b724..a1e273af6fc8 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -261,7 +261,6 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
261 * hope the underlying device can handle it. 261 * hope the underlying device can handle it.
262 */ 262 */
263 new_dev->mtu = real_dev->mtu; 263 new_dev->mtu = real_dev->mtu;
264 new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
265 264
266 vlan = vlan_dev_priv(new_dev); 265 vlan = vlan_dev_priv(new_dev);
267 vlan->vlan_proto = htons(ETH_P_8021Q); 266 vlan->vlan_proto = htons(ETH_P_8021Q);
@@ -312,6 +311,7 @@ static void vlan_transfer_features(struct net_device *dev,
312 struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); 311 struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
313 312
314 vlandev->gso_max_size = dev->gso_max_size; 313 vlandev->gso_max_size = dev->gso_max_size;
314 vlandev->gso_max_segs = dev->gso_max_segs;
315 315
316 if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) 316 if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
317 vlandev->hard_header_len = dev->hard_header_len; 317 vlandev->hard_header_len = dev->hard_header_len;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ad5e2fd1012c..e7e62570bdb8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -551,6 +551,7 @@ static int vlan_dev_init(struct net_device *dev)
551 dev->features |= real_dev->vlan_features | NETIF_F_LLTX | 551 dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
552 NETIF_F_GSO_SOFTWARE; 552 NETIF_F_GSO_SOFTWARE;
553 dev->gso_max_size = real_dev->gso_max_size; 553 dev->gso_max_size = real_dev->gso_max_size;
554 dev->gso_max_segs = real_dev->gso_max_segs;
554 if (dev->features & NETIF_F_VLAN_FEATURES) 555 if (dev->features & NETIF_F_VLAN_FEATURES)
555 netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); 556 netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
556 557
@@ -621,12 +622,12 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
621 return features; 622 return features;
622} 623}
623 624
624static int vlan_ethtool_get_settings(struct net_device *dev, 625static int vlan_ethtool_get_link_ksettings(struct net_device *dev,
625 struct ethtool_cmd *cmd) 626 struct ethtool_link_ksettings *cmd)
626{ 627{
627 const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); 628 const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
628 629
629 return __ethtool_get_settings(vlan->real_dev, cmd); 630 return __ethtool_get_link_ksettings(vlan->real_dev, cmd);
630} 631}
631 632
632static void vlan_ethtool_get_drvinfo(struct net_device *dev, 633static void vlan_ethtool_get_drvinfo(struct net_device *dev,
@@ -741,7 +742,7 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
741} 742}
742 743
743static const struct ethtool_ops vlan_ethtool_ops = { 744static const struct ethtool_ops vlan_ethtool_ops = {
744 .get_settings = vlan_ethtool_get_settings, 745 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
745 .get_drvinfo = vlan_ethtool_get_drvinfo, 746 .get_drvinfo = vlan_ethtool_get_drvinfo,
746 .get_link = ethtool_op_get_link, 747 .get_link = ethtool_op_get_link,
747 .get_ts_info = vlan_ethtool_get_ts_info, 748 .get_ts_info = vlan_ethtool_get_ts_info,
@@ -799,6 +800,7 @@ void vlan_setup(struct net_device *dev)
799 ether_setup(dev); 800 ether_setup(dev);
800 801
801 dev->priv_flags |= IFF_802_1Q_VLAN | IFF_NO_QUEUE; 802 dev->priv_flags |= IFF_802_1Q_VLAN | IFF_NO_QUEUE;
803 dev->priv_flags |= IFF_UNICAST_FLT;
802 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 804 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
803 netif_keep_dst(dev); 805 netif_keep_dst(dev);
804 806
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index ae63cf72a953..5f1446c9f098 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -184,12 +184,11 @@ int vlan_proc_add_dev(struct net_device *vlandev)
184/* 184/*
185 * Delete directory entry for VLAN device. 185 * Delete directory entry for VLAN device.
186 */ 186 */
187int vlan_proc_rem_dev(struct net_device *vlandev) 187void vlan_proc_rem_dev(struct net_device *vlandev)
188{ 188{
189 /** NOTE: This will consume the memory pointed to by dent, it seems. */ 189 /** NOTE: This will consume the memory pointed to by dent, it seems. */
190 proc_remove(vlan_dev_priv(vlandev)->dent); 190 proc_remove(vlan_dev_priv(vlandev)->dent);
191 vlan_dev_priv(vlandev)->dent = NULL; 191 vlan_dev_priv(vlandev)->dent = NULL;
192 return 0;
193} 192}
194 193
195/****** Proc filesystem entry points ****************************************/ 194/****** Proc filesystem entry points ****************************************/
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index 063f60a3d5cc..8838a2e92eb6 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -5,7 +5,7 @@
5struct net; 5struct net;
6 6
7int vlan_proc_init(struct net *net); 7int vlan_proc_init(struct net *net);
8int vlan_proc_rem_dev(struct net_device *vlandev); 8void vlan_proc_rem_dev(struct net_device *vlandev);
9int vlan_proc_add_dev(struct net_device *vlandev); 9int vlan_proc_add_dev(struct net_device *vlandev);
10void vlan_proc_cleanup(struct net *net); 10void vlan_proc_cleanup(struct net *net);
11 11
@@ -14,7 +14,7 @@ void vlan_proc_cleanup(struct net *net);
14#define vlan_proc_init(net) (0) 14#define vlan_proc_init(net) (0)
15#define vlan_proc_cleanup(net) do {} while (0) 15#define vlan_proc_cleanup(net) do {} while (0)
16#define vlan_proc_add_dev(dev) ({(void)(dev), 0; }) 16#define vlan_proc_add_dev(dev) ({(void)(dev), 0; })
17#define vlan_proc_rem_dev(dev) ({(void)(dev), 0; }) 17#define vlan_proc_rem_dev(dev) do {} while (0)
18#endif 18#endif
19 19
20#endif /* !(__BEN_VLAN_PROC_INC__) */ 20#endif /* !(__BEN_VLAN_PROC_INC__) */
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 52b4a2f993f2..1852e383afd6 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -109,14 +109,13 @@ struct p9_trans_rdma {
109/** 109/**
110 * p9_rdma_context - Keeps track of in-process WR 110 * p9_rdma_context - Keeps track of in-process WR
111 * 111 *
112 * @wc_op: The original WR op for when the CQE completes in error.
113 * @busa: Bus address to unmap when the WR completes 112 * @busa: Bus address to unmap when the WR completes
114 * @req: Keeps track of requests (send) 113 * @req: Keeps track of requests (send)
115 * @rc: Keepts track of replies (receive) 114 * @rc: Keepts track of replies (receive)
116 */ 115 */
117struct p9_rdma_req; 116struct p9_rdma_req;
118struct p9_rdma_context { 117struct p9_rdma_context {
119 enum ib_wc_opcode wc_op; 118 struct ib_cqe cqe;
120 dma_addr_t busa; 119 dma_addr_t busa;
121 union { 120 union {
122 struct p9_req_t *req; 121 struct p9_req_t *req;
@@ -284,9 +283,12 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
284} 283}
285 284
286static void 285static void
287handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, 286recv_done(struct ib_cq *cq, struct ib_wc *wc)
288 struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
289{ 287{
288 struct p9_client *client = cq->cq_context;
289 struct p9_trans_rdma *rdma = client->trans;
290 struct p9_rdma_context *c =
291 container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
290 struct p9_req_t *req; 292 struct p9_req_t *req;
291 int err = 0; 293 int err = 0;
292 int16_t tag; 294 int16_t tag;
@@ -295,7 +297,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
295 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, 297 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
296 DMA_FROM_DEVICE); 298 DMA_FROM_DEVICE);
297 299
298 if (status != IB_WC_SUCCESS) 300 if (wc->status != IB_WC_SUCCESS)
299 goto err_out; 301 goto err_out;
300 302
301 err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); 303 err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
@@ -316,21 +318,32 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
316 req->rc = c->rc; 318 req->rc = c->rc;
317 p9_client_cb(client, req, REQ_STATUS_RCVD); 319 p9_client_cb(client, req, REQ_STATUS_RCVD);
318 320
321 out:
322 up(&rdma->rq_sem);
323 kfree(c);
319 return; 324 return;
320 325
321 err_out: 326 err_out:
322 p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); 327 p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
328 req, err, wc->status);
323 rdma->state = P9_RDMA_FLUSHING; 329 rdma->state = P9_RDMA_FLUSHING;
324 client->status = Disconnected; 330 client->status = Disconnected;
331 goto out;
325} 332}
326 333
327static void 334static void
328handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, 335send_done(struct ib_cq *cq, struct ib_wc *wc)
329 struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
330{ 336{
337 struct p9_client *client = cq->cq_context;
338 struct p9_trans_rdma *rdma = client->trans;
339 struct p9_rdma_context *c =
340 container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
341
331 ib_dma_unmap_single(rdma->cm_id->device, 342 ib_dma_unmap_single(rdma->cm_id->device,
332 c->busa, c->req->tc->size, 343 c->busa, c->req->tc->size,
333 DMA_TO_DEVICE); 344 DMA_TO_DEVICE);
345 up(&rdma->sq_sem);
346 kfree(c);
334} 347}
335 348
336static void qp_event_handler(struct ib_event *event, void *context) 349static void qp_event_handler(struct ib_event *event, void *context)
@@ -339,42 +352,6 @@ static void qp_event_handler(struct ib_event *event, void *context)
339 event->event, context); 352 event->event, context);
340} 353}
341 354
342static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
343{
344 struct p9_client *client = cq_context;
345 struct p9_trans_rdma *rdma = client->trans;
346 int ret;
347 struct ib_wc wc;
348
349 ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
350 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
351 struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
352
353 switch (c->wc_op) {
354 case IB_WC_RECV:
355 handle_recv(client, rdma, c, wc.status, wc.byte_len);
356 up(&rdma->rq_sem);
357 break;
358
359 case IB_WC_SEND:
360 handle_send(client, rdma, c, wc.status, wc.byte_len);
361 up(&rdma->sq_sem);
362 break;
363
364 default:
365 pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
366 c->wc_op, wc.opcode, wc.status);
367 break;
368 }
369 kfree(c);
370 }
371}
372
373static void cq_event_handler(struct ib_event *e, void *v)
374{
375 p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
376}
377
378static void rdma_destroy_trans(struct p9_trans_rdma *rdma) 355static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
379{ 356{
380 if (!rdma) 357 if (!rdma)
@@ -387,7 +364,7 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
387 ib_dealloc_pd(rdma->pd); 364 ib_dealloc_pd(rdma->pd);
388 365
389 if (rdma->cq && !IS_ERR(rdma->cq)) 366 if (rdma->cq && !IS_ERR(rdma->cq))
390 ib_destroy_cq(rdma->cq); 367 ib_free_cq(rdma->cq);
391 368
392 if (rdma->cm_id && !IS_ERR(rdma->cm_id)) 369 if (rdma->cm_id && !IS_ERR(rdma->cm_id))
393 rdma_destroy_id(rdma->cm_id); 370 rdma_destroy_id(rdma->cm_id);
@@ -408,13 +385,14 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
408 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) 385 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
409 goto error; 386 goto error;
410 387
388 c->cqe.done = recv_done;
389
411 sge.addr = c->busa; 390 sge.addr = c->busa;
412 sge.length = client->msize; 391 sge.length = client->msize;
413 sge.lkey = rdma->pd->local_dma_lkey; 392 sge.lkey = rdma->pd->local_dma_lkey;
414 393
415 wr.next = NULL; 394 wr.next = NULL;
416 c->wc_op = IB_WC_RECV; 395 wr.wr_cqe = &c->cqe;
417 wr.wr_id = (unsigned long) c;
418 wr.sg_list = &sge; 396 wr.sg_list = &sge;
419 wr.num_sge = 1; 397 wr.num_sge = 1;
420 return ib_post_recv(rdma->qp, &wr, &bad_wr); 398 return ib_post_recv(rdma->qp, &wr, &bad_wr);
@@ -499,13 +477,14 @@ dont_need_post_recv:
499 goto send_error; 477 goto send_error;
500 } 478 }
501 479
480 c->cqe.done = send_done;
481
502 sge.addr = c->busa; 482 sge.addr = c->busa;
503 sge.length = c->req->tc->size; 483 sge.length = c->req->tc->size;
504 sge.lkey = rdma->pd->local_dma_lkey; 484 sge.lkey = rdma->pd->local_dma_lkey;
505 485
506 wr.next = NULL; 486 wr.next = NULL;
507 c->wc_op = IB_WC_SEND; 487 wr.wr_cqe = &c->cqe;
508 wr.wr_id = (unsigned long) c;
509 wr.opcode = IB_WR_SEND; 488 wr.opcode = IB_WR_SEND;
510 wr.send_flags = IB_SEND_SIGNALED; 489 wr.send_flags = IB_SEND_SIGNALED;
511 wr.sg_list = &sge; 490 wr.sg_list = &sge;
@@ -642,7 +621,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
642 struct p9_trans_rdma *rdma; 621 struct p9_trans_rdma *rdma;
643 struct rdma_conn_param conn_param; 622 struct rdma_conn_param conn_param;
644 struct ib_qp_init_attr qp_attr; 623 struct ib_qp_init_attr qp_attr;
645 struct ib_cq_init_attr cq_attr = {};
646 624
647 /* Parse the transport specific mount options */ 625 /* Parse the transport specific mount options */
648 err = parse_opts(args, &opts); 626 err = parse_opts(args, &opts);
@@ -695,13 +673,11 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
695 goto error; 673 goto error;
696 674
697 /* Create the Completion Queue */ 675 /* Create the Completion Queue */
698 cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; 676 rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
699 rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, 677 opts.sq_depth + opts.rq_depth + 1,
700 cq_event_handler, client, 678 0, IB_POLL_SOFTIRQ);
701 &cq_attr);
702 if (IS_ERR(rdma->cq)) 679 if (IS_ERR(rdma->cq))
703 goto error; 680 goto error;
704 ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
705 681
706 /* Create the Protection Domain */ 682 /* Create the Protection Domain */
707 rdma->pd = ib_alloc_pd(rdma->cm_id->device); 683 rdma->pd = ib_alloc_pd(rdma->cm_id->device);
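The trans_rdma.c hunks above move the 9P RDMA transport from a hand-rolled completion loop (ib_poll_cq() plus a wr_id cast back to a context pointer) to the newer CQ API: the queue is allocated with ib_alloc_cq(..., IB_POLL_SOFTIRQ), each work request carries a pointer to an embedded struct ib_cqe, and the core invokes its done callback, which recovers the per-request context with container_of(). The following stand-alone sketch illustrates only that recovery idiom; every name in it is made up for the example and is not part of the RDMA API.

/* Illustrative user-space sketch of the container_of() idiom used by
 * recv_done()/send_done() above: the completion layer hands back only a
 * pointer to the embedded member (here "cqe") and the callback recovers
 * the surrounding per-request context from it. Names are hypothetical.
 */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_cqe {
	void (*done)(struct fake_cqe *cqe);
};

struct fake_context {
	int id;			/* per-request state */
	struct fake_cqe cqe;	/* embedded completion entry */
};

static void fake_done(struct fake_cqe *cqe)
{
	struct fake_context *c = container_of(cqe, struct fake_context, cqe);

	printf("completion for request %d\n", c->id);
}

int main(void)
{
	struct fake_context ctx = { .id = 42, .cqe = { .done = fake_done } };

	/* the "CQ layer" only ever sees the embedded cqe pointer */
	ctx.cqe.done(&ctx.cqe);
	return 0;
}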
diff --git a/net/Kconfig b/net/Kconfig
index 174354618f8a..a8934d8c8fda 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -253,6 +253,9 @@ config XPS
253 depends on SMP 253 depends on SMP
254 default y 254 default y
255 255
256config HWBM
257 bool
258
256config SOCK_CGROUP_DATA 259config SOCK_CGROUP_DATA
257 bool 260 bool
258 default n 261 default n
@@ -360,6 +363,7 @@ source "net/can/Kconfig"
360source "net/irda/Kconfig" 363source "net/irda/Kconfig"
361source "net/bluetooth/Kconfig" 364source "net/bluetooth/Kconfig"
362source "net/rxrpc/Kconfig" 365source "net/rxrpc/Kconfig"
366source "net/kcm/Kconfig"
363 367
364config FIB_RULES 368config FIB_RULES
365 bool 369 bool
@@ -392,6 +396,26 @@ config LWTUNNEL
392 weight tunnel endpoint. Tunnel encapsulation parameters are stored 396 weight tunnel endpoint. Tunnel encapsulation parameters are stored
393 with light weight tunnel state associated with fib routes. 397 with light weight tunnel state associated with fib routes.
394 398
399config DST_CACHE
400 bool
401 default n
402
403config NET_DEVLINK
404 tristate "Network physical/parent device Netlink interface"
405 help
406 Network physical/parent device Netlink interface provides
407 infrastructure to support access to physical chip-wide config and
408 monitoring.
409
410config MAY_USE_DEVLINK
411 tristate
412 default m if NET_DEVLINK=m
413 default y if NET_DEVLINK=y || NET_DEVLINK=n
414 help
415 Drivers using the devlink infrastructure should have a dependency
416 on MAY_USE_DEVLINK to ensure they do not cause link errors when
417 devlink is a loadable module and the driver using it is built-in.
418
395endif # if NET 419endif # if NET
396 420
397# Used by archs to tell that they support BPF_JIT 421# Used by archs to tell that they support BPF_JIT
diff --git a/net/Makefile b/net/Makefile
index a5d04098dfce..81d14119eab5 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_IRDA) += irda/
34obj-$(CONFIG_BT) += bluetooth/ 34obj-$(CONFIG_BT) += bluetooth/
35obj-$(CONFIG_SUNRPC) += sunrpc/ 35obj-$(CONFIG_SUNRPC) += sunrpc/
36obj-$(CONFIG_AF_RXRPC) += rxrpc/ 36obj-$(CONFIG_AF_RXRPC) += rxrpc/
37obj-$(CONFIG_AF_KCM) += kcm/
37obj-$(CONFIG_ATM) += atm/ 38obj-$(CONFIG_ATM) += atm/
38obj-$(CONFIG_L2TP) += l2tp/ 39obj-$(CONFIG_L2TP) += l2tp/
39obj-$(CONFIG_DECNET) += decnet/ 40obj-$(CONFIG_DECNET) += decnet/
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index b563a3f5f2a8..2fa3be965101 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -228,8 +228,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
228} 228}
229#endif 229#endif
230 230
231static bool ax25_validate_header(const char *header, unsigned int len)
232{
233 ax25_digi digi;
234
235 if (!len)
236 return false;
237
238 if (header[0])
239 return true;
240
241 return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL,
242 NULL);
243}
244
231const struct header_ops ax25_header_ops = { 245const struct header_ops ax25_header_ops = {
232 .create = ax25_hard_header, 246 .create = ax25_hard_header,
247 .validate = ax25_validate_header,
233}; 248};
234 249
235EXPORT_SYMBOL(ax25_header_ops); 250EXPORT_SYMBOL(ax25_header_ops);
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c6fc8f756c9a..f66930ee3c0b 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -12,9 +12,23 @@ config BATMAN_ADV
12 B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is 12 B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
13 a routing protocol for multi-hop ad-hoc mesh networks. The 13 a routing protocol for multi-hop ad-hoc mesh networks. The
14 networks may be wired or wireless. See 14 networks may be wired or wireless. See
15 http://www.open-mesh.org/ for more information and user space 15 https://www.open-mesh.org/ for more information and user space
16 tools. 16 tools.
17 17
18config BATMAN_ADV_BATMAN_V
19 bool "B.A.T.M.A.N. V protocol (experimental)"
20 depends on BATMAN_ADV && CFG80211=y || (CFG80211=m && BATMAN_ADV=m)
21 default n
22 help
23 This option enables the B.A.T.M.A.N. V protocol, the successor
24 of the currently used B.A.T.M.A.N. IV protocol. The main
25 changes include splitting of the OGM protocol into a neighbor
26 discovery protocol (Echo Location Protocol, ELP) and a new OGM
27 Protocol OGMv2 for flooding protocol information through the
28 network, as well as a throughput based metric.
29 B.A.T.M.A.N. V is currently considered experimental and not
30 compatible with B.A.T.M.A.N. IV networks.
31
18config BATMAN_ADV_BLA 32config BATMAN_ADV_BLA
19 bool "Bridge Loop Avoidance" 33 bool "Bridge Loop Avoidance"
20 depends on BATMAN_ADV && INET 34 depends on BATMAN_ADV && INET
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 21434ab79d2c..797cf2fc88c1 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
1# 1#
2# Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 2# Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
3# 3#
4# Marek Lindner, Simon Wunderlich 4# Marek Lindner, Simon Wunderlich
5# 5#
@@ -18,6 +18,9 @@
18 18
19obj-$(CONFIG_BATMAN_ADV) += batman-adv.o 19obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
20batman-adv-y += bat_iv_ogm.o 20batman-adv-y += bat_iv_ogm.o
21batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v.o
22batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o
23batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o
21batman-adv-y += bitarray.o 24batman-adv-y += bitarray.o
22batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o 25batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
23batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o 26batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 4e59cf3eb079..03dafd33d23b 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,6 +1,6 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner, Linus Lüssing
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public 6 * modify it under the terms of version 2 of the GNU General Public
@@ -18,6 +18,32 @@
18#ifndef _NET_BATMAN_ADV_BAT_ALGO_H_ 18#ifndef _NET_BATMAN_ADV_BAT_ALGO_H_
19#define _NET_BATMAN_ADV_BAT_ALGO_H_ 19#define _NET_BATMAN_ADV_BAT_ALGO_H_
20 20
21struct batadv_priv;
22
21int batadv_iv_init(void); 23int batadv_iv_init(void);
22 24
25#ifdef CONFIG_BATMAN_ADV_BATMAN_V
26
27int batadv_v_init(void);
28int batadv_v_mesh_init(struct batadv_priv *bat_priv);
29void batadv_v_mesh_free(struct batadv_priv *bat_priv);
30
31#else
32
33static inline int batadv_v_init(void)
34{
35 return 0;
36}
37
38static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv)
39{
40 return 0;
41}
42
43static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv)
44{
45}
46
47#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
48
23#endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ 49#endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
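bat_algo.h above follows the usual kernel convention for optional features: when CONFIG_BATMAN_ADV_BATMAN_V is enabled the real batadv_v_*() functions are declared, otherwise static inline stubs returning 0 (or doing nothing) are provided so that callers need no #ifdefs of their own. Below is a minimal stand-alone sketch of the same pattern; FEATURE_X and the function name are purely hypothetical.

/* Sketch of the "real declaration or static inline stub" pattern used by
 * bat_algo.h above. FEATURE_X and feature_x_init() are made up.
 */
#include <stdio.h>

#ifdef FEATURE_X
int feature_x_init(void);		/* provided by feature_x.c when built */
#else
static inline int feature_x_init(void)
{
	return 0;			/* feature compiled out: harmless no-op */
}
#endif

int main(void)
{
	/* callers look identical whether the feature is built or not */
	printf("init: %d\n", feature_x_init());
	return 0;
}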
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index df625de55ef2..cb2d1b9b0340 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -31,6 +31,7 @@
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/jiffies.h> 32#include <linux/jiffies.h>
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/kref.h>
34#include <linux/netdevice.h> 35#include <linux/netdevice.h>
35#include <linux/pkt_sched.h> 36#include <linux/pkt_sched.h>
36#include <linux/printk.h> 37#include <linux/printk.h>
@@ -88,7 +89,7 @@ static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value)
88 * in the given ring buffer 89 * in the given ring buffer
89 * @lq_recv: pointer to the ring buffer 90 * @lq_recv: pointer to the ring buffer
90 * 91 *
91 * Returns computed average value. 92 * Return: computed average value.
92 */ 93 */
93static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) 94static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
94{ 95{
@@ -132,7 +133,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
132 * @orig_node: the orig_node that has to be changed 133 * @orig_node: the orig_node that has to be changed
133 * @max_if_num: the current amount of interfaces 134 * @max_if_num: the current amount of interfaces
134 * 135 *
135 * Returns 0 on success, a negative error code otherwise. 136 * Return: 0 on success, a negative error code otherwise.
136 */ 137 */
137static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, 138static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
138 int max_if_num) 139 int max_if_num)
@@ -180,7 +181,7 @@ unlock:
180 * @max_if_num: the current amount of interfaces 181 * @max_if_num: the current amount of interfaces
181 * @del_if_num: the index of the interface being removed 182 * @del_if_num: the index of the interface being removed
182 * 183 *
183 * Returns 0 on success, a negative error code otherwise. 184 * Return: 0 on success, a negative error code otherwise.
184 */ 185 */
185static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, 186static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
186 int max_if_num, int del_if_num) 187 int max_if_num, int del_if_num)
@@ -246,7 +247,7 @@ unlock:
246 * @bat_priv: the bat priv with all the soft interface information 247 * @bat_priv: the bat priv with all the soft interface information
247 * @addr: mac address of the originator 248 * @addr: mac address of the originator
248 * 249 *
249 * Returns the originator object corresponding to the passed mac address or NULL 250 * Return: the originator object corresponding to the passed mac address or NULL
250 * on failure. 251 * on failure.
251 * If the object does not exist it is created and initialised. 252 * If the object does not exist it is created and initialised.
252 */ 253 */
@@ -286,8 +287,8 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
286 287
287free_orig_node: 288free_orig_node:
288 /* free twice, as batadv_orig_node_new sets refcount to 2 */ 289 /* free twice, as batadv_orig_node_new sets refcount to 2 */
289 batadv_orig_node_free_ref(orig_node); 290 batadv_orig_node_put(orig_node);
290 batadv_orig_node_free_ref(orig_node); 291 batadv_orig_node_put(orig_node);
291 292
292 return NULL; 293 return NULL;
293} 294}
@@ -396,7 +397,14 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv)
396 return new_tq; 397 return new_tq;
397} 398}
398 399
399/* is there another aggregated packet here? */ 400/**
401 * batadv_iv_ogm_aggr_packet - checks if there is another OGM attached
402 * @buff_pos: current position in the skb
403 * @packet_len: total length of the skb
404 * @tvlv_len: tvlv length of the previously considered OGM
405 *
406 * Return: true if there is enough space for another OGM, false otherwise.
407 */
400static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, 408static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
401 __be16 tvlv_len) 409 __be16 tvlv_len)
402{ 410{
@@ -470,7 +478,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
470 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX); 478 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
471 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES, 479 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
472 skb->len + ETH_HLEN); 480 skb->len + ETH_HLEN);
473 batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr); 481 batadv_send_broadcast_skb(skb, hard_iface);
474 } 482 }
475} 483}
476 484
@@ -507,7 +515,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
507 515
508out: 516out:
509 if (primary_if) 517 if (primary_if)
510 batadv_hardif_free_ref(primary_if); 518 batadv_hardif_put(primary_if);
511} 519}
512 520
513/** 521/**
@@ -522,7 +530,7 @@ out:
522 * @if_outgoing: interface for which the retransmission should be considered 530 * @if_outgoing: interface for which the retransmission should be considered
523 * @forw_packet: the forwarded packet which should be checked 531 * @forw_packet: the forwarded packet which should be checked
524 * 532 *
525 * Returns true if new_packet can be aggregated with forw_packet 533 * Return: true if new_packet can be aggregated with forw_packet
526 */ 534 */
527static bool 535static bool
528batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, 536batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
@@ -609,7 +617,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
609 617
610out: 618out:
611 if (primary_if) 619 if (primary_if)
612 batadv_hardif_free_ref(primary_if); 620 batadv_hardif_put(primary_if);
613 return res; 621 return res;
614} 622}
615 623
@@ -636,10 +644,10 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
636 unsigned char *skb_buff; 644 unsigned char *skb_buff;
637 unsigned int skb_size; 645 unsigned int skb_size;
638 646
639 if (!atomic_inc_not_zero(&if_incoming->refcount)) 647 if (!kref_get_unless_zero(&if_incoming->refcount))
640 return; 648 return;
641 649
642 if (!atomic_inc_not_zero(&if_outgoing->refcount)) 650 if (!kref_get_unless_zero(&if_outgoing->refcount))
643 goto out_free_incoming; 651 goto out_free_incoming;
644 652
645 /* own packet should always be scheduled */ 653 /* own packet should always be scheduled */
@@ -703,9 +711,9 @@ out_nomem:
703 if (!own_packet) 711 if (!own_packet)
704 atomic_inc(&bat_priv->batman_queue_left); 712 atomic_inc(&bat_priv->batman_queue_left);
705out_free_outgoing: 713out_free_outgoing:
706 batadv_hardif_free_ref(if_outgoing); 714 batadv_hardif_put(if_outgoing);
707out_free_incoming: 715out_free_incoming:
708 batadv_hardif_free_ref(if_incoming); 716 batadv_hardif_put(if_incoming);
709} 717}
710 718
711/* aggregate a new packet into the existing ogm packet */ 719/* aggregate a new packet into the existing ogm packet */
@@ -950,7 +958,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
950 958
951out: 959out:
952 if (primary_if) 960 if (primary_if)
953 batadv_hardif_free_ref(primary_if); 961 batadv_hardif_put(primary_if);
954} 962}
955 963
956/** 964/**
@@ -995,9 +1003,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
995 neigh_addr = tmp_neigh_node->addr; 1003 neigh_addr = tmp_neigh_node->addr;
996 if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && 1004 if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
997 tmp_neigh_node->if_incoming == if_incoming && 1005 tmp_neigh_node->if_incoming == if_incoming &&
998 atomic_inc_not_zero(&tmp_neigh_node->refcount)) { 1006 kref_get_unless_zero(&tmp_neigh_node->refcount)) {
999 if (WARN(neigh_node, "too many matching neigh_nodes")) 1007 if (WARN(neigh_node, "too many matching neigh_nodes"))
1000 batadv_neigh_node_free_ref(neigh_node); 1008 batadv_neigh_node_put(neigh_node);
1001 neigh_node = tmp_neigh_node; 1009 neigh_node = tmp_neigh_node;
1002 continue; 1010 continue;
1003 } 1011 }
@@ -1018,7 +1026,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
1018 neigh_ifinfo->bat_iv.tq_avg = tq_avg; 1026 neigh_ifinfo->bat_iv.tq_avg = tq_avg;
1019 spin_unlock_bh(&tmp_neigh_node->ifinfo_lock); 1027 spin_unlock_bh(&tmp_neigh_node->ifinfo_lock);
1020 1028
1021 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 1029 batadv_neigh_ifinfo_put(neigh_ifinfo);
1022 neigh_ifinfo = NULL; 1030 neigh_ifinfo = NULL;
1023 } 1031 }
1024 1032
@@ -1033,7 +1041,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
1033 ethhdr->h_source, 1041 ethhdr->h_source,
1034 orig_node, orig_tmp); 1042 orig_node, orig_tmp);
1035 1043
1036 batadv_orig_node_free_ref(orig_tmp); 1044 batadv_orig_node_put(orig_tmp);
1037 if (!neigh_node) 1045 if (!neigh_node)
1038 goto unlock; 1046 goto unlock;
1039 } else { 1047 } else {
@@ -1108,13 +1116,13 @@ unlock:
1108 rcu_read_unlock(); 1116 rcu_read_unlock();
1109out: 1117out:
1110 if (neigh_node) 1118 if (neigh_node)
1111 batadv_neigh_node_free_ref(neigh_node); 1119 batadv_neigh_node_put(neigh_node);
1112 if (router) 1120 if (router)
1113 batadv_neigh_node_free_ref(router); 1121 batadv_neigh_node_put(router);
1114 if (neigh_ifinfo) 1122 if (neigh_ifinfo)
1115 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 1123 batadv_neigh_ifinfo_put(neigh_ifinfo);
1116 if (router_ifinfo) 1124 if (router_ifinfo)
1117 batadv_neigh_ifinfo_free_ref(router_ifinfo); 1125 batadv_neigh_ifinfo_put(router_ifinfo);
1118} 1126}
1119 1127
1120/** 1128/**
@@ -1125,7 +1133,7 @@ out:
1125 * @if_incoming: interface where the packet was received 1133 * @if_incoming: interface where the packet was received
1126 * @if_outgoing: interface for which the retransmission should be considered 1134 * @if_outgoing: interface for which the retransmission should be considered
1127 * 1135 *
1128 * Returns 1 if the link can be considered bidirectional, 0 otherwise 1136 * Return: 1 if the link can be considered bidirectional, 0 otherwise
1129 */ 1137 */
1130static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, 1138static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1131 struct batadv_orig_node *orig_neigh_node, 1139 struct batadv_orig_node *orig_neigh_node,
@@ -1154,7 +1162,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1154 if (tmp_neigh_node->if_incoming != if_incoming) 1162 if (tmp_neigh_node->if_incoming != if_incoming)
1155 continue; 1163 continue;
1156 1164
1157 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) 1165 if (!kref_get_unless_zero(&tmp_neigh_node->refcount))
1158 continue; 1166 continue;
1159 1167
1160 neigh_node = tmp_neigh_node; 1168 neigh_node = tmp_neigh_node;
@@ -1184,7 +1192,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1184 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); 1192 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
1185 if (neigh_ifinfo) { 1193 if (neigh_ifinfo) {
1186 neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count; 1194 neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count;
1187 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 1195 batadv_neigh_ifinfo_put(neigh_ifinfo);
1188 } else { 1196 } else {
1189 neigh_rq_count = 0; 1197 neigh_rq_count = 0;
1190 } 1198 }
@@ -1257,7 +1265,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1257 1265
1258out: 1266out:
1259 if (neigh_node) 1267 if (neigh_node)
1260 batadv_neigh_node_free_ref(neigh_node); 1268 batadv_neigh_node_put(neigh_node);
1261 return ret; 1269 return ret;
1262} 1270}
1263 1271
@@ -1269,7 +1277,7 @@ out:
1269 * @if_incoming: interface on which the OGM packet was received 1277 * @if_incoming: interface on which the OGM packet was received
1270 * @if_outgoing: interface for which the retransmission should be considered 1278 * @if_outgoing: interface for which the retransmission should be considered
1271 * 1279 *
1272 * Returns duplicate status as enum batadv_dup_status 1280 * Return: duplicate status as enum batadv_dup_status
1273 */ 1281 */
1274static enum batadv_dup_status 1282static enum batadv_dup_status
1275batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, 1283batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
@@ -1298,7 +1306,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1298 1306
1299 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); 1307 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
1300 if (WARN_ON(!orig_ifinfo)) { 1308 if (WARN_ON(!orig_ifinfo)) {
1301 batadv_orig_node_free_ref(orig_node); 1309 batadv_orig_node_put(orig_node);
1302 return 0; 1310 return 0;
1303 } 1311 }
1304 1312
@@ -1308,7 +1316,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1308 /* signalize caller that the packet is to be dropped. */ 1316 /* signalize caller that the packet is to be dropped. */
1309 if (!hlist_empty(&orig_node->neigh_list) && 1317 if (!hlist_empty(&orig_node->neigh_list) &&
1310 batadv_window_protected(bat_priv, seq_diff, 1318 batadv_window_protected(bat_priv, seq_diff,
1311 &orig_ifinfo->batman_seqno_reset)) { 1319 BATADV_TQ_LOCAL_WINDOW_SIZE,
1320 &orig_ifinfo->batman_seqno_reset, NULL)) {
1312 ret = BATADV_PROTECTED; 1321 ret = BATADV_PROTECTED;
1313 goto out; 1322 goto out;
1314 } 1323 }
@@ -1344,7 +1353,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1344 packet_count = bitmap_weight(bitmap, 1353 packet_count = bitmap_weight(bitmap,
1345 BATADV_TQ_LOCAL_WINDOW_SIZE); 1354 BATADV_TQ_LOCAL_WINDOW_SIZE);
1346 neigh_ifinfo->bat_iv.real_packet_count = packet_count; 1355 neigh_ifinfo->bat_iv.real_packet_count = packet_count;
1347 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 1356 batadv_neigh_ifinfo_put(neigh_ifinfo);
1348 } 1357 }
1349 rcu_read_unlock(); 1358 rcu_read_unlock();
1350 1359
@@ -1358,8 +1367,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1358 1367
1359out: 1368out:
1360 spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); 1369 spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
1361 batadv_orig_node_free_ref(orig_node); 1370 batadv_orig_node_put(orig_node);
1362 batadv_orig_ifinfo_free_ref(orig_ifinfo); 1371 batadv_orig_ifinfo_put(orig_ifinfo);
1363 return ret; 1372 return ret;
1364} 1373}
1365 1374
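The hunk above tracks per-neighbour reception statistics by counting the set bits of a sliding sequence-number window (bitmap_weight() over BATADV_TQ_LOCAL_WINDOW_SIZE) and now passes the window size to batadv_window_protected() explicitly. A rough user-space sketch of that window-counting idea follows; the size and helper names are invented for the example and are not the batman-adv implementation.

/* Illustrative sketch: count received sequence numbers inside a fixed-size
 * sliding window, in the spirit of the real_packet_count/bitmap_weight()
 * handling above. WINDOW_SIZE and all helpers are made up.
 */
#include <stdio.h>
#include <stdbool.h>

#define WINDOW_SIZE 64

static bool window[WINDOW_SIZE];	/* one slot per seqno in the window */

static void window_shift(int seq_diff)
{
	/* drop the oldest seq_diff slots when the window moves forward */
	for (int i = 0; i < WINDOW_SIZE; i++) {
		int src = i + seq_diff;

		window[i] = (src < WINDOW_SIZE) ? window[src] : false;
	}
}

static int window_weight(void)
{
	int count = 0;

	for (int i = 0; i < WINDOW_SIZE; i++)
		count += window[i];
	return count;			/* analogue of bitmap_weight() */
}

int main(void)
{
	window[WINDOW_SIZE - 1] = true;	/* newest seqno received */
	window[WINDOW_SIZE - 2] = true;
	window_shift(1);		/* window advances by one seqno */
	window[WINDOW_SIZE - 1] = true;
	printf("packets seen in window: %d\n", window_weight());
	return 0;
}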
@@ -1505,7 +1514,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1505 ogm_packet, if_incoming, 1514 ogm_packet, if_incoming,
1506 if_outgoing, dup_status); 1515 if_outgoing, dup_status);
1507 } 1516 }
1508 batadv_orig_ifinfo_free_ref(orig_ifinfo); 1517 batadv_orig_ifinfo_put(orig_ifinfo);
1509 1518
1510 /* only forward for specific interface, not for the default one. */ 1519 /* only forward for specific interface, not for the default one. */
1511 if (if_outgoing == BATADV_IF_DEFAULT) 1520 if (if_outgoing == BATADV_IF_DEFAULT)
@@ -1554,18 +1563,18 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1554 1563
1555out_neigh: 1564out_neigh:
1556 if ((orig_neigh_node) && (!is_single_hop_neigh)) 1565 if ((orig_neigh_node) && (!is_single_hop_neigh))
1557 batadv_orig_node_free_ref(orig_neigh_node); 1566 batadv_orig_node_put(orig_neigh_node);
1558out: 1567out:
1559 if (router_ifinfo) 1568 if (router_ifinfo)
1560 batadv_neigh_ifinfo_free_ref(router_ifinfo); 1569 batadv_neigh_ifinfo_put(router_ifinfo);
1561 if (router) 1570 if (router)
1562 batadv_neigh_node_free_ref(router); 1571 batadv_neigh_node_put(router);
1563 if (router_router) 1572 if (router_router)
1564 batadv_neigh_node_free_ref(router_router); 1573 batadv_neigh_node_put(router_router);
1565 if (orig_neigh_router) 1574 if (orig_neigh_router)
1566 batadv_neigh_node_free_ref(orig_neigh_router); 1575 batadv_neigh_node_put(orig_neigh_router);
1567 if (hardif_neigh) 1576 if (hardif_neigh)
1568 batadv_hardif_neigh_free_ref(hardif_neigh); 1577 batadv_hardif_neigh_put(hardif_neigh);
1569 1578
1570 kfree_skb(skb_priv); 1579 kfree_skb(skb_priv);
1571} 1580}
@@ -1688,7 +1697,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
1688 1697
1689 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 1698 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
1690 "Drop packet: originator packet from myself (via neighbor)\n"); 1699 "Drop packet: originator packet from myself (via neighbor)\n");
1691 batadv_orig_node_free_ref(orig_neigh_node); 1700 batadv_orig_node_put(orig_neigh_node);
1692 return; 1701 return;
1693 } 1702 }
1694 1703
@@ -1726,7 +1735,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
1726 } 1735 }
1727 rcu_read_unlock(); 1736 rcu_read_unlock();
1728 1737
1729 batadv_orig_node_free_ref(orig_node); 1738 batadv_orig_node_put(orig_node);
1730} 1739}
1731 1740
1732static int batadv_iv_ogm_receive(struct sk_buff *skb, 1741static int batadv_iv_ogm_receive(struct sk_buff *skb,
@@ -1796,7 +1805,7 @@ batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node,
1796 neigh_node->addr, 1805 neigh_node->addr,
1797 n_ifinfo->bat_iv.tq_avg); 1806 n_ifinfo->bat_iv.tq_avg);
1798 1807
1799 batadv_neigh_ifinfo_free_ref(n_ifinfo); 1808 batadv_neigh_ifinfo_put(n_ifinfo);
1800 } 1809 }
1801} 1810}
1802 1811
@@ -1859,9 +1868,9 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
1859 batman_count++; 1868 batman_count++;
1860 1869
1861next: 1870next:
1862 batadv_neigh_node_free_ref(neigh_node); 1871 batadv_neigh_node_put(neigh_node);
1863 if (n_ifinfo) 1872 if (n_ifinfo)
1864 batadv_neigh_ifinfo_free_ref(n_ifinfo); 1873 batadv_neigh_ifinfo_put(n_ifinfo);
1865 } 1874 }
1866 rcu_read_unlock(); 1875 rcu_read_unlock();
1867 } 1876 }
@@ -1929,7 +1938,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
1929 * @neigh2: the second neighbor object of the comparison 1938 * @neigh2: the second neighbor object of the comparison
1930 * @if_outgoing2: outgoing interface for the second neighbor 1939 * @if_outgoing2: outgoing interface for the second neighbor
1931 * 1940 *
1932 * Returns a value less, equal to or greater than 0 if the metric via neigh1 is 1941 * Return: a value less, equal to or greater than 0 if the metric via neigh1 is
1933 * lower, the same as or higher than the metric via neigh2 1942 * lower, the same as or higher than the metric via neigh2
1934 */ 1943 */
1935static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, 1944static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
@@ -1955,9 +1964,9 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
1955 1964
1956out: 1965out:
1957 if (neigh1_ifinfo) 1966 if (neigh1_ifinfo)
1958 batadv_neigh_ifinfo_free_ref(neigh1_ifinfo); 1967 batadv_neigh_ifinfo_put(neigh1_ifinfo);
1959 if (neigh2_ifinfo) 1968 if (neigh2_ifinfo)
1960 batadv_neigh_ifinfo_free_ref(neigh2_ifinfo); 1969 batadv_neigh_ifinfo_put(neigh2_ifinfo);
1961 1970
1962 return diff; 1971 return diff;
1963} 1972}
@@ -1970,7 +1979,7 @@ out:
1970 * @neigh2: the second neighbor object of the comparison 1979 * @neigh2: the second neighbor object of the comparison
1971 * @if_outgoing2: outgoing interface for the second neighbor 1980 * @if_outgoing2: outgoing interface for the second neighbor
1972 * 1981 *
1973 * Returns true if the metric via neigh1 is equally good or better than 1982 * Return: true if the metric via neigh1 is equally good or better than
1974 * the metric via neigh2, false otherwise. 1983 * the metric via neigh2, false otherwise.
1975 */ 1984 */
1976static bool 1985static bool
@@ -1998,9 +2007,9 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1,
1998 2007
1999out: 2008out:
2000 if (neigh1_ifinfo) 2009 if (neigh1_ifinfo)
2001 batadv_neigh_ifinfo_free_ref(neigh1_ifinfo); 2010 batadv_neigh_ifinfo_put(neigh1_ifinfo);
2002 if (neigh2_ifinfo) 2011 if (neigh2_ifinfo)
2003 batadv_neigh_ifinfo_free_ref(neigh2_ifinfo); 2012 batadv_neigh_ifinfo_put(neigh2_ifinfo);
2004 2013
2005 return ret; 2014 return ret;
2006} 2015}
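A large share of the bat_iv_ogm.c changes is the reference-counting rename: atomic_inc_not_zero() becomes kref_get_unless_zero() and the batadv_*_free_ref() helpers become batadv_*_put(). The underlying contract, that a reference may only be taken while the count is still non-zero and that the final put frees the object, can be sketched with plain C11 atomics as below; the types and helpers are illustrative only, not the kernel kref implementation.

/* Plain C11 sketch of "get a reference only if the count is still non-zero,
 * free on the last put". struct node and its helpers are made up.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	atomic_int refcount;
	int data;
};

static bool node_get_unless_zero(struct node *n)
{
	int old = atomic_load(&n->refcount);

	/* refuse to resurrect an object whose count already dropped to zero */
	while (old != 0) {
		if (atomic_compare_exchange_weak(&n->refcount, &old, old + 1))
			return true;
	}
	return false;
}

static void node_put(struct node *n)
{
	if (atomic_fetch_sub(&n->refcount, 1) == 1) {
		printf("last reference dropped, freeing node %d\n", n->data);
		free(n);
	}
}

int main(void)
{
	struct node *n = malloc(sizeof(*n));

	atomic_init(&n->refcount, 1);
	n->data = 7;

	if (node_get_unless_zero(n))	/* second reference taken */
		node_put(n);		/* ... and released again */
	node_put(n);			/* initial reference, frees the node */
	return 0;
}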
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
new file mode 100644
index 000000000000..4026f198a734
--- /dev/null
+++ b/net/batman-adv/bat_v.c
@@ -0,0 +1,359 @@
1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 *
3 * Linus Lüssing, Marek Lindner
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "bat_algo.h"
19#include "main.h"
20
21#include <linux/atomic.h>
22#include <linux/bug.h>
23#include <linux/cache.h>
24#include <linux/init.h>
25#include <linux/jiffies.h>
26#include <linux/netdevice.h>
27#include <linux/rculist.h>
28#include <linux/rcupdate.h>
29#include <linux/seq_file.h>
30#include <linux/types.h>
31#include <linux/workqueue.h>
32
33#include "bat_v_elp.h"
34#include "bat_v_ogm.h"
35#include "hard-interface.h"
36#include "hash.h"
37#include "originator.h"
38#include "packet.h"
39
40static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
41{
42 /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can
43 * set the interface as ACTIVE right away, without any risk of race
44 * condition
45 */
46 if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED)
47 hard_iface->if_status = BATADV_IF_ACTIVE;
48}
49
50static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface)
51{
52 int ret;
53
54 ret = batadv_v_elp_iface_enable(hard_iface);
55 if (ret < 0)
56 return ret;
57
58 ret = batadv_v_ogm_iface_enable(hard_iface);
59 if (ret < 0)
60 batadv_v_elp_iface_disable(hard_iface);
61
62 /* enable link throughput auto-detection by setting the throughput
63 * override to zero
64 */
65 atomic_set(&hard_iface->bat_v.throughput_override, 0);
66
67 return ret;
68}
69
70static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface)
71{
72 batadv_v_elp_iface_disable(hard_iface);
73}
74
75static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface)
76{
77}
78
79static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface)
80{
81 batadv_v_elp_primary_iface_set(hard_iface);
82 batadv_v_ogm_primary_iface_set(hard_iface);
83}
84
85static void
86batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
87{
88 ewma_throughput_init(&hardif_neigh->bat_v.throughput);
89 INIT_WORK(&hardif_neigh->bat_v.metric_work,
90 batadv_v_elp_throughput_metric_update);
91}
92
93static void batadv_v_ogm_schedule(struct batadv_hard_iface *hard_iface)
94{
95}
96
97static void batadv_v_ogm_emit(struct batadv_forw_packet *forw_packet)
98{
99}
100
101/**
102 * batadv_v_orig_print_neigh - print neighbors for the originator table
103 * @orig_node: the orig_node for which the neighbors are printed
104 * @if_outgoing: outgoing interface for these entries
105 * @seq: debugfs table seq_file struct
106 *
107 * Must be called while holding an rcu lock.
108 */
109static void
110batadv_v_orig_print_neigh(struct batadv_orig_node *orig_node,
111 struct batadv_hard_iface *if_outgoing,
112 struct seq_file *seq)
113{
114 struct batadv_neigh_node *neigh_node;
115 struct batadv_neigh_ifinfo *n_ifinfo;
116
117 hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
118 n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
119 if (!n_ifinfo)
120 continue;
121
122 seq_printf(seq, " %pM (%9u.%1u)",
123 neigh_node->addr,
124 n_ifinfo->bat_v.throughput / 10,
125 n_ifinfo->bat_v.throughput % 10);
126
127 batadv_neigh_ifinfo_put(n_ifinfo);
128 }
129}
130
131/**
132 * batadv_v_hardif_neigh_print - print a single ELP neighbour node
133 * @seq: neighbour table seq_file struct
134 * @hardif_neigh: hardif neighbour information
135 */
136static void
137batadv_v_hardif_neigh_print(struct seq_file *seq,
138 struct batadv_hardif_neigh_node *hardif_neigh)
139{
140 int last_secs, last_msecs;
141 u32 throughput;
142
143 last_secs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) / 1000;
144 last_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) % 1000;
145 throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
146
147 seq_printf(seq, "%pM %4i.%03is (%9u.%1u) [%10s]\n",
148 hardif_neigh->addr, last_secs, last_msecs, throughput / 10,
149 throughput % 10, hardif_neigh->if_incoming->net_dev->name);
150}
151
152/**
153 * batadv_v_neigh_print - print the single hop neighbour list
154 * @bat_priv: the bat priv with all the soft interface information
155 * @seq: neighbour table seq_file struct
156 */
157static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
158 struct seq_file *seq)
159{
160 struct net_device *net_dev = (struct net_device *)seq->private;
161 struct batadv_hardif_neigh_node *hardif_neigh;
162 struct batadv_hard_iface *hard_iface;
163 int batman_count = 0;
164
165 seq_printf(seq, " %-15s %s (%11s) [%10s]\n", "Neighbor",
166 "last-seen", "throughput", "IF");
167
168 rcu_read_lock();
169 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
170 if (hard_iface->soft_iface != net_dev)
171 continue;
172
173 hlist_for_each_entry_rcu(hardif_neigh,
174 &hard_iface->neigh_list, list) {
175 batadv_v_hardif_neigh_print(seq, hardif_neigh);
176 batman_count++;
177 }
178 }
179 rcu_read_unlock();
180
181 if (batman_count == 0)
182 seq_puts(seq, "No batman nodes in range ...\n");
183}
184
185/**
186 * batadv_v_orig_print - print the originator table
187 * @bat_priv: the bat priv with all the soft interface information
188 * @seq: debugfs table seq_file struct
189 * @if_outgoing: the outgoing interface for which this should be printed
190 */
191static void batadv_v_orig_print(struct batadv_priv *bat_priv,
192 struct seq_file *seq,
193 struct batadv_hard_iface *if_outgoing)
194{
195 struct batadv_neigh_node *neigh_node;
196 struct batadv_hashtable *hash = bat_priv->orig_hash;
197 int last_seen_msecs, last_seen_secs;
198 struct batadv_orig_node *orig_node;
199 struct batadv_neigh_ifinfo *n_ifinfo;
200 unsigned long last_seen_jiffies;
201 struct hlist_head *head;
202 int batman_count = 0;
203 u32 i;
204
205 seq_printf(seq, " %-15s %s (%11s) %17s [%10s]: %20s ...\n",
206 "Originator", "last-seen", "throughput", "Nexthop",
207 "outgoingIF", "Potential nexthops");
208
209 for (i = 0; i < hash->size; i++) {
210 head = &hash->table[i];
211
212 rcu_read_lock();
213 hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
214 neigh_node = batadv_orig_router_get(orig_node,
215 if_outgoing);
216 if (!neigh_node)
217 continue;
218
219 n_ifinfo = batadv_neigh_ifinfo_get(neigh_node,
220 if_outgoing);
221 if (!n_ifinfo)
222 goto next;
223
224 last_seen_jiffies = jiffies - orig_node->last_seen;
225 last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
226 last_seen_secs = last_seen_msecs / 1000;
227 last_seen_msecs = last_seen_msecs % 1000;
228
229 seq_printf(seq, "%pM %4i.%03is (%9u.%1u) %pM [%10s]:",
230 orig_node->orig, last_seen_secs,
231 last_seen_msecs,
232 n_ifinfo->bat_v.throughput / 10,
233 n_ifinfo->bat_v.throughput % 10,
234 neigh_node->addr,
235 neigh_node->if_incoming->net_dev->name);
236
237 batadv_v_orig_print_neigh(orig_node, if_outgoing, seq);
238 seq_puts(seq, "\n");
239 batman_count++;
240
241next:
242 batadv_neigh_node_put(neigh_node);
243 if (n_ifinfo)
244 batadv_neigh_ifinfo_put(n_ifinfo);
245 }
246 rcu_read_unlock();
247 }
248
249 if (batman_count == 0)
250 seq_puts(seq, "No batman nodes in range ...\n");
251}
252
253static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
254 struct batadv_hard_iface *if_outgoing1,
255 struct batadv_neigh_node *neigh2,
256 struct batadv_hard_iface *if_outgoing2)
257{
258 struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2;
259
260 ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
261 ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
262
263 if (WARN_ON(!ifinfo1 || !ifinfo2))
264 return 0;
265
266 return ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput;
267}
268
269static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
270 struct batadv_hard_iface *if_outgoing1,
271 struct batadv_neigh_node *neigh2,
272 struct batadv_hard_iface *if_outgoing2)
273{
274 struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2;
275 u32 threshold;
276
277 ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
278 ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
279
280 threshold = ifinfo1->bat_v.throughput / 4;
281 threshold = ifinfo1->bat_v.throughput - threshold;
282
283 return ifinfo2->bat_v.throughput > threshold;
284}
285
286static struct batadv_algo_ops batadv_batman_v __read_mostly = {
287 .name = "BATMAN_V",
288 .bat_iface_activate = batadv_v_iface_activate,
289 .bat_iface_enable = batadv_v_iface_enable,
290 .bat_iface_disable = batadv_v_iface_disable,
291 .bat_iface_update_mac = batadv_v_iface_update_mac,
292 .bat_primary_iface_set = batadv_v_primary_iface_set,
293 .bat_hardif_neigh_init = batadv_v_hardif_neigh_init,
294 .bat_ogm_emit = batadv_v_ogm_emit,
295 .bat_ogm_schedule = batadv_v_ogm_schedule,
296 .bat_orig_print = batadv_v_orig_print,
297 .bat_neigh_cmp = batadv_v_neigh_cmp,
298 .bat_neigh_is_similar_or_better = batadv_v_neigh_is_sob,
299 .bat_neigh_print = batadv_v_neigh_print,
300};
301
302/**
303 * batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a
304 * mesh
305 * @bat_priv: the object representing the mesh interface to initialise
306 *
307 * Return: 0 on success or a negative error code otherwise
308 */
309int batadv_v_mesh_init(struct batadv_priv *bat_priv)
310{
311 return batadv_v_ogm_init(bat_priv);
312}
313
314/**
315 * batadv_v_mesh_free - free the B.A.T.M.A.N. V private resources for a mesh
316 * @bat_priv: the object representing the mesh interface to free
317 */
318void batadv_v_mesh_free(struct batadv_priv *bat_priv)
319{
320 batadv_v_ogm_free(bat_priv);
321}
322
323/**
324 * batadv_v_init - B.A.T.M.A.N. V initialization function
325 *
326 * Description: Takes care of initializing all the subcomponents.
327 * It is invoked upon module load only.
328 *
329 * Return: 0 on success or a negative error code otherwise
330 */
331int __init batadv_v_init(void)
332{
333 int ret;
334
335 /* B.A.T.M.A.N. V echo location protocol packet */
336 ret = batadv_recv_handler_register(BATADV_ELP,
337 batadv_v_elp_packet_recv);
338 if (ret < 0)
339 return ret;
340
341 ret = batadv_recv_handler_register(BATADV_OGM2,
342 batadv_v_ogm_packet_recv);
343 if (ret < 0)
344 goto elp_unregister;
345
346 ret = batadv_algo_register(&batadv_batman_v);
347 if (ret < 0)
348 goto ogm_unregister;
349
350 return ret;
351
352ogm_unregister:
353 batadv_recv_handler_unregister(BATADV_OGM2);
354
355elp_unregister:
356 batadv_recv_handler_unregister(BATADV_ELP);
357
358 return ret;
359}
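In bat_v.c above, batadv_v_neigh_cmp() simply compares the two throughput values, while batadv_v_neigh_is_sob() accepts a neighbour as "similar or better" when its throughput exceeds the other's minus a quarter, i.e. roughly 75% of it. A tiny stand-alone check of that threshold arithmetic, using the 100 kbit/s units shown elsewhere in the B.A.T.M.A.N. V code, follows; the helper name is invented for the example.

/* Quick check of the "similar or better" threshold used by
 * batadv_v_neigh_is_sob() above: neigh2 qualifies when its throughput
 * exceeds neigh1's minus a quarter. Values are in 100 kbit/s units.
 */
#include <stdbool.h>
#include <stdio.h>

static bool neigh_is_similar_or_better(unsigned int t1, unsigned int t2)
{
	unsigned int threshold = t1 - t1 / 4;

	return t2 > threshold;
}

int main(void)
{
	/* 100.0 Mbps vs 80.0 Mbps: 80.0 > 75.0, still "similar" -> 1 */
	printf("%d\n", neigh_is_similar_or_better(1000, 800));
	/* 100.0 Mbps vs 70.0 Mbps: below the threshold -> 0 */
	printf("%d\n", neigh_is_similar_or_better(1000, 700));
	return 0;
}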
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
new file mode 100644
index 000000000000..3844e7efd0b0
--- /dev/null
+++ b/net/batman-adv/bat_v_elp.c
@@ -0,0 +1,515 @@
1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 *
3 * Linus Lüssing, Marek Lindner
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "bat_v_elp.h"
19#include "main.h"
20
21#include <linux/atomic.h>
22#include <linux/byteorder/generic.h>
23#include <linux/errno.h>
24#include <linux/etherdevice.h>
25#include <linux/ethtool.h>
26#include <linux/fs.h>
27#include <linux/if_ether.h>
28#include <linux/jiffies.h>
29#include <linux/kernel.h>
30#include <linux/kref.h>
31#include <linux/netdevice.h>
32#include <linux/random.h>
33#include <linux/rculist.h>
34#include <linux/rcupdate.h>
35#include <linux/rtnetlink.h>
36#include <linux/skbuff.h>
37#include <linux/stddef.h>
38#include <linux/string.h>
39#include <linux/types.h>
40#include <linux/workqueue.h>
41#include <net/cfg80211.h>
42
43#include "bat_algo.h"
44#include "bat_v_ogm.h"
45#include "hard-interface.h"
46#include "originator.h"
47#include "packet.h"
48#include "routing.h"
49#include "send.h"
50
51/**
52 * batadv_v_elp_start_timer - restart timer for ELP periodic work
53 * @hard_iface: the interface for which the timer has to be reset
54 */
55static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
56{
57 unsigned int msecs;
58
59 msecs = atomic_read(&hard_iface->bat_v.elp_interval) - BATADV_JITTER;
60 msecs += prandom_u32() % (2 * BATADV_JITTER);
61
62 queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.elp_wq,
63 msecs_to_jiffies(msecs));
64}
65
66/**
67 * batadv_v_elp_get_throughput - get the throughput towards a neighbour
68 * @neigh: the neighbour for which the throughput has to be obtained
69 *
70 * Return: The throughput towards the given neighbour in multiples of 100kbps
71 * (a value of '1' equals 0.1 Mbps, '10' equals 1 Mbps, etc).
72 */
73static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
74{
75 struct batadv_hard_iface *hard_iface = neigh->if_incoming;
76 struct ethtool_link_ksettings link_settings;
77 struct station_info sinfo;
78 u32 throughput;
79 int ret;
80
81 /* if the user specified a customised value for this interface, then
82 * return it directly
83 */
84 throughput = atomic_read(&hard_iface->bat_v.throughput_override);
85 if (throughput != 0)
86 return throughput;
87
88 /* if this is a wireless device, then ask its throughput through
89 * cfg80211 API
90 */
91 if (batadv_is_wifi_netdev(hard_iface->net_dev)) {
92 if (hard_iface->net_dev->ieee80211_ptr) {
93 ret = cfg80211_get_station(hard_iface->net_dev,
94 neigh->addr, &sinfo);
95 if (ret == -ENOENT) {
96 /* Node is not associated anymore! It would be
97 * possible to delete this neighbor. For now set
98 * the throughput metric to 0.
99 */
100 return 0;
101 }
102 if (!ret)
103 return sinfo.expected_throughput / 100;
104 }
105
106 /* unsupported WiFi driver version */
107 goto default_throughput;
108 }
109
110 /* if not a wifi interface, check if this device provides data via
111 * ethtool (e.g. an Ethernet adapter)
112 */
113 memset(&link_settings, 0, sizeof(link_settings));
114 rtnl_lock();
115 ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
116 rtnl_unlock();
117 if (ret == 0) {
118 /* link characteristics might change over time */
119 if (link_settings.base.duplex == DUPLEX_FULL)
120 hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX;
121 else
122 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
123
124 throughput = link_settings.base.speed;
125 if (throughput && (throughput != SPEED_UNKNOWN))
126 return throughput * 10;
127 }
128
129default_throughput:
130 if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) {
131 batadv_info(hard_iface->soft_iface,
132 "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n",
133 hard_iface->net_dev->name,
134 BATADV_THROUGHPUT_DEFAULT_VALUE / 10,
135 BATADV_THROUGHPUT_DEFAULT_VALUE % 10);
136 hard_iface->bat_v.flags |= BATADV_WARNING_DEFAULT;
137 }
138
139 /* if none of the above cases apply, return the base_throughput */
140 return BATADV_THROUGHPUT_DEFAULT_VALUE;
141}
142
143/**
144 * batadv_v_elp_throughput_metric_update - worker updating the throughput metric
145 * of a single hop neighbour
146 * @work: the work queue item
147 */
148void batadv_v_elp_throughput_metric_update(struct work_struct *work)
149{
150 struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
151 struct batadv_hardif_neigh_node *neigh;
152
153 neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
154 metric_work);
155 neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
156 bat_v);
157
158 ewma_throughput_add(&neigh->bat_v.throughput,
159 batadv_v_elp_get_throughput(neigh));
160
161 /* decrement refcounter to balance increment performed before scheduling
162 * this task
163 */
164 batadv_hardif_neigh_put(neigh);
165}
166
167/**
168 * batadv_v_elp_wifi_neigh_probe - send link probing packets to a neighbour
169 * @neigh: the neighbour to probe
170 *
171 * Sends a predefined number of unicast wifi packets to a given neighbour in
172 * order to trigger the throughput estimation on this link by the RC algorithm.
173 * Packets are sent only if there is not enough payload unicast traffic
174 * towards this neighbour.
175 *
176 * Return: True on success and false in case of error during skb preparation.
177 */
178static bool
179batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
180{
181 struct batadv_hard_iface *hard_iface = neigh->if_incoming;
182 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
183 unsigned long last_tx_diff;
184 struct sk_buff *skb;
185 int probe_len, i;
186 int elp_skb_len;
187
188 /* this probing routine is for Wifi neighbours only */
189 if (!batadv_is_wifi_netdev(hard_iface->net_dev))
190 return true;
191
192 /* probe the neighbor only if no unicast packets have been sent
193 * to it in the last 100 milliseconds: this is the rate control
194 * algorithm sampling interval (minstrel). In this way, if not
195 * enough traffic has been sent to the neighbor, batman-adv can
196 * generate 2 probe packets and push the RC algorithm to perform
197 * the sampling
198 */
199 last_tx_diff = jiffies_to_msecs(jiffies - neigh->bat_v.last_unicast_tx);
200 if (last_tx_diff <= BATADV_ELP_PROBE_MAX_TX_DIFF)
201 return true;
202
203 probe_len = max_t(int, sizeof(struct batadv_elp_packet),
204 BATADV_ELP_MIN_PROBE_SIZE);
205
206 for (i = 0; i < BATADV_ELP_PROBES_PER_NODE; i++) {
207 elp_skb_len = hard_iface->bat_v.elp_skb->len;
208 skb = skb_copy_expand(hard_iface->bat_v.elp_skb, 0,
209 probe_len - elp_skb_len,
210 GFP_ATOMIC);
211 if (!skb)
212 return false;
213
214 /* Tell the skb to get as big as the allocated space (we want
215 * the packet to be exactly of that size to make the link
216 * throughput estimation effective).
217 */
218 skb_put(skb, probe_len - hard_iface->bat_v.elp_skb->len);
219
220 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
221 "Sending unicast (probe) ELP packet on interface %s to %pM\n",
222 hard_iface->net_dev->name, neigh->addr);
223
224 batadv_send_skb_packet(skb, hard_iface, neigh->addr);
225 }
226
227 return true;
228}
229
230/**
231 * batadv_v_elp_periodic_work - ELP periodic task per interface
232 * @work: work queue item
233 *
234 * Emits broadcast ELP message in regular intervals.
235 */
236static void batadv_v_elp_periodic_work(struct work_struct *work)
237{
238 struct batadv_hardif_neigh_node *hardif_neigh;
239 struct batadv_hard_iface *hard_iface;
240 struct batadv_hard_iface_bat_v *bat_v;
241 struct batadv_elp_packet *elp_packet;
242 struct batadv_priv *bat_priv;
243 struct sk_buff *skb;
244 u32 elp_interval;
245
246 bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
247 hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
248 bat_priv = netdev_priv(hard_iface->soft_iface);
249
250 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
251 goto out;
252
253 /* we are in the process of shutting this interface down */
254 if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
255 (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
256 goto out;
257
258 /* the interface was enabled but may not be ready yet */
259 if (hard_iface->if_status != BATADV_IF_ACTIVE)
260 goto restart_timer;
261
262 skb = skb_copy(hard_iface->bat_v.elp_skb, GFP_ATOMIC);
263 if (!skb)
264 goto restart_timer;
265
266 elp_packet = (struct batadv_elp_packet *)skb->data;
267 elp_packet->seqno = htonl(atomic_read(&hard_iface->bat_v.elp_seqno));
268 elp_interval = atomic_read(&hard_iface->bat_v.elp_interval);
269 elp_packet->elp_interval = htonl(elp_interval);
270
271 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
272 "Sending broadcast ELP packet on interface %s, seqno %u\n",
273 hard_iface->net_dev->name,
274 atomic_read(&hard_iface->bat_v.elp_seqno));
275
276 batadv_send_broadcast_skb(skb, hard_iface);
277
278 atomic_inc(&hard_iface->bat_v.elp_seqno);
279
280 /* The throughput metric is updated on each sent packet. This way, if a
281 * node is dead and no longer sends packets, batman-adv is still able to
282 * react to its death in a timely manner.
283 *
284 * The throughput metric is updated by following these steps:
285 * 1) if the hard_iface is wifi => send a number of unicast ELPs for
286 * probing/sampling to each neighbor
287 * 2) update the throughput metric value of each neighbor (note that the
288 * value retrieved in this step might be 100ms old because the
289 * probing packets at point 1) could still be in the HW queue)
290 */
291 rcu_read_lock();
292 hlist_for_each_entry_rcu(hardif_neigh, &hard_iface->neigh_list, list) {
293 if (!batadv_v_elp_wifi_neigh_probe(hardif_neigh))
294 /* if something goes wrong while probing, better to stop
295 * sending packets immediately and reschedule the task
296 */
297 break;
298
299 if (!kref_get_unless_zero(&hardif_neigh->refcount))
300 continue;
301
302 /* Reading the estimated throughput from cfg80211 is a task that
303 * may sleep and that is not allowed in an rcu protected
304 * context. Therefore schedule a task for that.
305 */
306 queue_work(batadv_event_workqueue,
307 &hardif_neigh->bat_v.metric_work);
308 }
309 rcu_read_unlock();
310
311restart_timer:
312 batadv_v_elp_start_timer(hard_iface);
313out:
314 return;
315}
316
317/**
318 * batadv_v_elp_iface_enable - setup the ELP interface private resources
319 * @hard_iface: interface for which the data has to be prepared
320 *
321 * Return: 0 on success or a -ENOMEM in case of failure.
322 */
323int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
324{
325 struct batadv_elp_packet *elp_packet;
326 unsigned char *elp_buff;
327 u32 random_seqno;
328 size_t size;
329 int res = -ENOMEM;
330
331 size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN;
332 hard_iface->bat_v.elp_skb = dev_alloc_skb(size);
333 if (!hard_iface->bat_v.elp_skb)
334 goto out;
335
336 skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
337 elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
338 elp_packet = (struct batadv_elp_packet *)elp_buff;
339 memset(elp_packet, 0, BATADV_ELP_HLEN);
340
341 elp_packet->packet_type = BATADV_ELP;
342 elp_packet->version = BATADV_COMPAT_VERSION;
343
344 /* randomize initial seqno to avoid collision */
345 get_random_bytes(&random_seqno, sizeof(random_seqno));
346 atomic_set(&hard_iface->bat_v.elp_seqno, random_seqno);
347 atomic_set(&hard_iface->bat_v.elp_interval, 500);
348
349 /* assume full-duplex by default */
350 hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX;
351
352 /* warn the user (again) if no throughput data is available */
353 hard_iface->bat_v.flags &= ~BATADV_WARNING_DEFAULT;
354
355 if (batadv_is_wifi_netdev(hard_iface->net_dev))
356 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
357
358 INIT_DELAYED_WORK(&hard_iface->bat_v.elp_wq,
359 batadv_v_elp_periodic_work);
360 batadv_v_elp_start_timer(hard_iface);
361 res = 0;
362
363out:
364 return res;
365}
366
367/**
368 * batadv_v_elp_iface_disable - release ELP interface private resources
369 * @hard_iface: interface for which the resources have to be released
370 */
371void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface)
372{
373 cancel_delayed_work_sync(&hard_iface->bat_v.elp_wq);
374
375 dev_kfree_skb(hard_iface->bat_v.elp_skb);
376 hard_iface->bat_v.elp_skb = NULL;
377}
378
379/**
380 * batadv_v_elp_primary_iface_set - change internal data to reflect the new
381 * primary interface
382 * @primary_iface: the new primary interface
383 */
384void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface)
385{
386 struct batadv_hard_iface *hard_iface;
387 struct batadv_elp_packet *elp_packet;
388 struct sk_buff *skb;
389
390 /* update orig field of every elp iface belonging to this mesh */
391 rcu_read_lock();
392 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
393 if (primary_iface->soft_iface != hard_iface->soft_iface)
394 continue;
395
396 if (!hard_iface->bat_v.elp_skb)
397 continue;
398
399 skb = hard_iface->bat_v.elp_skb;
400 elp_packet = (struct batadv_elp_packet *)skb->data;
401 ether_addr_copy(elp_packet->orig,
402 primary_iface->net_dev->dev_addr);
403 }
404 rcu_read_unlock();
405}
406
407/**
408 * batadv_v_elp_neigh_update - update an ELP neighbour node
409 * @bat_priv: the bat priv with all the soft interface information
410 * @neigh_addr: the neighbour interface address
411 * @if_incoming: the interface the packet was received through
412 * @elp_packet: the received ELP packet
413 *
414 * Updates the ELP neighbour node state with the data received within the new
415 * ELP packet.
416 */
417static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv,
418 u8 *neigh_addr,
419 struct batadv_hard_iface *if_incoming,
420 struct batadv_elp_packet *elp_packet)
421
422{
423 struct batadv_neigh_node *neigh;
424 struct batadv_orig_node *orig_neigh;
425 struct batadv_hardif_neigh_node *hardif_neigh;
426 s32 seqno_diff;
427 s32 elp_latest_seqno;
428
429 orig_neigh = batadv_v_ogm_orig_get(bat_priv, elp_packet->orig);
430 if (!orig_neigh)
431 return;
432
433 neigh = batadv_neigh_node_new(orig_neigh, if_incoming, neigh_addr);
434 if (!neigh)
435 goto orig_free;
436
437 hardif_neigh = batadv_hardif_neigh_get(if_incoming, neigh_addr);
438 if (!hardif_neigh)
439 goto neigh_free;
440
441 elp_latest_seqno = hardif_neigh->bat_v.elp_latest_seqno;
442 seqno_diff = ntohl(elp_packet->seqno) - elp_latest_seqno;
443
444 /* known or older sequence numbers are ignored. However, always adopt
445 * if the router seems to have been restarted.
446 */
447 if (seqno_diff < 1 && seqno_diff > -BATADV_ELP_MAX_AGE)
448 goto hardif_free;
449
450 neigh->last_seen = jiffies;
451 hardif_neigh->last_seen = jiffies;
452 hardif_neigh->bat_v.elp_latest_seqno = ntohl(elp_packet->seqno);
453 hardif_neigh->bat_v.elp_interval = ntohl(elp_packet->elp_interval);
454
455hardif_free:
456 if (hardif_neigh)
457 batadv_hardif_neigh_put(hardif_neigh);
458neigh_free:
459 if (neigh)
460 batadv_neigh_node_put(neigh);
461orig_free:
462 if (orig_neigh)
463 batadv_orig_node_put(orig_neigh);
464}
465
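For illustration, the sequence number test above in standalone C (EXAMPLE_ELP_MAX_AGE is an assumed stand-in for BATADV_ELP_MAX_AGE, not taken from the patch):

#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_ELP_MAX_AGE 64	/* assumed stand-in for BATADV_ELP_MAX_AGE */

static bool example_elp_seqno_accept(uint32_t received, uint32_t latest)
{
	int32_t diff = (int32_t)(received - latest);

	/* accept strictly newer seqnos; also accept a large backward jump,
	 * which indicates the neighbor restarted and re-randomized its counter
	 */
	return diff >= 1 || diff <= -EXAMPLE_ELP_MAX_AGE;
}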
466/**
467 * batadv_v_elp_packet_recv - main ELP packet handler
468 * @skb: the received packet
469 * @if_incoming: the interface this packet was received through
470 *
471 * Return: NET_RX_SUCCESS and consumes the skb if the packet was properly
472 * processed, or NET_RX_DROP in case of failure.
473 */
474int batadv_v_elp_packet_recv(struct sk_buff *skb,
475 struct batadv_hard_iface *if_incoming)
476{
477 struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
478 struct batadv_elp_packet *elp_packet;
479 struct batadv_hard_iface *primary_if;
480 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
481 bool ret;
482
483 ret = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN);
484 if (!ret)
485 return NET_RX_DROP;
486
487 if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
488 return NET_RX_DROP;
489
490 /* did we receive a B.A.T.M.A.N. V ELP packet on an interface
491 * that does not have B.A.T.M.A.N. V ELP enabled?
492 */
493 if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
494 return NET_RX_DROP;
495
496 elp_packet = (struct batadv_elp_packet *)skb->data;
497
498 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
499 "Received ELP packet from %pM seqno %u ORIG: %pM\n",
500 ethhdr->h_source, ntohl(elp_packet->seqno),
501 elp_packet->orig);
502
503 primary_if = batadv_primary_if_get_selected(bat_priv);
504 if (!primary_if)
505 goto out;
506
507 batadv_v_elp_neigh_update(bat_priv, ethhdr->h_source, if_incoming,
508 elp_packet);
509
510out:
511 if (primary_if)
512 batadv_hardif_put(primary_if);
513 consume_skb(skb);
514 return NET_RX_SUCCESS;
515}
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
new file mode 100644
index 000000000000..e95f1bca0785
--- /dev/null
+++ b/net/batman-adv/bat_v_elp.h
@@ -0,0 +1,33 @@
1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 *
3 * Linus Lüssing, Marek Lindner
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "main.h"
19
20#ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_
21#define _NET_BATMAN_ADV_BAT_V_ELP_H_
22
23struct sk_buff;
24struct work_struct;
25
26int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
27void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
28void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
29int batadv_v_elp_packet_recv(struct sk_buff *skb,
30 struct batadv_hard_iface *if_incoming);
31void batadv_v_elp_throughput_metric_update(struct work_struct *work);
32
33#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
new file mode 100644
index 000000000000..d9bcbe6e7d65
--- /dev/null
+++ b/net/batman-adv/bat_v_ogm.c
@@ -0,0 +1,833 @@
1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 *
3 * Antonio Quartulli
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "bat_v_ogm.h"
19#include "main.h"
20
21#include <linux/atomic.h>
22#include <linux/byteorder/generic.h>
23#include <linux/errno.h>
24#include <linux/etherdevice.h>
25#include <linux/fs.h>
26#include <linux/if_ether.h>
27#include <linux/jiffies.h>
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/netdevice.h>
31#include <linux/random.h>
32#include <linux/rculist.h>
33#include <linux/rcupdate.h>
34#include <linux/skbuff.h>
35#include <linux/slab.h>
36#include <linux/stddef.h>
37#include <linux/string.h>
38#include <linux/types.h>
39#include <linux/workqueue.h>
40
41#include "hard-interface.h"
42#include "hash.h"
43#include "originator.h"
44#include "packet.h"
45#include "routing.h"
46#include "send.h"
47#include "translation-table.h"
48
49/**
50 * batadv_v_ogm_orig_get - retrieve and possibly create an originator node
51 * @bat_priv: the bat priv with all the soft interface information
52 * @addr: the address of the originator
53 *
54 * Return: the orig_node corresponding to the specified address. If such an
55 * object does not exist, it is allocated here. In case of allocation failure,
56 * NULL is returned.
57 */
58struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
59 const u8 *addr)
60{
61 struct batadv_orig_node *orig_node;
62 int hash_added;
63
64 orig_node = batadv_orig_hash_find(bat_priv, addr);
65 if (orig_node)
66 return orig_node;
67
68 orig_node = batadv_orig_node_new(bat_priv, addr);
69 if (!orig_node)
70 return NULL;
71
72 hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
73 batadv_choose_orig, orig_node,
74 &orig_node->hash_entry);
75 if (hash_added != 0) {
76 /* orig_node->refcounter is initialised to 2 by
77 * batadv_orig_node_new()
78 */
79 batadv_orig_node_put(orig_node);
80 batadv_orig_node_put(orig_node);
81 orig_node = NULL;
82 }
83
84 return orig_node;
85}
86
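The double batadv_orig_node_put() above works because batadv_orig_node_new() hands back an object holding two references, one intended for the hash table and one for the caller. A minimal kref-based sketch of that convention (the real orig_node code uses its own put helper; example_* names are hypothetical):

#include <linux/kref.h>
#include <linux/slab.h>

struct example_node {
	struct kref refcount;
};

static void example_node_release(struct kref *ref)
{
	kfree(container_of(ref, struct example_node, refcount));
}

static struct example_node *example_node_new(void)
{
	struct example_node *node = kzalloc(sizeof(*node), GFP_ATOMIC);

	if (!node)
		return NULL;

	kref_init(&node->refcount);	/* reference for the caller */
	kref_get(&node->refcount);	/* reference for the hash table */
	return node;
}

/* if inserting into the hash fails, both references must be dropped:
 *	kref_put(&node->refcount, example_node_release);
 *	kref_put(&node->refcount, example_node_release);
 */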
87/**
88 * batadv_v_ogm_start_timer - restart the OGM sending timer
89 * @bat_priv: the bat priv with all the soft interface information
90 */
91static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
92{
93 unsigned long msecs;
94 /* this function may be invoked in different contexts (ogm rescheduling
95 * or hard_iface activation), but the work timer should not be reset
96 */
97 if (delayed_work_pending(&bat_priv->bat_v.ogm_wq))
98 return;
99
100 msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER;
101 msecs += prandom_u32() % (2 * BATADV_JITTER);
102 queue_delayed_work(batadv_event_workqueue, &bat_priv->bat_v.ogm_wq,
103 msecs_to_jiffies(msecs));
104}
105
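For illustration, the jittered delay computed above in plain C; the interval and jitter values here are assumptions picked for the example, not taken from the patch:

#include <stdint.h>
#include <stdlib.h>

#define EXAMPLE_ORIG_INTERVAL_MS 1000u	/* assumed orig_interval */
#define EXAMPLE_JITTER_MS        20u	/* assumed stand-in for BATADV_JITTER */

static unsigned int example_jittered_delay_ms(void)
{
	/* base interval minus one jitter, plus a random offset in
	 * [0, 2 * jitter): with the numbers above this yields 980..1019 ms,
	 * spreading neighbors' OGMs so they do not stay synchronized
	 */
	return EXAMPLE_ORIG_INTERVAL_MS - EXAMPLE_JITTER_MS +
	       ((unsigned int)rand() % (2 * EXAMPLE_JITTER_MS));
}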
106/**
107 * batadv_v_ogm_send_to_if - send a batman ogm using a given interface
108 * @skb: the OGM to send
109 * @hard_iface: the interface to use to send the OGM
110 */
111static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
112 struct batadv_hard_iface *hard_iface)
113{
114 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
115
116 if (hard_iface->if_status != BATADV_IF_ACTIVE)
117 return;
118
119 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
120 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
121 skb->len + ETH_HLEN);
122
123 batadv_send_broadcast_skb(skb, hard_iface);
124}
125
126/**
127 * batadv_v_ogm_send - periodic worker broadcasting the own OGM
128 * @work: work queue item
129 */
130static void batadv_v_ogm_send(struct work_struct *work)
131{
132 struct batadv_hard_iface *hard_iface;
133 struct batadv_priv_bat_v *bat_v;
134 struct batadv_priv *bat_priv;
135 struct batadv_ogm2_packet *ogm_packet;
136 struct sk_buff *skb, *skb_tmp;
137 unsigned char *ogm_buff, *pkt_buff;
138 int ogm_buff_len;
139 u16 tvlv_len = 0;
140
141 bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
142 bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
143
144 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
145 goto out;
146
147 ogm_buff = bat_priv->bat_v.ogm_buff;
148 ogm_buff_len = bat_priv->bat_v.ogm_buff_len;
149 /* tt changes have to be committed before the tvlv data is
150 * appended as it may alter the tt tvlv container
151 */
152 batadv_tt_local_commit_changes(bat_priv);
153 tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, &ogm_buff,
154 &ogm_buff_len,
155 BATADV_OGM2_HLEN);
156
157 bat_priv->bat_v.ogm_buff = ogm_buff;
158 bat_priv->bat_v.ogm_buff_len = ogm_buff_len;
159
160 skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + ogm_buff_len);
161 if (!skb)
162 goto reschedule;
163
164 skb_reserve(skb, ETH_HLEN);
165 pkt_buff = skb_put(skb, ogm_buff_len);
166 memcpy(pkt_buff, ogm_buff, ogm_buff_len);
167
168 ogm_packet = (struct batadv_ogm2_packet *)skb->data;
169 ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
170 atomic_inc(&bat_priv->bat_v.ogm_seqno);
171 ogm_packet->tvlv_len = htons(tvlv_len);
172
173 /* broadcast on every interface */
174 rcu_read_lock();
175 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
176 if (hard_iface->soft_iface != bat_priv->soft_iface)
177 continue;
178
179 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
180 "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
181 ogm_packet->orig, ntohl(ogm_packet->seqno),
182 ntohl(ogm_packet->throughput), ogm_packet->ttl,
183 hard_iface->net_dev->name,
184 hard_iface->net_dev->dev_addr);
185
186 /* this skb gets consumed by batadv_v_ogm_send_to_if() */
187 skb_tmp = skb_clone(skb, GFP_ATOMIC);
188 if (!skb_tmp)
189 break;
190
191 batadv_v_ogm_send_to_if(skb_tmp, hard_iface);
192 }
193 rcu_read_unlock();
194
195 consume_skb(skb);
196
197reschedule:
198 batadv_v_ogm_start_timer(bat_priv);
199out:
200 return;
201}
202
203/**
204 * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V
205 * @hard_iface: the interface to prepare
206 *
207 * Takes care of scheduling the own OGM sending routine for this interface.
208 *
209 * Return: 0 on success or a negative error code otherwise
210 */
211int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
212{
213 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
214
215 batadv_v_ogm_start_timer(bat_priv);
216
217 return 0;
218}
219
220/**
221 * batadv_v_ogm_primary_iface_set - set a new primary interface
222 * @primary_iface: the new primary interface
223 */
224void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
225{
226 struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface);
227 struct batadv_ogm2_packet *ogm_packet;
228
229 if (!bat_priv->bat_v.ogm_buff)
230 return;
231
232 ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff;
233 ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr);
234}
235
236/**
237 * batadv_v_ogm_orig_update - update the originator status based on the received
238 * OGM
239 * @bat_priv: the bat priv with all the soft interface information
240 * @orig_node: the originator to update
241 * @neigh_node: the neighbour the OGM has been received from (to update)
242 * @ogm2: the received OGM
243 * @if_outgoing: the interface where this OGM is going to be forwarded through
244 */
245static void
246batadv_v_ogm_orig_update(struct batadv_priv *bat_priv,
247 struct batadv_orig_node *orig_node,
248 struct batadv_neigh_node *neigh_node,
249 const struct batadv_ogm2_packet *ogm2,
250 struct batadv_hard_iface *if_outgoing)
251{
252 struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
253 struct batadv_neigh_node *router = NULL;
254 s32 neigh_seq_diff;
255 u32 neigh_last_seqno;
256 u32 router_last_seqno;
257 u32 router_throughput, neigh_throughput;
258
259 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
260 "Searching and updating originator entry of received packet\n");
261
262 /* if this neighbor already is our next hop there is nothing
263 * to change
264 */
265 router = batadv_orig_router_get(orig_node, if_outgoing);
266 if (router == neigh_node)
267 goto out;
268
269 /* don't consider neighbours with worse throughput.
270 * also switch route if this seqno is BATADV_V_MAX_ORIGDIFF newer than
271 * the last received seqno from our best next hop.
272 */
273 if (router) {
274 router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
275 neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
276
277 /* if these are not allocated, something is wrong. */
278 if (!router_ifinfo || !neigh_ifinfo)
279 goto out;
280
281 neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno;
282 router_last_seqno = router_ifinfo->bat_v.last_seqno;
283 neigh_seq_diff = neigh_last_seqno - router_last_seqno;
284 router_throughput = router_ifinfo->bat_v.throughput;
285 neigh_throughput = neigh_ifinfo->bat_v.throughput;
286
287 if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
288 (router_throughput >= neigh_throughput))
289 goto out;
290 }
291
292 batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
293
294out:
295 if (router_ifinfo)
296 batadv_neigh_ifinfo_put(router_ifinfo);
297 if (neigh_ifinfo)
298 batadv_neigh_ifinfo_put(neigh_ifinfo);
299 if (router)
300 batadv_neigh_node_put(router);
301}
302
303/**
304 * batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded
305 * with B.A.T.M.A.N. V OGMs
306 * @bat_priv: the bat priv with all the soft interface information
307 * @if_incoming: the interface where the OGM has been received
308 * @if_outgoing: the interface where the OGM has to be forwarded to
309 * @throughput: the current throughput
310 *
311 * Apply a penalty on the current throughput metric value based on the
312 * characteristic of the interface where the OGM has been received. The return
313 * value is computed as follows:
314 * - throughput * 50% if the incoming and outgoing interface are the
315 * same WiFi interface and the throughput is above
316 * 1MBit/s
317 * - throughput if the outgoing interface is the default
318 * interface (i.e. this OGM is processed for the
319 * internal table and not forwarded)
320 * - throughput * hop penalty otherwise
321 *
322 * Return: the penalised throughput metric.
323 */
324static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
325 struct batadv_hard_iface *if_incoming,
326 struct batadv_hard_iface *if_outgoing,
327 u32 throughput)
328{
329 int hop_penalty = atomic_read(&bat_priv->hop_penalty);
330 int hop_penalty_max = BATADV_TQ_MAX_VALUE;
331
332 /* Don't apply hop penalty in default originator table. */
333 if (if_outgoing == BATADV_IF_DEFAULT)
334 return throughput;
335
336 /* Forwarding on the same WiFi interface cuts the throughput in half
337 * due to the store & forward characteristics of WiFi.
338 * Very low throughput values are the exception.
339 */
340 if ((throughput > 10) &&
341 (if_incoming == if_outgoing) &&
342 !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
343 return throughput / 2;
344
345 /* hop penalty of 255 equals 100% */
346 return throughput * (hop_penalty_max - hop_penalty) / hop_penalty_max;
347}
348
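A worked sketch of the penalty rules documented above; the hop_penalty value is only an example and the default-originator-table shortcut is omitted for brevity:

#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_TQ_MAX 255u	/* stand-in for BATADV_TQ_MAX_VALUE */

static uint32_t example_forward_penalty(uint32_t throughput,
					uint32_t hop_penalty,
					bool same_wifi_iface, bool full_duplex)
{
	/* store & forward on the same half-duplex wifi link halves the
	 * metric, unless the throughput is already marginal (<= 1 Mbit/s,
	 * i.e. <= 10 in 100 kbit/s units)
	 */
	if (throughput > 10 && same_wifi_iface && !full_duplex)
		return throughput / 2;

	/* e.g. throughput = 100, hop_penalty = 30:
	 * 100 * (255 - 30) / 255 = 88
	 */
	return throughput * (EXAMPLE_TQ_MAX - hop_penalty) / EXAMPLE_TQ_MAX;
}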
349/**
350 * batadv_v_ogm_forward - forward an OGM to the given outgoing interface
351 * @bat_priv: the bat priv with all the soft interface information
352 * @ogm_received: previously received OGM to be forwarded
353 * @throughput: throughput to announce, may vary per outgoing interface
354 * @if_incoming: the interface on which this OGM was received
355 * @if_outgoing: the interface to which the OGM has to be forwarded
356 *
357 * Forward an OGM to an interface after having altered the throughput metric and
358 * the TTL value contained in it. The original OGM isn't modified.
359 */
360static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
361 const struct batadv_ogm2_packet *ogm_received,
362 u32 throughput,
363 struct batadv_hard_iface *if_incoming,
364 struct batadv_hard_iface *if_outgoing)
365{
366 struct batadv_ogm2_packet *ogm_forward;
367 unsigned char *skb_buff;
368 struct sk_buff *skb;
369 size_t packet_len;
370 u16 tvlv_len;
371
372 if (ogm_received->ttl <= 1) {
373 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n");
374 return;
375 }
376
377 tvlv_len = ntohs(ogm_received->tvlv_len);
378
379 packet_len = BATADV_OGM2_HLEN + tvlv_len;
380 skb = netdev_alloc_skb_ip_align(if_outgoing->net_dev,
381 ETH_HLEN + packet_len);
382 if (!skb)
383 return;
384
385 skb_reserve(skb, ETH_HLEN);
386 skb_buff = skb_put(skb, packet_len);
387 memcpy(skb_buff, ogm_received, packet_len);
388
389 /* apply forward penalty */
390 ogm_forward = (struct batadv_ogm2_packet *)skb_buff;
391 ogm_forward->throughput = htonl(throughput);
392 ogm_forward->ttl--;
393
394 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
395 "Forwarding OGM2 packet on %s: throughput %u, ttl %u, received via %s\n",
396 if_outgoing->net_dev->name, throughput, ogm_forward->ttl,
397 if_incoming->net_dev->name);
398
399 batadv_v_ogm_send_to_if(skb, if_outgoing);
400}
401
402/**
403 * batadv_v_ogm_metric_update - update route metric based on OGM
404 * @bat_priv: the bat priv with all the soft interface information
405 * @ogm2: OGM2 structure
406 * @orig_node: Originator structure for which the OGM has been received
407 * @neigh_node: the neigh_node through which the OGM has been received
408 * @if_incoming: the interface where this packet was received
409 * @if_outgoing: the interface for which the packet should be considered
410 *
411 * Return:
412 * 1 if the OGM is new,
413 * 0 if it is not new but valid,
414 * <0 on error (e.g. old OGM)
415 */
416static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
417 const struct batadv_ogm2_packet *ogm2,
418 struct batadv_orig_node *orig_node,
419 struct batadv_neigh_node *neigh_node,
420 struct batadv_hard_iface *if_incoming,
421 struct batadv_hard_iface *if_outgoing)
422{
423 struct batadv_orig_ifinfo *orig_ifinfo = NULL;
424 struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
425 bool protection_started = false;
426 int ret = -EINVAL;
427 u32 path_throughput;
428 s32 seq_diff;
429
430 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
431 if (!orig_ifinfo)
432 goto out;
433
434 seq_diff = ntohl(ogm2->seqno) - orig_ifinfo->last_real_seqno;
435
436 if (!hlist_empty(&orig_node->neigh_list) &&
437 batadv_window_protected(bat_priv, seq_diff,
438 BATADV_OGM_MAX_AGE,
439 &orig_ifinfo->batman_seqno_reset,
440 &protection_started)) {
441 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
442 "Drop packet: packet within window protection time from %pM\n",
443 ogm2->orig);
444 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
445 "Last reset: %ld, %ld\n",
446 orig_ifinfo->batman_seqno_reset, jiffies);
447 goto out;
448 }
449
450 /* drop packets with old seqnos; however, accept the first packet after
451 * a host has been rebooted.
452 */
453 if ((seq_diff < 0) && !protection_started)
454 goto out;
455
456 neigh_node->last_seen = jiffies;
457
458 orig_node->last_seen = jiffies;
459
460 orig_ifinfo->last_real_seqno = ntohl(ogm2->seqno);
461 orig_ifinfo->last_ttl = ogm2->ttl;
462
463 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
464 if (!neigh_ifinfo)
465 goto out;
466
467 path_throughput = batadv_v_forward_penalty(bat_priv, if_incoming,
468 if_outgoing,
469 ntohl(ogm2->throughput));
470 neigh_ifinfo->bat_v.throughput = path_throughput;
471 neigh_ifinfo->bat_v.last_seqno = ntohl(ogm2->seqno);
472 neigh_ifinfo->last_ttl = ogm2->ttl;
473
474 if (seq_diff > 0 || protection_started)
475 ret = 1;
476 else
477 ret = 0;
478out:
479 if (orig_ifinfo)
480 batadv_orig_ifinfo_put(orig_ifinfo);
481 if (neigh_ifinfo)
482 batadv_neigh_ifinfo_put(neigh_ifinfo);
483
484 return ret;
485}
486
487/**
488 * batadv_v_ogm_route_update - update routes based on OGM
489 * @bat_priv: the bat priv with all the soft interface information
490 * @ethhdr: the Ethernet header of the OGM2
491 * @ogm2: OGM2 structure
492 * @orig_node: Originator structure for which the OGM has been received
493 * @neigh_node: the neigh_node through which the OGM has been received
494 * @if_incoming: the interface where this packet was received
495 * @if_outgoing: the interface for which the packet should be considered
496 */
497static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
498 const struct ethhdr *ethhdr,
499 const struct batadv_ogm2_packet *ogm2,
500 struct batadv_orig_node *orig_node,
501 struct batadv_neigh_node *neigh_node,
502 struct batadv_hard_iface *if_incoming,
503 struct batadv_hard_iface *if_outgoing)
504{
505 struct batadv_neigh_node *router = NULL;
506 struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
507 struct batadv_orig_node *orig_neigh_node = NULL;
508 struct batadv_orig_ifinfo *orig_ifinfo = NULL;
509 struct batadv_neigh_node *orig_neigh_router = NULL;
510
511 neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
512 if (!neigh_ifinfo)
513 goto out;
514
515 orig_neigh_node = batadv_v_ogm_orig_get(bat_priv, ethhdr->h_source);
516 if (!orig_neigh_node)
517 goto out;
518
519 orig_neigh_router = batadv_orig_router_get(orig_neigh_node,
520 if_outgoing);
521
522 /* drop packet if sender is not a direct neighbor and if we
523 * don't route towards it
524 */
525 router = batadv_orig_router_get(orig_node, if_outgoing);
526 if (router && router->orig_node != orig_node && !orig_neigh_router) {
527 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
528 "Drop packet: OGM via unknown neighbor!\n");
529 goto out;
530 }
531
532 if (router)
533 batadv_neigh_node_put(router);
534
535 /* Update routes, and check if the OGM is from the best next hop */
536 batadv_v_ogm_orig_update(bat_priv, orig_node, neigh_node, ogm2,
537 if_outgoing);
538
539 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
540 if (!orig_ifinfo)
541 goto out;
542
543 /* don't forward the same seqno twice on one interface */
544 if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm2->seqno))
545 goto out;
546
547 /* acquire possibly updated router */
548 router = batadv_orig_router_get(orig_node, if_outgoing);
549
550 /* strict rule: forward packets coming from the best next hop only */
551 if (neigh_node != router)
552 goto out;
553
554 /* only forward for specific interface, not for the default one. */
555 if (if_outgoing != BATADV_IF_DEFAULT) {
556 orig_ifinfo->last_seqno_forwarded = ntohl(ogm2->seqno);
557 batadv_v_ogm_forward(bat_priv, ogm2,
558 neigh_ifinfo->bat_v.throughput,
559 if_incoming, if_outgoing);
560 }
561
562out:
563 if (orig_ifinfo)
564 batadv_orig_ifinfo_put(orig_ifinfo);
565 if (router)
566 batadv_neigh_node_put(router);
567 if (orig_neigh_router)
568 batadv_neigh_node_put(orig_neigh_router);
569 if (orig_neigh_node)
570 batadv_orig_node_put(orig_neigh_node);
571 if (neigh_ifinfo)
572 batadv_neigh_ifinfo_put(neigh_ifinfo);
573}
574
575/**
576 * batadv_v_ogm_process_per_outif - process a batman v OGM for an outgoing if
577 * @bat_priv: the bat priv with all the soft interface information
578 * @ethhdr: the Ethernet header of the OGM2
579 * @ogm2: OGM2 structure
580 * @orig_node: Originator structure for which the OGM has been received
581 * @neigh_node: the neigh_node through which the OGM has been received
582 * @if_incoming: the interface where this packet was received
583 * @if_outgoing: the interface for which the packet should be considered
584 */
585static void
586batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
587 const struct ethhdr *ethhdr,
588 const struct batadv_ogm2_packet *ogm2,
589 struct batadv_orig_node *orig_node,
590 struct batadv_neigh_node *neigh_node,
591 struct batadv_hard_iface *if_incoming,
592 struct batadv_hard_iface *if_outgoing)
593{
594 int seqno_age;
595
596 /* first, update the metric with the appropriate sanity checks */
597 seqno_age = batadv_v_ogm_metric_update(bat_priv, ogm2, orig_node,
598 neigh_node, if_incoming,
599 if_outgoing);
600
601 /* outdated sequence numbers are to be discarded */
602 if (seqno_age < 0)
603 return;
604
605 /* only unknown & newer OGMs contain TVLVs we are interested in */
606 if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT))
607 batadv_tvlv_containers_process(bat_priv, true, orig_node,
608 NULL, NULL,
609 (unsigned char *)(ogm2 + 1),
610 ntohs(ogm2->tvlv_len));
611
612 /* if the metric update went through, update routes if needed */
613 batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node,
614 neigh_node, if_incoming, if_outgoing);
615}
616
617/**
618 * batadv_v_ogm_aggr_packet - checks if there is another OGM aggregated
619 * @buff_pos: current position in the skb
620 * @packet_len: total length of the skb
621 * @tvlv_len: tvlv length of the previously considered OGM
622 *
623 * Return: true if there is enough space for another OGM, false otherwise.
624 */
625static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
626 __be16 tvlv_len)
627{
628 int next_buff_pos = 0;
629
630 next_buff_pos += buff_pos + BATADV_OGM2_HLEN;
631 next_buff_pos += ntohs(tvlv_len);
632
633 return (next_buff_pos <= packet_len) &&
634 (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
635}
636
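A small sketch of the bounds check above, which drives the aggregate walk in the receive handler further below; the header size and aggregation limit are assumed example values, not taken from the patch:

#include <stdbool.h>

#define EXAMPLE_OGM2_HLEN      20u	/* assumed OGM2 header size */
#define EXAMPLE_MAX_AGGR_BYTES 512u	/* assumed aggregation limit */

static bool example_ogm_aggr_fits(unsigned int buff_pos,
				  unsigned int packet_len,
				  unsigned int tvlv_len)
{
	unsigned int next = buff_pos + EXAMPLE_OGM2_HLEN + tvlv_len;

	/* e.g. buff_pos = 0, tvlv_len = 44 -> next = 64: a second OGM is
	 * processed only if at least 64 bytes are present and the
	 * aggregation limit is not exceeded
	 */
	return next <= packet_len && next <= EXAMPLE_MAX_AGGR_BYTES;
}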
637/**
638 * batadv_v_ogm_process - process an incoming batman v OGM
639 * @skb: the skb containing the OGM
640 * @ogm_offset: offset to the OGM which should be processed (for aggregates)
641 * @if_incoming: the interface where this packet was received
642 */
643static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
644 struct batadv_hard_iface *if_incoming)
645{
646 struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
647 struct ethhdr *ethhdr;
648 struct batadv_orig_node *orig_node = NULL;
649 struct batadv_hardif_neigh_node *hardif_neigh = NULL;
650 struct batadv_neigh_node *neigh_node = NULL;
651 struct batadv_hard_iface *hard_iface;
652 struct batadv_ogm2_packet *ogm_packet;
653 u32 ogm_throughput, link_throughput, path_throughput;
654
655 ethhdr = eth_hdr(skb);
656 ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset);
657
658 ogm_throughput = ntohl(ogm_packet->throughput);
659
660 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
661 "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, throughput %u, TTL %u, V %u, tvlv_len %u)\n",
662 ethhdr->h_source, if_incoming->net_dev->name,
663 if_incoming->net_dev->dev_addr, ogm_packet->orig,
664 ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl,
665 ogm_packet->version, ntohs(ogm_packet->tvlv_len));
666
667 /* If the throughput metric is 0, immediately drop the packet. No need to
668 * create orig_node / neigh_node for an unusable route.
669 */
670 if (ogm_throughput == 0) {
671 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
672 "Drop packet: originator packet with throughput metric of 0\n");
673 return;
674 }
675
676 /* require ELP packets to be received from this neighbor first */
677 hardif_neigh = batadv_hardif_neigh_get(if_incoming, ethhdr->h_source);
678 if (!hardif_neigh) {
679 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
680 "Drop packet: OGM via unknown neighbor!\n");
681 goto out;
682 }
683
684 orig_node = batadv_v_ogm_orig_get(bat_priv, ogm_packet->orig);
685 if (!orig_node)
686 return;
687
688 neigh_node = batadv_neigh_node_new(orig_node, if_incoming,
689 ethhdr->h_source);
690 if (!neigh_node)
691 goto out;
692
693 /* Update the received throughput metric to match the link
694 * characteristic:
695 * - If this OGM traveled one hop so far (emitted by single hop
696 * neighbor) the path throughput metric equals the link throughput.
697 * - For OGMs traversing more than one hop the path throughput metric is
698 * the smaller of the path throughput and the link throughput.
699 */
700 link_throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
701 path_throughput = min_t(u32, link_throughput, ogm_throughput);
702 ogm_packet->throughput = htonl(path_throughput);
703
704 batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, orig_node,
705 neigh_node, if_incoming,
706 BATADV_IF_DEFAULT);
707
708 rcu_read_lock();
709 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
710 if (hard_iface->if_status != BATADV_IF_ACTIVE)
711 continue;
712
713 if (hard_iface->soft_iface != bat_priv->soft_iface)
714 continue;
715
716 batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
717 orig_node, neigh_node,
718 if_incoming, hard_iface);
719 }
720 rcu_read_unlock();
721out:
722 if (orig_node)
723 batadv_orig_node_put(orig_node);
724 if (neigh_node)
725 batadv_neigh_node_put(neigh_node);
726 if (hardif_neigh)
727 batadv_hardif_neigh_put(hardif_neigh);
728}
729
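The link/path combination described in the comment above, as a standalone sketch (units assumed to be the 100 kbit/s steps used elsewhere in B.A.T.M.A.N. V; example_* names are hypothetical):

#include <stdint.h>

static uint32_t example_path_throughput(uint32_t ogm_throughput,
					uint32_t link_throughput)
{
	/* e.g. advertised path = 120 (12 Mbit/s), local link = 80 (8 Mbit/s):
	 * the path through this neighbor is capped at the bottleneck, 80
	 */
	return ogm_throughput < link_throughput ? ogm_throughput
						: link_throughput;
}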
730/**
731 * batadv_v_ogm_packet_recv - OGM2 receiving handler
732 * @skb: the received OGM
733 * @if_incoming: the interface where this OGM has been received
734 *
735 * Return: NET_RX_SUCCESS and consumes the skb on success, or NET_RX_DROP
736 * (without freeing the skb) on failure.
737 */
738int batadv_v_ogm_packet_recv(struct sk_buff *skb,
739 struct batadv_hard_iface *if_incoming)
740{
741 struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
742 struct batadv_ogm2_packet *ogm_packet;
743 struct ethhdr *ethhdr = eth_hdr(skb);
744 int ogm_offset;
745 u8 *packet_pos;
746 int ret = NET_RX_DROP;
747
748 /* did we receive an OGM2 packet on an interface that does not have
749 * B.A.T.M.A.N. V enabled?
750 */
751 if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
752 return NET_RX_DROP;
753
754 if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
755 return NET_RX_DROP;
756
757 if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
758 return NET_RX_DROP;
759
760 ogm_packet = (struct batadv_ogm2_packet *)skb->data;
761
762 if (batadv_is_my_mac(bat_priv, ogm_packet->orig))
763 return NET_RX_DROP;
764
765 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
766 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
767 skb->len + ETH_HLEN);
768
769 ogm_offset = 0;
770 ogm_packet = (struct batadv_ogm2_packet *)skb->data;
771
772 while (batadv_v_ogm_aggr_packet(ogm_offset, skb_headlen(skb),
773 ogm_packet->tvlv_len)) {
774 batadv_v_ogm_process(skb, ogm_offset, if_incoming);
775
776 ogm_offset += BATADV_OGM2_HLEN;
777 ogm_offset += ntohs(ogm_packet->tvlv_len);
778
779 packet_pos = skb->data + ogm_offset;
780 ogm_packet = (struct batadv_ogm2_packet *)packet_pos;
781 }
782
783 ret = NET_RX_SUCCESS;
784 consume_skb(skb);
785
786 return ret;
787}
788
789/**
790 * batadv_v_ogm_init - initialise the OGM2 engine
791 * @bat_priv: the bat priv with all the soft interface information
792 *
793 * Return: 0 on success or a negative error code in case of failure
794 */
795int batadv_v_ogm_init(struct batadv_priv *bat_priv)
796{
797 struct batadv_ogm2_packet *ogm_packet;
798 unsigned char *ogm_buff;
799 u32 random_seqno;
800
801 bat_priv->bat_v.ogm_buff_len = BATADV_OGM2_HLEN;
802 ogm_buff = kzalloc(bat_priv->bat_v.ogm_buff_len, GFP_ATOMIC);
803 if (!ogm_buff)
804 return -ENOMEM;
805
806 bat_priv->bat_v.ogm_buff = ogm_buff;
807 ogm_packet = (struct batadv_ogm2_packet *)ogm_buff;
808 ogm_packet->packet_type = BATADV_OGM2;
809 ogm_packet->version = BATADV_COMPAT_VERSION;
810 ogm_packet->ttl = BATADV_TTL;
811 ogm_packet->flags = BATADV_NO_FLAGS;
812 ogm_packet->throughput = htonl(BATADV_THROUGHPUT_MAX_VALUE);
813
814 /* randomize initial seqno to avoid collision */
815 get_random_bytes(&random_seqno, sizeof(random_seqno));
816 atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno);
817 INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send);
818
819 return 0;
820}
821
822/**
823 * batadv_v_ogm_free - free OGM private resources
824 * @bat_priv: the bat priv with all the soft interface information
825 */
826void batadv_v_ogm_free(struct batadv_priv *bat_priv)
827{
828 cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq);
829
830 kfree(bat_priv->bat_v.ogm_buff);
831 bat_priv->bat_v.ogm_buff = NULL;
832 bat_priv->bat_v.ogm_buff_len = 0;
833}
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
new file mode 100644
index 000000000000..d849c75ada0e
--- /dev/null
+++ b/net/batman-adv/bat_v_ogm.h
@@ -0,0 +1,36 @@
1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 *
3 * Antonio Quartulli
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef _BATMAN_ADV_BATADV_V_OGM_H_
19#define _BATMAN_ADV_BATADV_V_OGM_H_
20
21#include <linux/types.h>
22
23struct batadv_hard_iface;
24struct batadv_priv;
25struct sk_buff;
26
27int batadv_v_ogm_init(struct batadv_priv *bat_priv);
28void batadv_v_ogm_free(struct batadv_priv *bat_priv);
29int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface);
30struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
31 const u8 *addr);
32void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface);
33int batadv_v_ogm_packet_recv(struct sk_buff *skb,
34 struct batadv_hard_iface *if_incoming);
35
36#endif /* _BATMAN_ADV_BATADV_V_OGM_H_ */
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 25cbc36e997a..b56bb000a0ab 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich, Marek Lindner 3 * Simon Wunderlich, Marek Lindner
4 * 4 *
@@ -29,10 +29,16 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
29 bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE); 29 bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE);
30} 30}
31 31
32/* receive and process one packet within the sequence number window. 32/**
33 * batadv_bit_get_packet - receive and process one packet within the sequence
34 * number window
35 * @priv: the bat priv with all the soft interface information
36 * @seq_bits: pointer to the sequence number receive packet
37 * @seq_num_diff: difference between the current/received sequence number and
38 * the last sequence number
39 * @set_mark: whether this packet should be marked in seq_bits
33 * 40 *
34 * returns: 41 * Return: 1 if the window was moved (either new or very old),
35 * 1 if the window was moved (either new or very old)
36 * 0 if the window was not moved/shifted. 42 * 0 if the window was not moved/shifted.
37 */ 43 */
38int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, 44int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 0226b220fe5b..3e41bb80eb81 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich, Marek Lindner 3 * Simon Wunderlich, Marek Lindner
4 * 4 *
@@ -24,7 +24,14 @@
24#include <linux/compiler.h> 24#include <linux/compiler.h>
25#include <linux/types.h> 25#include <linux/types.h>
26 26
27/* Returns 1 if the corresponding bit in the given seq_bits indicates true 27/**
28 * batadv_test_bit - check if bit is set in the current window
29 *
30 * @seq_bits: pointer to the sequence number receive packet
31 * @last_seqno: latest sequence number in seq_bits
32 * @curr_seqno: sequence number to test for
33 *
34 * Return: 1 if the corresponding bit in the given seq_bits indicates true
28 * and curr_seqno is within range of last_seqno. Otherwise returns 0. 35 * and curr_seqno is within range of last_seqno. Otherwise returns 0.
29 */ 36 */
30static inline int batadv_test_bit(const unsigned long *seq_bits, 37static inline int batadv_test_bit(const unsigned long *seq_bits,
@@ -48,9 +55,6 @@ static inline void batadv_set_bit(unsigned long *seq_bits, s32 n)
48 set_bit(n, seq_bits); /* turn the position on */ 55 set_bit(n, seq_bits); /* turn the position on */
49} 56}
50 57
51/* receive and process one packet, returns 1 if received seq_num is considered
52 * new, 0 if old
53 */
54int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, 58int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
55 int set_mark); 59 int set_mark);
56 60
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index c24c481b666f..0a6c8b824a00 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich 3 * Simon Wunderlich
4 * 4 *
@@ -31,6 +31,7 @@
31#include <linux/jhash.h> 31#include <linux/jhash.h>
32#include <linux/jiffies.h> 32#include <linux/jiffies.h>
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/kref.h>
34#include <linux/list.h> 35#include <linux/list.h>
35#include <linux/lockdep.h> 36#include <linux/lockdep.h>
36#include <linux/netdevice.h> 37#include <linux/netdevice.h>
@@ -58,7 +59,13 @@ static void
58batadv_bla_send_announce(struct batadv_priv *bat_priv, 59batadv_bla_send_announce(struct batadv_priv *bat_priv,
59 struct batadv_bla_backbone_gw *backbone_gw); 60 struct batadv_bla_backbone_gw *backbone_gw);
60 61
61/* return the index of the claim */ 62/**
63 * batadv_choose_claim - choose the right bucket for a claim.
64 * @data: data to hash
65 * @size: size of the hash table
66 *
67 * Return: the hash index of the claim
68 */
62static inline u32 batadv_choose_claim(const void *data, u32 size) 69static inline u32 batadv_choose_claim(const void *data, u32 size)
63{ 70{
64 struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; 71 struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
@@ -70,7 +77,13 @@ static inline u32 batadv_choose_claim(const void *data, u32 size)
70 return hash % size; 77 return hash % size;
71} 78}
72 79
73/* return the index of the backbone gateway */ 80/**
81 * batadv_choose_backbone_gw - choose the right bucket for a backbone gateway.
82 * @data: data to hash
83 * @size: size of the hash table
84 *
85 * Return: the hash index of the backbone gateway
86 */
74static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) 87static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
75{ 88{
76 const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; 89 const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
@@ -82,7 +95,13 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
82 return hash % size; 95 return hash % size;
83} 96}
84 97
85/* compares address and vid of two backbone gws */ 98/**
99 * batadv_compare_backbone_gw - compare address and vid of two backbone gws
100 * @node: list node of the first entry to compare
101 * @data2: pointer to the second backbone gateway
102 *
103 * Return: 1 if the backbones have the same data, 0 otherwise
104 */
86static int batadv_compare_backbone_gw(const struct hlist_node *node, 105static int batadv_compare_backbone_gw(const struct hlist_node *node,
87 const void *data2) 106 const void *data2)
88{ 107{
@@ -100,7 +119,13 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
100 return 1; 119 return 1;
101} 120}
102 121
103/* compares address and vid of two claims */ 122/**
123 * batadv_compare_claim - compare address and vid of two claims
124 * @node: list node of the first entry to compare
125 * @data2: pointer to the second claim
126 *
127 * Return: 1 if the claims have the same data, 0 otherwise
128 */
104static int batadv_compare_claim(const struct hlist_node *node, 129static int batadv_compare_claim(const struct hlist_node *node,
105 const void *data2) 130 const void *data2)
106{ 131{
@@ -118,35 +143,62 @@ static int batadv_compare_claim(const struct hlist_node *node,
118 return 1; 143 return 1;
119} 144}
120 145
121/* free a backbone gw */ 146/**
122static void 147 * batadv_backbone_gw_release - release backbone gw from lists and queue for
123batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) 148 * free after rcu grace period
149 * @ref: kref pointer of the backbone gw
150 */
151static void batadv_backbone_gw_release(struct kref *ref)
124{ 152{
125 if (atomic_dec_and_test(&backbone_gw->refcount)) 153 struct batadv_bla_backbone_gw *backbone_gw;
126 kfree_rcu(backbone_gw, rcu); 154
155 backbone_gw = container_of(ref, struct batadv_bla_backbone_gw,
156 refcount);
157
158 kfree_rcu(backbone_gw, rcu);
127} 159}
128 160
129/* finally deinitialize the claim */ 161/**
130static void batadv_claim_release(struct batadv_bla_claim *claim) 162 * batadv_backbone_gw_put - decrement the backbone gw refcounter and possibly
163 * release it
164 * @backbone_gw: backbone gateway to be free'd
165 */
166static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
131{ 167{
132 batadv_backbone_gw_free_ref(claim->backbone_gw); 168 kref_put(&backbone_gw->refcount, batadv_backbone_gw_release);
169}
170
171/**
172 * batadv_claim_release - release claim from lists and queue for free after rcu
173 * grace period
174 * @ref: kref pointer of the claim
175 */
176static void batadv_claim_release(struct kref *ref)
177{
178 struct batadv_bla_claim *claim;
179
180 claim = container_of(ref, struct batadv_bla_claim, refcount);
181
182 batadv_backbone_gw_put(claim->backbone_gw);
133 kfree_rcu(claim, rcu); 183 kfree_rcu(claim, rcu);
134} 184}
135 185
136/* free a claim, call claim_free_rcu if its the last reference */ 186/**
137static void batadv_claim_free_ref(struct batadv_bla_claim *claim) 187 * batadv_claim_put - decrement the claim refcounter and possibly
188 * release it
189 * @claim: claim to be free'd
190 */
191static void batadv_claim_put(struct batadv_bla_claim *claim)
138{ 192{
139 if (atomic_dec_and_test(&claim->refcount)) 193 kref_put(&claim->refcount, batadv_claim_release);
140 batadv_claim_release(claim);
141} 194}
142 195
143/** 196/**
144 * batadv_claim_hash_find 197 * batadv_claim_hash_find - looks for a claim in the claim hash
145 * @bat_priv: the bat priv with all the soft interface information 198 * @bat_priv: the bat priv with all the soft interface information
146 * @data: search data (may be local/static data) 199 * @data: search data (may be local/static data)
147 * 200 *
148 * looks for a claim in the hash, and returns it if found 201 * Return: claim if found or NULL otherwise.
149 * or NULL otherwise.
150 */ 202 */
151static struct batadv_bla_claim 203static struct batadv_bla_claim
152*batadv_claim_hash_find(struct batadv_priv *bat_priv, 204*batadv_claim_hash_find(struct batadv_priv *bat_priv,
@@ -169,7 +221,7 @@ static struct batadv_bla_claim
169 if (!batadv_compare_claim(&claim->hash_entry, data)) 221 if (!batadv_compare_claim(&claim->hash_entry, data))
170 continue; 222 continue;
171 223
172 if (!atomic_inc_not_zero(&claim->refcount)) 224 if (!kref_get_unless_zero(&claim->refcount))
173 continue; 225 continue;
174 226
175 claim_tmp = claim; 227 claim_tmp = claim;
@@ -181,12 +233,12 @@ static struct batadv_bla_claim
181} 233}
182 234
183/** 235/**
184 * batadv_backbone_hash_find - looks for a claim in the hash 236 * batadv_backbone_hash_find - looks for a backbone gateway in the hash
185 * @bat_priv: the bat priv with all the soft interface information 237 * @bat_priv: the bat priv with all the soft interface information
186 * @addr: the address of the originator 238 * @addr: the address of the originator
187 * @vid: the VLAN ID 239 * @vid: the VLAN ID
188 * 240 *
189 * Returns claim if found or NULL otherwise. 241 * Return: backbone gateway if found or NULL otherwise
190 */ 242 */
191static struct batadv_bla_backbone_gw * 243static struct batadv_bla_backbone_gw *
192batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, 244batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
@@ -213,7 +265,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
213 &search_entry)) 265 &search_entry))
214 continue; 266 continue;
215 267
216 if (!atomic_inc_not_zero(&backbone_gw->refcount)) 268 if (!kref_get_unless_zero(&backbone_gw->refcount))
217 continue; 269 continue;
218 270
219 backbone_gw_tmp = backbone_gw; 271 backbone_gw_tmp = backbone_gw;
@@ -224,7 +276,10 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
224 return backbone_gw_tmp; 276 return backbone_gw_tmp;
225} 277}
226 278
227/* delete all claims for a backbone */ 279/**
280 * batadv_bla_del_backbone_claims - delete all claims for a backbone
281 * @backbone_gw: backbone gateway where the claims should be removed
282 */
228static void 283static void
229batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) 284batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
230{ 285{
@@ -249,7 +304,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
249 if (claim->backbone_gw != backbone_gw) 304 if (claim->backbone_gw != backbone_gw)
250 continue; 305 continue;
251 306
252 batadv_claim_free_ref(claim); 307 batadv_claim_put(claim);
253 hlist_del_rcu(&claim->hash_entry); 308 hlist_del_rcu(&claim->hash_entry);
254 } 309 }
255 spin_unlock_bh(list_lock); 310 spin_unlock_bh(list_lock);
@@ -368,18 +423,17 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
368 netif_rx(skb); 423 netif_rx(skb);
369out: 424out:
370 if (primary_if) 425 if (primary_if)
371 batadv_hardif_free_ref(primary_if); 426 batadv_hardif_put(primary_if);
372} 427}
373 428
374/** 429/**
375 * batadv_bla_get_backbone_gw 430 * batadv_bla_get_backbone_gw - finds or creates a backbone gateway
376 * @bat_priv: the bat priv with all the soft interface information 431 * @bat_priv: the bat priv with all the soft interface information
377 * @orig: the mac address of the originator 432 * @orig: the mac address of the originator
378 * @vid: the VLAN ID 433 * @vid: the VLAN ID
379 * @own_backbone: set if the requested backbone is local 434 * @own_backbone: set if the requested backbone is local
380 * 435 *
381 * searches for the backbone gw or creates a new one if it could not 436 * Return: the (possibly created) backbone gateway or NULL on error
382 * be found.
383 */ 437 */
384static struct batadv_bla_backbone_gw * 438static struct batadv_bla_backbone_gw *
385batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, 439batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
@@ -412,7 +466,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
412 ether_addr_copy(entry->orig, orig); 466 ether_addr_copy(entry->orig, orig);
413 467
414 /* one for the hash, one for returning */ 468 /* one for the hash, one for returning */
415 atomic_set(&entry->refcount, 2); 469 kref_init(&entry->refcount);
470 kref_get(&entry->refcount);
416 471
417 hash_added = batadv_hash_add(bat_priv->bla.backbone_hash, 472 hash_added = batadv_hash_add(bat_priv->bla.backbone_hash,
418 batadv_compare_backbone_gw, 473 batadv_compare_backbone_gw,
@@ -430,7 +485,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
430 if (orig_node) { 485 if (orig_node) {
431 batadv_tt_global_del_orig(bat_priv, orig_node, vid, 486 batadv_tt_global_del_orig(bat_priv, orig_node, vid,
432 "became a backbone gateway"); 487 "became a backbone gateway");
433 batadv_orig_node_free_ref(orig_node); 488 batadv_orig_node_put(orig_node);
434 } 489 }
435 490
436 if (own_backbone) { 491 if (own_backbone) {
@@ -445,7 +500,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
445 return entry; 500 return entry;
446} 501}
447 502
448/* update or add the own backbone gw to make sure we announce 503/**
504 * batadv_bla_update_own_backbone_gw - updates the own backbone gw for a VLAN
505 * @bat_priv: the bat priv with all the soft interface information
506 * @primary_if: the selected primary interface
507 * @vid: VLAN identifier
508 *
509 * update or add the own backbone gw to make sure we announce
449 * where we receive other backbone gws 510 * where we receive other backbone gws
450 */ 511 */
451static void 512static void
@@ -462,7 +523,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
462 return; 523 return;
463 524
464 backbone_gw->lasttime = jiffies; 525 backbone_gw->lasttime = jiffies;
465 batadv_backbone_gw_free_ref(backbone_gw); 526 batadv_backbone_gw_put(backbone_gw);
466} 527}
467 528
468/** 529/**
@@ -511,7 +572,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
511 572
512 /* finally, send an announcement frame */ 573 /* finally, send an announcement frame */
513 batadv_bla_send_announce(bat_priv, backbone_gw); 574 batadv_bla_send_announce(bat_priv, backbone_gw);
514 batadv_backbone_gw_free_ref(backbone_gw); 575 batadv_backbone_gw_put(backbone_gw);
515} 576}
516 577
517/** 578/**
@@ -542,12 +603,9 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
542} 603}
543 604
544/** 605/**
545 * batadv_bla_send_announce 606 * batadv_bla_send_announce - Send an announcement frame
546 * @bat_priv: the bat priv with all the soft interface information 607 * @bat_priv: the bat priv with all the soft interface information
547 * @backbone_gw: our backbone gateway which should be announced 608 * @backbone_gw: our backbone gateway which should be announced
548 *
549 * This function sends an announcement. It is called from multiple
550 * places.
551 */ 609 */
552static void batadv_bla_send_announce(struct batadv_priv *bat_priv, 610static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
553 struct batadv_bla_backbone_gw *backbone_gw) 611 struct batadv_bla_backbone_gw *backbone_gw)
@@ -595,7 +653,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
595 claim->lasttime = jiffies; 653 claim->lasttime = jiffies;
596 claim->backbone_gw = backbone_gw; 654 claim->backbone_gw = backbone_gw;
597 655
598 atomic_set(&claim->refcount, 2); 656 kref_init(&claim->refcount);
657 kref_get(&claim->refcount);
599 batadv_dbg(BATADV_DBG_BLA, bat_priv, 658 batadv_dbg(BATADV_DBG_BLA, bat_priv,
600 "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", 659 "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
601 mac, BATADV_PRINT_VID(vid)); 660 mac, BATADV_PRINT_VID(vid));
@@ -622,10 +681,10 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
622 spin_lock_bh(&claim->backbone_gw->crc_lock); 681 spin_lock_bh(&claim->backbone_gw->crc_lock);
623 claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); 682 claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
624 spin_unlock_bh(&claim->backbone_gw->crc_lock); 683 spin_unlock_bh(&claim->backbone_gw->crc_lock);
625 batadv_backbone_gw_free_ref(claim->backbone_gw); 684 batadv_backbone_gw_put(claim->backbone_gw);
626 } 685 }
627 /* set (new) backbone gw */ 686 /* set (new) backbone gw */
628 atomic_inc(&backbone_gw->refcount); 687 kref_get(&backbone_gw->refcount);
629 claim->backbone_gw = backbone_gw; 688 claim->backbone_gw = backbone_gw;
630 689
631 spin_lock_bh(&backbone_gw->crc_lock); 690 spin_lock_bh(&backbone_gw->crc_lock);
@@ -634,11 +693,14 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
634 backbone_gw->lasttime = jiffies; 693 backbone_gw->lasttime = jiffies;
635 694
636claim_free_ref: 695claim_free_ref:
637 batadv_claim_free_ref(claim); 696 batadv_claim_put(claim);
638} 697}
639 698
640/* Delete a claim from the claim hash which has the 699/**
641 * given mac address and vid. 700 * batadv_bla_del_claim - delete a claim from the claim hash
701 * @bat_priv: the bat priv with all the soft interface information
702 * @mac: mac address of the claim to be removed
703 * @vid: VLAN id for the claim to be removed
642 */ 704 */
643static void batadv_bla_del_claim(struct batadv_priv *bat_priv, 705static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
644 const u8 *mac, const unsigned short vid) 706 const u8 *mac, const unsigned short vid)
@@ -656,17 +718,25 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
656 718
657 batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, 719 batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
658 batadv_choose_claim, claim); 720 batadv_choose_claim, claim);
659 batadv_claim_free_ref(claim); /* reference from the hash is gone */ 721 batadv_claim_put(claim); /* reference from the hash is gone */
660 722
661 spin_lock_bh(&claim->backbone_gw->crc_lock); 723 spin_lock_bh(&claim->backbone_gw->crc_lock);
662 claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); 724 claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
663 spin_unlock_bh(&claim->backbone_gw->crc_lock); 725 spin_unlock_bh(&claim->backbone_gw->crc_lock);
664 726
665 /* don't need the reference from hash_find() anymore */ 727 /* don't need the reference from hash_find() anymore */
666 batadv_claim_free_ref(claim); 728 batadv_claim_put(claim);
667} 729}
668 730
669/* check for ANNOUNCE frame, return 1 if handled */ 731/**
732 * batadv_handle_announce - check for ANNOUNCE frame
733 * @bat_priv: the bat priv with all the soft interface information
734 * @an_addr: announcement mac address (ARP Sender HW address)
735 * @backbone_addr: originator address of the sender (Ethernet source MAC)
736 * @vid: the VLAN ID of the frame
737 *
738 * Return: 1 if handled
739 */
670static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, 740static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
671 u8 *backbone_addr, unsigned short vid) 741 u8 *backbone_addr, unsigned short vid)
672{ 742{
@@ -712,11 +782,20 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
712 } 782 }
713 } 783 }
714 784
715 batadv_backbone_gw_free_ref(backbone_gw); 785 batadv_backbone_gw_put(backbone_gw);
716 return 1; 786 return 1;
717} 787}
718 788
719/* check for REQUEST frame, return 1 if handled */ 789/**
790 * batadv_handle_request - check for REQUEST frame
791 * @bat_priv: the bat priv with all the soft interface information
792 * @primary_if: the primary hard interface of this batman soft interface
793 * @backbone_addr: backbone address to be requested (ARP sender HW MAC)
794 * @ethhdr: ethernet header of a packet
795 * @vid: the VLAN ID of the frame
796 *
797 * Return: 1 if handled
798 */
720static int batadv_handle_request(struct batadv_priv *bat_priv, 799static int batadv_handle_request(struct batadv_priv *bat_priv,
721 struct batadv_hard_iface *primary_if, 800 struct batadv_hard_iface *primary_if,
722 u8 *backbone_addr, struct ethhdr *ethhdr, 801 u8 *backbone_addr, struct ethhdr *ethhdr,
@@ -740,7 +819,16 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
740 return 1; 819 return 1;
741} 820}
742 821
743/* check for UNCLAIM frame, return 1 if handled */ 822/**
823 * batadv_handle_unclaim - check for UNCLAIM frame
824 * @bat_priv: the bat priv with all the soft interface information
825 * @primary_if: the primary hard interface of this batman soft interface
826 * @backbone_addr: originator address of the backbone (Ethernet source)
827 * @claim_addr: Client to be unclaimed (ARP sender HW MAC)
828 * @vid: the VLAN ID of the frame
829 *
830 * Return: 1 if handled
831 */
744static int batadv_handle_unclaim(struct batadv_priv *bat_priv, 832static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
745 struct batadv_hard_iface *primary_if, 833 struct batadv_hard_iface *primary_if,
746 u8 *backbone_addr, u8 *claim_addr, 834 u8 *backbone_addr, u8 *claim_addr,
@@ -765,11 +853,20 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
765 claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); 853 claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig);
766 854
767 batadv_bla_del_claim(bat_priv, claim_addr, vid); 855 batadv_bla_del_claim(bat_priv, claim_addr, vid);
768 batadv_backbone_gw_free_ref(backbone_gw); 856 batadv_backbone_gw_put(backbone_gw);
769 return 1; 857 return 1;
770} 858}
771 859
772/* check for CLAIM frame, return 1 if handled */ 860/**
861 * batadv_handle_claim - check for CLAIM frame
862 * @bat_priv: the bat priv with all the soft interface information
863 * @primary_if: the primary hard interface of this batman soft interface
864 * @backbone_addr: originator address of the backbone (Ethernet Source)
865 * @claim_addr: client mac address to be claimed (ARP sender HW MAC)
866 * @vid: the VLAN ID of the frame
867 *
868 * Return: 1 if handled
869 */
773static int batadv_handle_claim(struct batadv_priv *bat_priv, 870static int batadv_handle_claim(struct batadv_priv *bat_priv,
774 struct batadv_hard_iface *primary_if, 871 struct batadv_hard_iface *primary_if,
775 u8 *backbone_addr, u8 *claim_addr, 872 u8 *backbone_addr, u8 *claim_addr,
@@ -793,12 +890,12 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
793 890
794 /* TODO: we could call something like tt_local_del() here. */ 891 /* TODO: we could call something like tt_local_del() here. */
795 892
796 batadv_backbone_gw_free_ref(backbone_gw); 893 batadv_backbone_gw_put(backbone_gw);
797 return 1; 894 return 1;
798} 895}
799 896
800/** 897/**
801 * batadv_check_claim_group 898 * batadv_check_claim_group - check for claim group membership
802 * @bat_priv: the bat priv with all the soft interface information 899 * @bat_priv: the bat priv with all the soft interface information
803 * @primary_if: the primary interface of this batman interface 900 * @primary_if: the primary interface of this batman interface
804 * @hw_src: the Hardware source in the ARP Header 901 * @hw_src: the Hardware source in the ARP Header
@@ -809,7 +906,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
809 * This function also applies the group ID of the sender 906 * This function also applies the group ID of the sender
810 * if it is in the same mesh. 907 * if it is in the same mesh.
811 * 908 *
812 * returns: 909 * Return:
813 * 2 - if it is a claim packet and on the same group 910 * 2 - if it is a claim packet and on the same group
814 * 1 - if is a claim packet from another group 911 * 1 - if is a claim packet from another group
815 * 0 - if it is not a claim packet 912 * 0 - if it is not a claim packet
@@ -867,20 +964,18 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
867 bla_dst_own->group = bla_dst->group; 964 bla_dst_own->group = bla_dst->group;
868 } 965 }
869 966
870 batadv_orig_node_free_ref(orig_node); 967 batadv_orig_node_put(orig_node);
871 968
872 return 2; 969 return 2;
873} 970}
874 971
875/** 972/**
876 * batadv_bla_process_claim 973 * batadv_bla_process_claim - Check if this is a claim frame, and process it
877 * @bat_priv: the bat priv with all the soft interface information 974 * @bat_priv: the bat priv with all the soft interface information
878 * @primary_if: the primary hard interface of this batman soft interface 975 * @primary_if: the primary hard interface of this batman soft interface
879 * @skb: the frame to be checked 976 * @skb: the frame to be checked
880 * 977 *
881 * Check if this is a claim frame, and process it accordingly. 978 * Return: 1 if it was a claim frame, otherwise return 0 to
882 *
883 * returns 1 if it was a claim frame, otherwise return 0 to
884 * tell the caller that it can use the frame on its own. 979 * tell the caller that it can use the frame on its own.
885 */ 980 */
886static int batadv_bla_process_claim(struct batadv_priv *bat_priv, 981static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
@@ -1011,7 +1106,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
1011 return 1; 1106 return 1;
1012} 1107}
1013 1108
1014/* Check when we last heard from other nodes, and remove them in case of 1109/**
1110 * batadv_bla_purge_backbone_gw - Remove backbone gateways after a timeout or
1111 * immediately
1112 * @bat_priv: the bat priv with all the soft interface information
1113 * @now: whether the whole hash shall be wiped now
1114 *
1115 * Check when we last heard from other nodes, and remove them in case of
1015 * a time out, or clean all backbone gws if now is set. 1116 * a time out, or clean all backbone gws if now is set.
1016 */ 1117 */
1017static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) 1118static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
@@ -1052,14 +1153,14 @@ purge_now:
1052 batadv_bla_del_backbone_claims(backbone_gw); 1153 batadv_bla_del_backbone_claims(backbone_gw);
1053 1154
1054 hlist_del_rcu(&backbone_gw->hash_entry); 1155 hlist_del_rcu(&backbone_gw->hash_entry);
1055 batadv_backbone_gw_free_ref(backbone_gw); 1156 batadv_backbone_gw_put(backbone_gw);
1056 } 1157 }
1057 spin_unlock_bh(list_lock); 1158 spin_unlock_bh(list_lock);
1058 } 1159 }
1059} 1160}
1060 1161
1061/** 1162/**
1062 * batadv_bla_purge_claims 1163 * batadv_bla_purge_claims - Remove claims after a timeout or immediately
1063 * @bat_priv: the bat priv with all the soft interface information 1164 * @bat_priv: the bat priv with all the soft interface information
1064 * @primary_if: the selected primary interface, may be NULL if now is set 1165 * @primary_if: the selected primary interface, may be NULL if now is set
1065 * @now: whether the whole hash shall be wiped now 1166 * @now: whether the whole hash shall be wiped now
@@ -1108,12 +1209,11 @@ purge_now:
1108} 1209}
1109 1210
1110/** 1211/**
1111 * batadv_bla_update_orig_address 1212 * batadv_bla_update_orig_address - Update the backbone gateways when the own
1213 * originator address changes
1112 * @bat_priv: the bat priv with all the soft interface information 1214 * @bat_priv: the bat priv with all the soft interface information
1113 * @primary_if: the new selected primary_if 1215 * @primary_if: the new selected primary_if
1114 * @oldif: the old primary interface, may be NULL 1216 * @oldif: the old primary interface, may be NULL
1115 *
1116 * Update the backbone gateways when the own orig address changes.
1117 */ 1217 */
1118void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, 1218void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
1119 struct batadv_hard_iface *primary_if, 1219 struct batadv_hard_iface *primary_if,
@@ -1181,10 +1281,14 @@ void batadv_bla_status_update(struct net_device *net_dev)
1181 * so just call that one. 1281 * so just call that one.
1182 */ 1282 */
1183 batadv_bla_update_orig_address(bat_priv, primary_if, primary_if); 1283 batadv_bla_update_orig_address(bat_priv, primary_if, primary_if);
1184 batadv_hardif_free_ref(primary_if); 1284 batadv_hardif_put(primary_if);
1185} 1285}
1186 1286
1187/* periodic work to do: 1287/**
1288 * batadv_bla_periodic_work - performs periodic bla work
1289 * @work: kernel work struct
1290 *
1291 * periodic work to do:
1188 * * purge structures when they are too old 1292 * * purge structures when they are too old
1189 * * send announcements 1293 * * send announcements
1190 */ 1294 */
@@ -1251,7 +1355,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
1251 } 1355 }
1252out: 1356out:
1253 if (primary_if) 1357 if (primary_if)
1254 batadv_hardif_free_ref(primary_if); 1358 batadv_hardif_put(primary_if);
1255 1359
1256 queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, 1360 queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
1257 msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); 1361 msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
@@ -1265,7 +1369,12 @@ out:
1265static struct lock_class_key batadv_claim_hash_lock_class_key; 1369static struct lock_class_key batadv_claim_hash_lock_class_key;
1266static struct lock_class_key batadv_backbone_hash_lock_class_key; 1370static struct lock_class_key batadv_backbone_hash_lock_class_key;
1267 1371
1268/* initialize all bla structures */ 1372/**
1373 * batadv_bla_init - initialize all bla structures
1374 * @bat_priv: the bat priv with all the soft interface information
1375 *
1376 * Return: 0 on success, < 0 on error.
1377 */
1269int batadv_bla_init(struct batadv_priv *bat_priv) 1378int batadv_bla_init(struct batadv_priv *bat_priv)
1270{ 1379{
1271 int i; 1380 int i;
@@ -1285,7 +1394,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
1285 if (primary_if) { 1394 if (primary_if) {
1286 crc = crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN); 1395 crc = crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN);
1287 bat_priv->bla.claim_dest.group = htons(crc); 1396 bat_priv->bla.claim_dest.group = htons(crc);
1288 batadv_hardif_free_ref(primary_if); 1397 batadv_hardif_put(primary_if);
1289 } else { 1398 } else {
1290 bat_priv->bla.claim_dest.group = 0; /* will be set later */ 1399 bat_priv->bla.claim_dest.group = 0; /* will be set later */
1291 } 1400 }
@@ -1320,7 +1429,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
1320} 1429}
1321 1430
1322/** 1431/**
1323 * batadv_bla_check_bcast_duplist 1432 * batadv_bla_check_bcast_duplist - Check if a frame is in the broadcast dup.
1324 * @bat_priv: the bat priv with all the soft interface information 1433 * @bat_priv: the bat priv with all the soft interface information
1325 * @skb: contains the bcast_packet to be checked 1434 * @skb: contains the bcast_packet to be checked
1326 * 1435 *
@@ -1332,6 +1441,8 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
1332 * with a good chance that it is the same packet. If it is furthermore 1441 * with a good chance that it is the same packet. If it is furthermore
1333 * sent by another host, drop it. We allow equal packets from 1442 * sent by another host, drop it. We allow equal packets from
1334 * the same host however as this might be intended. 1443 * the same host however as this might be intended.
1444 *
1445 * Return: 1 if a packet is in the duplicate list, 0 otherwise.
1335 */ 1446 */
1336int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, 1447int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
1337 struct sk_buff *skb) 1448 struct sk_buff *skb)
@@ -1390,14 +1501,13 @@ out:
1390} 1501}
1391 1502
1392/** 1503/**
1393 * batadv_bla_is_backbone_gw_orig 1504 * batadv_bla_is_backbone_gw_orig - Check if the originator is a gateway for
1505 * the VLAN identified by vid.
1394 * @bat_priv: the bat priv with all the soft interface information 1506 * @bat_priv: the bat priv with all the soft interface information
1395 * @orig: originator mac address 1507 * @orig: originator mac address
1396 * @vid: VLAN identifier 1508 * @vid: VLAN identifier
1397 * 1509 *
1398 * Check if the originator is a gateway for the VLAN identified by vid. 1510 * Return: true if orig is a backbone for this vid, false otherwise.
1399 *
1400 * Returns true if orig is a backbone for this vid, false otherwise.
1401 */ 1511 */
1402bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, 1512bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
1403 unsigned short vid) 1513 unsigned short vid)
@@ -1431,14 +1541,13 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
1431} 1541}
1432 1542
1433/** 1543/**
1434 * batadv_bla_is_backbone_gw 1544 * batadv_bla_is_backbone_gw - check if originator is a backbone gw for a VLAN.
1435 * @skb: the frame to be checked 1545 * @skb: the frame to be checked
1436 * @orig_node: the orig_node of the frame 1546 * @orig_node: the orig_node of the frame
1437 * @hdr_size: maximum length of the frame 1547 * @hdr_size: maximum length of the frame
1438 * 1548 *
1439 * bla_is_backbone_gw inspects the skb for the VLAN ID and returns 1 1549 * Return: 1 if the orig_node is also a gateway on the soft interface, otherwise
1440 * if the orig_node is also a gateway on the soft interface, otherwise it 1550 * it returns 0.
1441 * returns 0.
1442 */ 1551 */
1443int batadv_bla_is_backbone_gw(struct sk_buff *skb, 1552int batadv_bla_is_backbone_gw(struct sk_buff *skb,
1444 struct batadv_orig_node *orig_node, int hdr_size) 1553 struct batadv_orig_node *orig_node, int hdr_size)
@@ -1461,11 +1570,16 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
1461 if (!backbone_gw) 1570 if (!backbone_gw)
1462 return 0; 1571 return 0;
1463 1572
1464 batadv_backbone_gw_free_ref(backbone_gw); 1573 batadv_backbone_gw_put(backbone_gw);
1465 return 1; 1574 return 1;
1466} 1575}
1467 1576
1468/* free all bla structures (for softinterface free or module unload) */ 1577/**
 1578 * batadv_bla_free - free all bla structures
1579 * @bat_priv: the bat priv with all the soft interface information
1580 *
1581 * for softinterface free or module unload
1582 */
1469void batadv_bla_free(struct batadv_priv *bat_priv) 1583void batadv_bla_free(struct batadv_priv *bat_priv)
1470{ 1584{
1471 struct batadv_hard_iface *primary_if; 1585 struct batadv_hard_iface *primary_if;
@@ -1484,22 +1598,23 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
1484 bat_priv->bla.backbone_hash = NULL; 1598 bat_priv->bla.backbone_hash = NULL;
1485 } 1599 }
1486 if (primary_if) 1600 if (primary_if)
1487 batadv_hardif_free_ref(primary_if); 1601 batadv_hardif_put(primary_if);
1488} 1602}
1489 1603
1490/** 1604/**
1491 * batadv_bla_rx 1605 * batadv_bla_rx - check packets coming from the mesh.
1492 * @bat_priv: the bat priv with all the soft interface information 1606 * @bat_priv: the bat priv with all the soft interface information
1493 * @skb: the frame to be checked 1607 * @skb: the frame to be checked
1494 * @vid: the VLAN ID of the frame 1608 * @vid: the VLAN ID of the frame
1495 * @is_bcast: the packet came in a broadcast packet type. 1609 * @is_bcast: the packet came in a broadcast packet type.
1496 * 1610 *
1497 * bla_rx avoidance checks if: 1611 * batadv_bla_rx avoidance checks if:
1498 * * we have to race for a claim 1612 * * we have to race for a claim
1499 * * if the frame is allowed on the LAN 1613 * * if the frame is allowed on the LAN
1500 * 1614 *
1501 * in these cases, the skb is further handled by this function and 1615 * in these cases, the skb is further handled by this function
1502 * returns 1, otherwise it returns 0 and the caller shall further 1616 *
1617 * Return: 1 if handled, otherwise it returns 0 and the caller shall further
1503 * process the skb. 1618 * process the skb.
1504 */ 1619 */
1505int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, 1620int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
@@ -1576,27 +1691,28 @@ handled:
1576 1691
1577out: 1692out:
1578 if (primary_if) 1693 if (primary_if)
1579 batadv_hardif_free_ref(primary_if); 1694 batadv_hardif_put(primary_if);
1580 if (claim) 1695 if (claim)
1581 batadv_claim_free_ref(claim); 1696 batadv_claim_put(claim);
1582 return ret; 1697 return ret;
1583} 1698}
1584 1699
1585/** 1700/**
1586 * batadv_bla_tx 1701 * batadv_bla_tx - check packets going into the mesh
1587 * @bat_priv: the bat priv with all the soft interface information 1702 * @bat_priv: the bat priv with all the soft interface information
1588 * @skb: the frame to be checked 1703 * @skb: the frame to be checked
1589 * @vid: the VLAN ID of the frame 1704 * @vid: the VLAN ID of the frame
1590 * 1705 *
1591 * bla_tx checks if: 1706 * batadv_bla_tx checks if:
1592 * * a claim was received which has to be processed 1707 * * a claim was received which has to be processed
1593 * * the frame is allowed on the mesh 1708 * * the frame is allowed on the mesh
1594 * 1709 *
1595 * in these cases, the skb is further handled by this function and 1710 * in these cases, the skb is further handled by this function.
1596 * returns 1, otherwise it returns 0 and the caller shall further
1597 * process the skb.
1598 * 1711 *
1599 * This call might reallocate skb data. 1712 * This call might reallocate skb data.
1713 *
1714 * Return: 1 if handled, otherwise it returns 0 and the caller shall further
1715 * process the skb.
1600 */ 1716 */
1601int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, 1717int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
1602 unsigned short vid) 1718 unsigned short vid)
@@ -1664,12 +1780,19 @@ handled:
1664 ret = 1; 1780 ret = 1;
1665out: 1781out:
1666 if (primary_if) 1782 if (primary_if)
1667 batadv_hardif_free_ref(primary_if); 1783 batadv_hardif_put(primary_if);
1668 if (claim) 1784 if (claim)
1669 batadv_claim_free_ref(claim); 1785 batadv_claim_put(claim);
1670 return ret; 1786 return ret;
1671} 1787}
1672 1788
1789/**
1790 * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file
1791 * @seq: seq file to print on
1792 * @offset: not used
1793 *
1794 * Return: always 0
1795 */
1673int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) 1796int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
1674{ 1797{
1675 struct net_device *net_dev = (struct net_device *)seq->private; 1798 struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1715,10 +1838,18 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
1715 } 1838 }
1716out: 1839out:
1717 if (primary_if) 1840 if (primary_if)
1718 batadv_hardif_free_ref(primary_if); 1841 batadv_hardif_put(primary_if);
1719 return 0; 1842 return 0;
1720} 1843}
1721 1844
1845/**
1846 * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
1847 * file
1848 * @seq: seq file to print on
1849 * @offset: not used
1850 *
1851 * Return: always 0
1852 */
1722int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) 1853int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
1723{ 1854{
1724 struct net_device *net_dev = (struct net_device *)seq->private; 1855 struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1772,6 +1903,6 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
1772 } 1903 }
1773out: 1904out:
1774 if (primary_if) 1905 if (primary_if)
1775 batadv_hardif_free_ref(primary_if); 1906 batadv_hardif_put(primary_if);
1776 return 0; 1907 return 0;
1777} 1908}
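The bridge loop avoidance changes above follow the recurring pattern of this patch set: the open-coded atomic_t reference counters on claims and backbone gateways are replaced by the generic kref API, and the *_free_ref() helpers are renamed to *_put(). A minimal sketch of that lifecycle, using a hypothetical struct my_entry instead of the real batman-adv types, could look like this (the extra kref_get() accounts for the reference kept by the hash table in addition to the one returned to the caller):

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_entry {
	struct kref refcount;
	struct rcu_head rcu;
};

static void my_entry_release(struct kref *ref)
{
	struct my_entry *entry = container_of(ref, struct my_entry, refcount);

	/* defer the actual free until after an RCU grace period */
	kfree_rcu(entry, rcu);
}

static void my_entry_put(struct my_entry *entry)
{
	kref_put(&entry->refcount, my_entry_release);
}

static struct my_entry *my_entry_new(void)
{
	struct my_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		return NULL;

	kref_init(&entry->refcount);	/* reference returned to the caller */
	kref_get(&entry->refcount);	/* reference held by the hash table */
	return entry;
}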
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 7ea199b8b5ab..579f0fa6fe6a 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich 3 * Simon Wunderlich
4 * 4 *
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 037ad0a5f485..48253cf8341b 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -281,6 +281,8 @@ static int batadv_originators_open(struct inode *inode, struct file *file)
281 * originator table of a hard interface 281 * originator table of a hard interface
282 * @inode: inode pointer to debugfs file 282 * @inode: inode pointer to debugfs file
283 * @file: pointer to the seq_file 283 * @file: pointer to the seq_file
284 *
285 * Return: 0 on success or negative error number in case of failure
284 */ 286 */
285static int batadv_originators_hardif_open(struct inode *inode, 287static int batadv_originators_hardif_open(struct inode *inode,
286 struct file *file) 288 struct file *file)
@@ -329,6 +331,8 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
329 * batadv_dat_cache_open - Prepare file handler for reads from dat_cache 331 * batadv_dat_cache_open - Prepare file handler for reads from dat_cache
330 * @inode: inode which was opened 332 * @inode: inode which was opened
331 * @file: file handle to be initialized 333 * @file: file handle to be initialized
334 *
335 * Return: 0 on success or negative error number in case of failure
332 */ 336 */
333static int batadv_dat_cache_open(struct inode *inode, struct file *file) 337static int batadv_dat_cache_open(struct inode *inode, struct file *file)
334{ 338{
@@ -483,6 +487,8 @@ void batadv_debugfs_destroy(void)
483 * batadv_debugfs_add_hardif - creates the base directory for a hard interface 487 * batadv_debugfs_add_hardif - creates the base directory for a hard interface
484 * in debugfs. 488 * in debugfs.
485 * @hard_iface: hard interface which should be added. 489 * @hard_iface: hard interface which should be added.
490 *
491 * Return: 0 on success or negative error number in case of failure
486 */ 492 */
487int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) 493int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
488{ 494{
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 80ab8d6f0ab3..1ab4e2e63afc 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a49c705fb86b..3e6b2624f980 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Antonio Quartulli 3 * Antonio Quartulli
4 * 4 *
@@ -30,6 +30,7 @@
30#include <linux/in.h> 30#include <linux/in.h>
31#include <linux/jiffies.h> 31#include <linux/jiffies.h>
32#include <linux/kernel.h> 32#include <linux/kernel.h>
33#include <linux/kref.h>
33#include <linux/list.h> 34#include <linux/list.h>
34#include <linux/rculist.h> 35#include <linux/rculist.h>
35#include <linux/rcupdate.h> 36#include <linux/rcupdate.h>
@@ -62,21 +63,34 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
62} 63}
63 64
64/** 65/**
65 * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly 66 * batadv_dat_entry_release - release dat_entry from lists and queue for free
66 * free it 67 * after rcu grace period
67 * @dat_entry: the entry to free 68 * @ref: kref pointer of the dat_entry
68 */ 69 */
69static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) 70static void batadv_dat_entry_release(struct kref *ref)
70{ 71{
71 if (atomic_dec_and_test(&dat_entry->refcount)) 72 struct batadv_dat_entry *dat_entry;
72 kfree_rcu(dat_entry, rcu); 73
74 dat_entry = container_of(ref, struct batadv_dat_entry, refcount);
75
76 kfree_rcu(dat_entry, rcu);
77}
78
79/**
80 * batadv_dat_entry_put - decrement the dat_entry refcounter and possibly
81 * release it
82 * @dat_entry: dat_entry to be free'd
83 */
84static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
85{
86 kref_put(&dat_entry->refcount, batadv_dat_entry_release);
73} 87}
74 88
75/** 89/**
76 * batadv_dat_to_purge - check whether a dat_entry has to be purged or not 90 * batadv_dat_to_purge - check whether a dat_entry has to be purged or not
77 * @dat_entry: the entry to check 91 * @dat_entry: the entry to check
78 * 92 *
79 * Returns true if the entry has to be purged now, false otherwise. 93 * Return: true if the entry has to be purged now, false otherwise.
80 */ 94 */
81static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) 95static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
82{ 96{
@@ -121,7 +135,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
121 continue; 135 continue;
122 136
123 hlist_del_rcu(&dat_entry->hash_entry); 137 hlist_del_rcu(&dat_entry->hash_entry);
124 batadv_dat_entry_free_ref(dat_entry); 138 batadv_dat_entry_put(dat_entry);
125 } 139 }
126 spin_unlock_bh(list_lock); 140 spin_unlock_bh(list_lock);
127 } 141 }
@@ -151,7 +165,7 @@ static void batadv_dat_purge(struct work_struct *work)
151 * @node: node in the local table 165 * @node: node in the local table
152 * @data2: second object to compare the node to 166 * @data2: second object to compare the node to
153 * 167 *
154 * Returns 1 if the two entries are the same, 0 otherwise. 168 * Return: 1 if the two entries are the same, 0 otherwise.
155 */ 169 */
156static int batadv_compare_dat(const struct hlist_node *node, const void *data2) 170static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
157{ 171{
@@ -166,7 +180,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
166 * @skb: ARP packet 180 * @skb: ARP packet
167 * @hdr_size: size of the possible header before the ARP packet 181 * @hdr_size: size of the possible header before the ARP packet
168 * 182 *
169 * Returns the value of the hw_src field in the ARP packet. 183 * Return: the value of the hw_src field in the ARP packet.
170 */ 184 */
171static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) 185static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
172{ 186{
@@ -183,7 +197,7 @@ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
183 * @skb: ARP packet 197 * @skb: ARP packet
184 * @hdr_size: size of the possible header before the ARP packet 198 * @hdr_size: size of the possible header before the ARP packet
185 * 199 *
186 * Returns the value of the ip_src field in the ARP packet. 200 * Return: the value of the ip_src field in the ARP packet.
187 */ 201 */
188static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) 202static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
189{ 203{
@@ -195,7 +209,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
195 * @skb: ARP packet 209 * @skb: ARP packet
196 * @hdr_size: size of the possible header before the ARP packet 210 * @hdr_size: size of the possible header before the ARP packet
197 * 211 *
198 * Returns the value of the hw_dst field in the ARP packet. 212 * Return: the value of the hw_dst field in the ARP packet.
199 */ 213 */
200static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) 214static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
201{ 215{
@@ -207,7 +221,7 @@ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
207 * @skb: ARP packet 221 * @skb: ARP packet
208 * @hdr_size: size of the possible header before the ARP packet 222 * @hdr_size: size of the possible header before the ARP packet
209 * 223 *
210 * Returns the value of the ip_dst field in the ARP packet. 224 * Return: the value of the ip_dst field in the ARP packet.
211 */ 225 */
212static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) 226static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
213{ 227{
@@ -219,7 +233,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
219 * @data: data to hash 233 * @data: data to hash
220 * @size: size of the hash table 234 * @size: size of the hash table
221 * 235 *
222 * Returns the selected index in the hash table for the given data. 236 * Return: the selected index in the hash table for the given data.
223 */ 237 */
224static u32 batadv_hash_dat(const void *data, u32 size) 238static u32 batadv_hash_dat(const void *data, u32 size)
225{ 239{
@@ -256,7 +270,7 @@ static u32 batadv_hash_dat(const void *data, u32 size)
256 * @ip: search key 270 * @ip: search key
257 * @vid: VLAN identifier 271 * @vid: VLAN identifier
258 * 272 *
259 * Returns the dat_entry if found, NULL otherwise. 273 * Return: the dat_entry if found, NULL otherwise.
260 */ 274 */
261static struct batadv_dat_entry * 275static struct batadv_dat_entry *
262batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, 276batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
@@ -281,7 +295,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
281 if (dat_entry->ip != ip) 295 if (dat_entry->ip != ip)
282 continue; 296 continue;
283 297
284 if (!atomic_inc_not_zero(&dat_entry->refcount)) 298 if (!kref_get_unless_zero(&dat_entry->refcount))
285 continue; 299 continue;
286 300
287 dat_entry_tmp = dat_entry; 301 dat_entry_tmp = dat_entry;
@@ -326,7 +340,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
326 dat_entry->vid = vid; 340 dat_entry->vid = vid;
327 ether_addr_copy(dat_entry->mac_addr, mac_addr); 341 ether_addr_copy(dat_entry->mac_addr, mac_addr);
328 dat_entry->last_update = jiffies; 342 dat_entry->last_update = jiffies;
329 atomic_set(&dat_entry->refcount, 2); 343 kref_init(&dat_entry->refcount);
344 kref_get(&dat_entry->refcount);
330 345
331 hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, 346 hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
332 batadv_hash_dat, dat_entry, 347 batadv_hash_dat, dat_entry,
@@ -334,7 +349,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
334 349
335 if (unlikely(hash_added != 0)) { 350 if (unlikely(hash_added != 0)) {
336 /* remove the reference for the hash */ 351 /* remove the reference for the hash */
337 batadv_dat_entry_free_ref(dat_entry); 352 batadv_dat_entry_put(dat_entry);
338 goto out; 353 goto out;
339 } 354 }
340 355
@@ -343,7 +358,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
343 358
344out: 359out:
345 if (dat_entry) 360 if (dat_entry)
346 batadv_dat_entry_free_ref(dat_entry); 361 batadv_dat_entry_put(dat_entry);
347} 362}
348 363
349#ifdef CONFIG_BATMAN_ADV_DEBUG 364#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -440,7 +455,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
440 * @candidate: orig_node under evaluation 455 * @candidate: orig_node under evaluation
441 * @max_orig_node: last selected candidate 456 * @max_orig_node: last selected candidate
442 * 457 *
443 * Returns true if the node has been elected as next candidate or false 458 * Return: true if the node has been elected as next candidate or false
444 * otherwise. 459 * otherwise.
445 */ 460 */
446static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, 461static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
@@ -527,12 +542,12 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
527 max_orig_node)) 542 max_orig_node))
528 continue; 543 continue;
529 544
530 if (!atomic_inc_not_zero(&orig_node->refcount)) 545 if (!kref_get_unless_zero(&orig_node->refcount))
531 continue; 546 continue;
532 547
533 max = tmp_max; 548 max = tmp_max;
534 if (max_orig_node) 549 if (max_orig_node)
535 batadv_orig_node_free_ref(max_orig_node); 550 batadv_orig_node_put(max_orig_node);
536 max_orig_node = orig_node; 551 max_orig_node = orig_node;
537 } 552 }
538 rcu_read_unlock(); 553 rcu_read_unlock();
@@ -553,15 +568,17 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
553 * be sent to 568 * be sent to
554 * @bat_priv: the bat priv with all the soft interface information 569 * @bat_priv: the bat priv with all the soft interface information
555 * @ip_dst: ipv4 to look up in the DHT 570 * @ip_dst: ipv4 to look up in the DHT
571 * @vid: VLAN identifier
556 * 572 *
557 * An originator O is selected if and only if its DHT_ID value is one of three 573 * An originator O is selected if and only if its DHT_ID value is one of three
558 * closest values (from the LEFT, with wrap around if needed) than the hash 574 * closest values (from the LEFT, with wrap around if needed) than the hash
559 * value of the key. ip_dst is the key. 575 * value of the key. ip_dst is the key.
560 * 576 *
561 * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. 577 * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM.
562 */ 578 */
563static struct batadv_dat_candidate * 579static struct batadv_dat_candidate *
564batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) 580batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
581 unsigned short vid)
565{ 582{
566 int select; 583 int select;
567 batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; 584 batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
@@ -577,7 +594,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
577 return NULL; 594 return NULL;
578 595
579 dat.ip = ip_dst; 596 dat.ip = ip_dst;
580 dat.vid = 0; 597 dat.vid = vid;
581 ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, 598 ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
582 BATADV_DAT_ADDR_MAX); 599 BATADV_DAT_ADDR_MAX);
583 600
@@ -597,17 +614,18 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
597 * @bat_priv: the bat priv with all the soft interface information 614 * @bat_priv: the bat priv with all the soft interface information
598 * @skb: payload to send 615 * @skb: payload to send
599 * @ip: the DHT key 616 * @ip: the DHT key
617 * @vid: VLAN identifier
600 * @packet_subtype: unicast4addr packet subtype to use 618 * @packet_subtype: unicast4addr packet subtype to use
601 * 619 *
602 * This function copies the skb with pskb_copy() and sends it as a unicast packet 620 * This function copies the skb with pskb_copy() and sends it as a unicast packet
603 * to each of the selected candidates. 621 * to each of the selected candidates.
604 * 622 *
605 * Returns true if the packet is sent to at least one candidate, false 623 * Return: true if the packet is sent to at least one candidate, false
606 * otherwise. 624 * otherwise.
607 */ 625 */
608static bool batadv_dat_send_data(struct batadv_priv *bat_priv, 626static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
609 struct sk_buff *skb, __be32 ip, 627 struct sk_buff *skb, __be32 ip,
610 int packet_subtype) 628 unsigned short vid, int packet_subtype)
611{ 629{
612 int i; 630 int i;
613 bool ret = false; 631 bool ret = false;
@@ -616,7 +634,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
616 struct sk_buff *tmp_skb; 634 struct sk_buff *tmp_skb;
617 struct batadv_dat_candidate *cand; 635 struct batadv_dat_candidate *cand;
618 636
619 cand = batadv_dat_select_candidates(bat_priv, ip); 637 cand = batadv_dat_select_candidates(bat_priv, ip, vid);
620 if (!cand) 638 if (!cand)
621 goto out; 639 goto out;
622 640
@@ -639,9 +657,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
639 goto free_neigh; 657 goto free_neigh;
640 } 658 }
641 659
642 send_status = batadv_send_skb_packet(tmp_skb, 660 send_status = batadv_send_unicast_skb(tmp_skb, neigh_node);
643 neigh_node->if_incoming,
644 neigh_node->addr);
645 if (send_status == NET_XMIT_SUCCESS) { 661 if (send_status == NET_XMIT_SUCCESS) {
646 /* count the sent packet */ 662 /* count the sent packet */
647 switch (packet_subtype) { 663 switch (packet_subtype) {
@@ -659,9 +675,9 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
659 ret = true; 675 ret = true;
660 } 676 }
661free_neigh: 677free_neigh:
662 batadv_neigh_node_free_ref(neigh_node); 678 batadv_neigh_node_put(neigh_node);
663free_orig: 679free_orig:
664 batadv_orig_node_free_ref(cand[i].orig_node); 680 batadv_orig_node_put(cand[i].orig_node);
665 } 681 }
666 682
667out: 683out:
@@ -741,6 +757,8 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
741/** 757/**
742 * batadv_dat_init - initialise the DAT internals 758 * batadv_dat_init - initialise the DAT internals
743 * @bat_priv: the bat priv with all the soft interface information 759 * @bat_priv: the bat priv with all the soft interface information
760 *
761 * Return: 0 in case of success, a negative error code otherwise
744 */ 762 */
745int batadv_dat_init(struct batadv_priv *bat_priv) 763int batadv_dat_init(struct batadv_priv *bat_priv)
746{ 764{
@@ -779,6 +797,8 @@ void batadv_dat_free(struct batadv_priv *bat_priv)
779 * batadv_dat_cache_seq_print_text - print the local DAT hash table 797 * batadv_dat_cache_seq_print_text - print the local DAT hash table
780 * @seq: seq file to print on 798 * @seq: seq file to print on
781 * @offset: not used 799 * @offset: not used
800 *
801 * Return: always 0
782 */ 802 */
783int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) 803int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
784{ 804{
@@ -821,7 +841,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
821 841
822out: 842out:
823 if (primary_if) 843 if (primary_if)
824 batadv_hardif_free_ref(primary_if); 844 batadv_hardif_put(primary_if);
825 return 0; 845 return 0;
826} 846}
827 847
@@ -831,7 +851,7 @@ out:
831 * @skb: packet to analyse 851 * @skb: packet to analyse
832 * @hdr_size: size of the possible header before the ARP packet in the skb 852 * @hdr_size: size of the possible header before the ARP packet in the skb
833 * 853 *
834 * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. 854 * Return: the ARP type if the skb contains a valid ARP packet, 0 otherwise.
835 */ 855 */
836static u16 batadv_arp_get_type(struct batadv_priv *bat_priv, 856static u16 batadv_arp_get_type(struct batadv_priv *bat_priv,
837 struct sk_buff *skb, int hdr_size) 857 struct sk_buff *skb, int hdr_size)
@@ -904,8 +924,9 @@ out:
904 * @skb: the buffer containing the packet to extract the VID from 924 * @skb: the buffer containing the packet to extract the VID from
905 * @hdr_size: the size of the batman-adv header encapsulating the packet 925 * @hdr_size: the size of the batman-adv header encapsulating the packet
906 * 926 *
907 * If the packet embedded in the skb is vlan tagged this function returns the 927 * Return: If the packet embedded in the skb is vlan tagged this function
908 * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned. 928 * returns the VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS
929 * is returned.
909 */ 930 */
910static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) 931static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
911{ 932{
@@ -930,7 +951,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
930 * @bat_priv: the bat priv with all the soft interface information 951 * @bat_priv: the bat priv with all the soft interface information
931 * @skb: packet to check 952 * @skb: packet to check
932 * 953 *
933 * Returns true if the message has been sent to the dht candidates, false 954 * Return: true if the message has been sent to the dht candidates, false
934 * otherwise. In case of a positive return value the message has to be enqueued 955 * otherwise. In case of a positive return value the message has to be enqueued
935 * to permit the fallback. 956 * to permit the fallback.
936 */ 957 */
@@ -1004,12 +1025,12 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
1004 ret = true; 1025 ret = true;
1005 } else { 1026 } else {
1006 /* Send the request to the DHT */ 1027 /* Send the request to the DHT */
1007 ret = batadv_dat_send_data(bat_priv, skb, ip_dst, 1028 ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
1008 BATADV_P_DAT_DHT_GET); 1029 BATADV_P_DAT_DHT_GET);
1009 } 1030 }
1010out: 1031out:
1011 if (dat_entry) 1032 if (dat_entry)
1012 batadv_dat_entry_free_ref(dat_entry); 1033 batadv_dat_entry_put(dat_entry);
1013 return ret; 1034 return ret;
1014} 1035}
1015 1036
@@ -1020,7 +1041,7 @@ out:
1020 * @skb: packet to check 1041 * @skb: packet to check
1021 * @hdr_size: size of the encapsulation header 1042 * @hdr_size: size of the encapsulation header
1022 * 1043 *
1023 * Returns true if the request has been answered, false otherwise. 1044 * Return: true if the request has been answered, false otherwise.
1024 */ 1045 */
1025bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, 1046bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
1026 struct sk_buff *skb, int hdr_size) 1047 struct sk_buff *skb, int hdr_size)
@@ -1089,7 +1110,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
1089 } 1110 }
1090out: 1111out:
1091 if (dat_entry) 1112 if (dat_entry)
1092 batadv_dat_entry_free_ref(dat_entry); 1113 batadv_dat_entry_put(dat_entry);
1093 if (ret) 1114 if (ret)
1094 kfree_skb(skb); 1115 kfree_skb(skb);
1095 return ret; 1116 return ret;
@@ -1132,8 +1153,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
1132 /* Send the ARP reply to the candidates for both the IP addresses that 1153 /* Send the ARP reply to the candidates for both the IP addresses that
1133 * the node obtained from the ARP reply 1154 * the node obtained from the ARP reply
1134 */ 1155 */
1135 batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); 1156 batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
1136 batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); 1157 batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
1137} 1158}
1138 1159
1139/** 1160/**
@@ -1143,7 +1164,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
1143 * @skb: packet to check 1164 * @skb: packet to check
1144 * @hdr_size: size of the encapsulation header 1165 * @hdr_size: size of the encapsulation header
1145 * 1166 *
1146 * Returns true if the packet was snooped and consumed by DAT. False if the 1167 * Return: true if the packet was snooped and consumed by DAT. False if the
1147 * packet has to be delivered to the interface 1168 * packet has to be delivered to the interface
1148 */ 1169 */
1149bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, 1170bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
@@ -1200,7 +1221,7 @@ out:
1200 * @bat_priv: the bat priv with all the soft interface information 1221 * @bat_priv: the bat priv with all the soft interface information
1201 * @forw_packet: the broadcast packet 1222 * @forw_packet: the broadcast packet
1202 * 1223 *
1203 * Returns true if the node can drop the packet, false otherwise. 1224 * Return: true if the node can drop the packet, false otherwise.
1204 */ 1225 */
1205bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, 1226bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
1206 struct batadv_forw_packet *forw_packet) 1227 struct batadv_forw_packet *forw_packet)
@@ -1242,6 +1263,6 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
1242 1263
1243out: 1264out:
1244 if (dat_entry) 1265 if (dat_entry)
1245 batadv_dat_entry_free_ref(dat_entry); 1266 batadv_dat_entry_put(dat_entry);
1246 return ret; 1267 return ret;
1247} 1268}
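The dat_entry lookup converted above shows the companion pattern on the reader side: hash buckets are walked under rcu_read_lock(), and a reference is only taken when kref_get_unless_zero() succeeds, so an entry whose refcount has already dropped to zero is skipped instead of being resurrected. A condensed sketch with hypothetical my_entry/my_entry_find names:

#include <linux/kref.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/types.h>

struct my_entry {
	u32 key;
	struct hlist_node hash_entry;
	struct kref refcount;
};

static struct my_entry *my_entry_find(struct hlist_head *bucket, u32 key)
{
	struct my_entry *entry;
	struct my_entry *found = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, bucket, hash_entry) {
		if (entry->key != key)
			continue;

		/* skip entries whose last reference is already being dropped */
		if (!kref_get_unless_zero(&entry->refcount))
			continue;

		found = entry;
		break;
	}
	rcu_read_unlock();

	return found;
}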
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 26d4a525a798..813ecea96cf9 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Antonio Quartulli 3 * Antonio Quartulli
4 * 4 *
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 20d9282f895b..e6956d0746a2 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Martin Hundebøll <martin@hundeboll.net> 3 * Martin Hundebøll <martin@hundeboll.net>
4 * 4 *
@@ -85,7 +85,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
85/** 85/**
86 * batadv_frag_size_limit - maximum possible size of packet to be fragmented 86 * batadv_frag_size_limit - maximum possible size of packet to be fragmented
87 * 87 *
88 * Returns the maximum size of payload that can be fragmented. 88 * Return: the maximum size of payload that can be fragmented.
89 */ 89 */
90static int batadv_frag_size_limit(void) 90static int batadv_frag_size_limit(void)
91{ 91{
@@ -107,7 +107,7 @@ static int batadv_frag_size_limit(void)
107 * 107 *
108 * Caller must hold chain->lock. 108 * Caller must hold chain->lock.
109 * 109 *
110 * Returns true if chain is empty and caller can just insert the new fragment 110 * Return: true if chain is empty and caller can just insert the new fragment
111 * without searching for the right position. 111 * without searching for the right position.
112 */ 112 */
113static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, 113static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
@@ -136,7 +136,7 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
136 * Insert a new fragment into the reverse ordered chain in the right table 136 * Insert a new fragment into the reverse ordered chain in the right table
137 * entry. The hash table entry is cleared if "old" fragments exist in it. 137 * entry. The hash table entry is cleared if "old" fragments exist in it.
138 * 138 *
139 * Returns true if skb is buffered, false on error. If the chain has all the 139 * Return: true if skb is buffered, false on error. If the chain has all the
140 * fragments needed to merge the packet, the chain is moved to the passed head 140 * fragments needed to merge the packet, the chain is moved to the passed head
141 * to avoid locking the chain in the table. 141 * to avoid locking the chain in the table.
142 */ 142 */
@@ -242,12 +242,11 @@ err:
242/** 242/**
243 * batadv_frag_merge_packets - merge a chain of fragments 243 * batadv_frag_merge_packets - merge a chain of fragments
244 * @chain: head of chain with fragments 244 * @chain: head of chain with fragments
245 * @skb: packet with total size of skb after merging
246 * 245 *
247 * Expand the first skb in the chain and copy the content of the remaining 246 * Expand the first skb in the chain and copy the content of the remaining
248 * skb's into the expanded one. After doing so, clear the chain. 247 * skb's into the expanded one. After doing so, clear the chain.
249 * 248 *
250 * Returns the merged skb or NULL on error. 249 * Return: the merged skb or NULL on error.
251 */ 250 */
252static struct sk_buff * 251static struct sk_buff *
253batadv_frag_merge_packets(struct hlist_head *chain) 252batadv_frag_merge_packets(struct hlist_head *chain)
@@ -307,6 +306,9 @@ free:
307 * There are three possible outcomes: 1) Packet is merged: Return true and 306 * There are three possible outcomes: 1) Packet is merged: Return true and
308 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb 307 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
309 * to NULL; 3) Error: Return false and leave skb as is. 308 * to NULL; 3) Error: Return false and leave skb as is.
309 *
 310 * Return: true when the packet is merged or buffered, false when the skb is
 311 * not used.
310 */ 312 */
311bool batadv_frag_skb_buffer(struct sk_buff **skb, 313bool batadv_frag_skb_buffer(struct sk_buff **skb,
312 struct batadv_orig_node *orig_node_src) 314 struct batadv_orig_node *orig_node_src)
@@ -344,7 +346,7 @@ out_err:
344 * will exceed the MTU towards the next-hop. If so, the fragment is forwarded 346 * will exceed the MTU towards the next-hop. If so, the fragment is forwarded
345 * without merging it. 347 * without merging it.
346 * 348 *
347 * Returns true if the fragment is consumed/forwarded, false otherwise. 349 * Return: true if the fragment is consumed/forwarded, false otherwise.
348 */ 350 */
349bool batadv_frag_skb_fwd(struct sk_buff *skb, 351bool batadv_frag_skb_fwd(struct sk_buff *skb,
350 struct batadv_hard_iface *recv_if, 352 struct batadv_hard_iface *recv_if,
@@ -376,16 +378,15 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
376 skb->len + ETH_HLEN); 378 skb->len + ETH_HLEN);
377 379
378 packet->ttl--; 380 packet->ttl--;
379 batadv_send_skb_packet(skb, neigh_node->if_incoming, 381 batadv_send_unicast_skb(skb, neigh_node);
380 neigh_node->addr);
381 ret = true; 382 ret = true;
382 } 383 }
383 384
384out: 385out:
385 if (orig_node_dst) 386 if (orig_node_dst)
386 batadv_orig_node_free_ref(orig_node_dst); 387 batadv_orig_node_put(orig_node_dst);
387 if (neigh_node) 388 if (neigh_node)
388 batadv_neigh_node_free_ref(neigh_node); 389 batadv_neigh_node_put(neigh_node);
389 return ret; 390 return ret;
390} 391}
391 392
@@ -399,7 +400,7 @@ out:
399 * passed mtu and the old one with the rest. The new skb contains data from the 400 * passed mtu and the old one with the rest. The new skb contains data from the
400 * tail of the old skb. 401 * tail of the old skb.
401 * 402 *
402 * Returns the new fragment, NULL on error. 403 * Return: the new fragment, NULL on error.
403 */ 404 */
404static struct sk_buff *batadv_frag_create(struct sk_buff *skb, 405static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
405 struct batadv_frag_packet *frag_head, 406 struct batadv_frag_packet *frag_head,
@@ -433,7 +434,7 @@ err:
433 * @orig_node: final destination of the created fragments 434 * @orig_node: final destination of the created fragments
434 * @neigh_node: next-hop of the created fragments 435 * @neigh_node: next-hop of the created fragments
435 * 436 *
436 * Returns true on success, false otherwise. 437 * Return: true on success, false otherwise.
437 */ 438 */
438bool batadv_frag_send_packet(struct sk_buff *skb, 439bool batadv_frag_send_packet(struct sk_buff *skb,
439 struct batadv_orig_node *orig_node, 440 struct batadv_orig_node *orig_node,
@@ -484,8 +485,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
484 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); 485 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
485 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, 486 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
486 skb_fragment->len + ETH_HLEN); 487 skb_fragment->len + ETH_HLEN);
487 batadv_send_skb_packet(skb_fragment, neigh_node->if_incoming, 488 batadv_send_unicast_skb(skb_fragment, neigh_node);
488 neigh_node->addr);
489 frag_header.no++; 489 frag_header.no++;
490 490
491 /* The initial check in this function should cover this case */ 491 /* The initial check in this function should cover this case */
@@ -504,13 +504,13 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
504 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); 504 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
505 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, 505 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
506 skb->len + ETH_HLEN); 506 skb->len + ETH_HLEN);
507 batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); 507 batadv_send_unicast_skb(skb, neigh_node);
508 508
509 ret = true; 509 ret = true;
510 510
511out_err: 511out_err:
512 if (primary_if) 512 if (primary_if)
513 batadv_hardif_free_ref(primary_if); 513 batadv_hardif_put(primary_if);
514 514
515 return ret; 515 return ret;
516} 516}
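The fragmentation changes also switch the send path from batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr) to the new batadv_send_unicast_skb(skb, neigh_node) helper. Judging only from the call sites in this hunk, the helper is presumably a thin wrapper along these lines (a sketch of the idea, not the actual send.c implementation):

#include <linux/skbuff.h>

#include "send.h"
#include "types.h"

/* hypothetical rendering: derive the outgoing interface and destination MAC
 * from the neighbour instead of having every caller pass them explicitly
 */
static int batadv_send_unicast_skb_sketch(struct sk_buff *skb,
					  struct batadv_neigh_node *neigh)
{
	return batadv_send_skb_packet(skb, neigh->if_incoming, neigh->addr);
}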
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 8b9877e70b95..9ff77c7ef7c7 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Martin Hundebøll <martin@hundeboll.net> 3 * Martin Hundebøll <martin@hundeboll.net>
4 * 4 *
@@ -42,7 +42,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
42 * batadv_frag_check_entry - check if a list of fragments has timed out 42 * batadv_frag_check_entry - check if a list of fragments has timed out
43 * @frags_entry: table entry to check 43 * @frags_entry: table entry to check
44 * 44 *
45 * Returns true if the frags entry has timed out, false otherwise. 45 * Return: true if the frags entry has timed out, false otherwise.
46 */ 46 */
47static inline bool 47static inline bool
48batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) 48batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry)
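The batadv_frag_check_entry() kernel-doc above describes a plain jiffies-based timeout test on a fragment chain. As a generic illustration (a hypothetical helper, not the actual fragmentation.h body), such a check typically reduces to:

#include <linux/jiffies.h>
#include <linux/types.h>

/* a chain is stale once its timestamp is older than the timeout in ms */
static bool frag_chain_timed_out(unsigned long timestamp, unsigned int timeout_ms)
{
	return time_is_before_jiffies(timestamp + msecs_to_jiffies(timeout_ms));
}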
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index ccf70bed0d0c..c59aff5ccac8 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -28,6 +28,7 @@
28#include <linux/ip.h> 28#include <linux/ip.h>
29#include <linux/ipv6.h> 29#include <linux/ipv6.h>
30#include <linux/kernel.h> 30#include <linux/kernel.h>
31#include <linux/kref.h>
31#include <linux/list.h> 32#include <linux/list.h>
32#include <linux/netdevice.h> 33#include <linux/netdevice.h>
33#include <linux/rculist.h> 34#include <linux/rculist.h>
@@ -59,12 +60,28 @@
59 */ 60 */
60#define BATADV_DHCP_CHADDR_OFFSET 28 61#define BATADV_DHCP_CHADDR_OFFSET 28
61 62
62static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) 63/**
64 * batadv_gw_node_release - release gw_node from lists and queue for free after
65 * rcu grace period
66 * @ref: kref pointer of the gw_node
67 */
68static void batadv_gw_node_release(struct kref *ref)
63{ 69{
64 if (atomic_dec_and_test(&gw_node->refcount)) { 70 struct batadv_gw_node *gw_node;
65 batadv_orig_node_free_ref(gw_node->orig_node); 71
66 kfree_rcu(gw_node, rcu); 72 gw_node = container_of(ref, struct batadv_gw_node, refcount);
67 } 73
74 batadv_orig_node_put(gw_node->orig_node);
75 kfree_rcu(gw_node, rcu);
76}
77
78/**
79 * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it
80 * @gw_node: gateway node to free
81 */
82static void batadv_gw_node_put(struct batadv_gw_node *gw_node)
83{
84 kref_put(&gw_node->refcount, batadv_gw_node_release);
68} 85}
69 86
70static struct batadv_gw_node * 87static struct batadv_gw_node *
@@ -77,7 +94,7 @@ batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
77 if (!gw_node) 94 if (!gw_node)
78 goto out; 95 goto out;
79 96
80 if (!atomic_inc_not_zero(&gw_node->refcount)) 97 if (!kref_get_unless_zero(&gw_node->refcount))
81 gw_node = NULL; 98 gw_node = NULL;
82 99
83out: 100out:
@@ -100,14 +117,14 @@ batadv_gw_get_selected_orig(struct batadv_priv *bat_priv)
100 if (!orig_node) 117 if (!orig_node)
101 goto unlock; 118 goto unlock;
102 119
103 if (!atomic_inc_not_zero(&orig_node->refcount)) 120 if (!kref_get_unless_zero(&orig_node->refcount))
104 orig_node = NULL; 121 orig_node = NULL;
105 122
106unlock: 123unlock:
107 rcu_read_unlock(); 124 rcu_read_unlock();
108out: 125out:
109 if (gw_node) 126 if (gw_node)
110 batadv_gw_node_free_ref(gw_node); 127 batadv_gw_node_put(gw_node);
111 return orig_node; 128 return orig_node;
112} 129}
113 130
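
Several hunks in this file change the same lookup idiom: objects reachable under rcu_read_lock() are only handed out after kref_get_unless_zero() succeeds, so an object whose refcount already hit zero can never be resurrected. Continuing the hypothetical struct foo sketch from above:

#include <linux/rculist.h>

/* lookup under RCU; on success the caller owns a reference and must
 * release it with foo_put() when done
 */
static struct foo *foo_get_first(struct hlist_head *head)
{
	struct foo *tmp, *found = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(tmp, head, list) {
		/* skip entries that are already on their way out */
		if (!kref_get_unless_zero(&tmp->refcount))
			continue;

		found = tmp;
		break;
	}
	rcu_read_unlock();

	return found;
}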
@@ -118,14 +135,14 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
118 135
119 spin_lock_bh(&bat_priv->gw.list_lock); 136 spin_lock_bh(&bat_priv->gw.list_lock);
120 137
121 if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) 138 if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount))
122 new_gw_node = NULL; 139 new_gw_node = NULL;
123 140
124 curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); 141 curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
125 rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node); 142 rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node);
126 143
127 if (curr_gw_node) 144 if (curr_gw_node)
128 batadv_gw_node_free_ref(curr_gw_node); 145 batadv_gw_node_put(curr_gw_node);
129 146
130 spin_unlock_bh(&bat_priv->gw.list_lock); 147 spin_unlock_bh(&bat_priv->gw.list_lock);
131} 148}
@@ -170,7 +187,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
170 if (!router_ifinfo) 187 if (!router_ifinfo)
171 goto next; 188 goto next;
172 189
173 if (!atomic_inc_not_zero(&gw_node->refcount)) 190 if (!kref_get_unless_zero(&gw_node->refcount))
174 goto next; 191 goto next;
175 192
176 tq_avg = router_ifinfo->bat_iv.tq_avg; 193 tq_avg = router_ifinfo->bat_iv.tq_avg;
@@ -186,9 +203,9 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
186 ((tmp_gw_factor == max_gw_factor) && 203 ((tmp_gw_factor == max_gw_factor) &&
187 (tq_avg > max_tq))) { 204 (tq_avg > max_tq))) {
188 if (curr_gw) 205 if (curr_gw)
189 batadv_gw_node_free_ref(curr_gw); 206 batadv_gw_node_put(curr_gw);
190 curr_gw = gw_node; 207 curr_gw = gw_node;
191 atomic_inc(&curr_gw->refcount); 208 kref_get(&curr_gw->refcount);
192 } 209 }
193 break; 210 break;
194 211
@@ -201,9 +218,9 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
201 */ 218 */
202 if (tq_avg > max_tq) { 219 if (tq_avg > max_tq) {
203 if (curr_gw) 220 if (curr_gw)
204 batadv_gw_node_free_ref(curr_gw); 221 batadv_gw_node_put(curr_gw);
205 curr_gw = gw_node; 222 curr_gw = gw_node;
206 atomic_inc(&curr_gw->refcount); 223 kref_get(&curr_gw->refcount);
207 } 224 }
208 break; 225 break;
209 } 226 }
@@ -214,12 +231,12 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
214 if (tmp_gw_factor > max_gw_factor) 231 if (tmp_gw_factor > max_gw_factor)
215 max_gw_factor = tmp_gw_factor; 232 max_gw_factor = tmp_gw_factor;
216 233
217 batadv_gw_node_free_ref(gw_node); 234 batadv_gw_node_put(gw_node);
218 235
219next: 236next:
220 batadv_neigh_node_free_ref(router); 237 batadv_neigh_node_put(router);
221 if (router_ifinfo) 238 if (router_ifinfo)
222 batadv_neigh_ifinfo_free_ref(router_ifinfo); 239 batadv_neigh_ifinfo_put(router_ifinfo);
223 } 240 }
224 rcu_read_unlock(); 241 rcu_read_unlock();
225 242
@@ -255,7 +272,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
255 */ 272 */
256 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL); 273 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL);
257 274
258 batadv_gw_node_free_ref(curr_gw); 275 batadv_gw_node_put(curr_gw);
259} 276}
260 277
261void batadv_gw_election(struct batadv_priv *bat_priv) 278void batadv_gw_election(struct batadv_priv *bat_priv)
@@ -330,13 +347,13 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
330 347
331out: 348out:
332 if (curr_gw) 349 if (curr_gw)
333 batadv_gw_node_free_ref(curr_gw); 350 batadv_gw_node_put(curr_gw);
334 if (next_gw) 351 if (next_gw)
335 batadv_gw_node_free_ref(next_gw); 352 batadv_gw_node_put(next_gw);
336 if (router) 353 if (router)
337 batadv_neigh_node_free_ref(router); 354 batadv_neigh_node_put(router);
338 if (router_ifinfo) 355 if (router_ifinfo)
339 batadv_neigh_ifinfo_free_ref(router_ifinfo); 356 batadv_neigh_ifinfo_put(router_ifinfo);
340} 357}
341 358
342void batadv_gw_check_election(struct batadv_priv *bat_priv, 359void batadv_gw_check_election(struct batadv_priv *bat_priv,
@@ -397,15 +414,15 @@ reselect:
397 batadv_gw_reselect(bat_priv); 414 batadv_gw_reselect(bat_priv);
398out: 415out:
399 if (curr_gw_orig) 416 if (curr_gw_orig)
400 batadv_orig_node_free_ref(curr_gw_orig); 417 batadv_orig_node_put(curr_gw_orig);
401 if (router_gw) 418 if (router_gw)
402 batadv_neigh_node_free_ref(router_gw); 419 batadv_neigh_node_put(router_gw);
403 if (router_orig) 420 if (router_orig)
404 batadv_neigh_node_free_ref(router_orig); 421 batadv_neigh_node_put(router_orig);
405 if (router_gw_tq) 422 if (router_gw_tq)
406 batadv_neigh_ifinfo_free_ref(router_gw_tq); 423 batadv_neigh_ifinfo_put(router_gw_tq);
407 if (router_orig_tq) 424 if (router_orig_tq)
408 batadv_neigh_ifinfo_free_ref(router_orig_tq); 425 batadv_neigh_ifinfo_put(router_orig_tq);
409} 426}
410 427
411/** 428/**
@@ -423,12 +440,12 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
423 if (gateway->bandwidth_down == 0) 440 if (gateway->bandwidth_down == 0)
424 return; 441 return;
425 442
426 if (!atomic_inc_not_zero(&orig_node->refcount)) 443 if (!kref_get_unless_zero(&orig_node->refcount))
427 return; 444 return;
428 445
429 gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); 446 gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
430 if (!gw_node) { 447 if (!gw_node) {
431 batadv_orig_node_free_ref(orig_node); 448 batadv_orig_node_put(orig_node);
432 return; 449 return;
433 } 450 }
434 451
@@ -436,7 +453,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
436 gw_node->orig_node = orig_node; 453 gw_node->orig_node = orig_node;
437 gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); 454 gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
438 gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); 455 gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
439 atomic_set(&gw_node->refcount, 1); 456 kref_init(&gw_node->refcount);
440 457
441 spin_lock_bh(&bat_priv->gw.list_lock); 458 spin_lock_bh(&bat_priv->gw.list_lock);
442 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); 459 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
@@ -456,7 +473,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
456 * @bat_priv: the bat priv with all the soft interface information 473 * @bat_priv: the bat priv with all the soft interface information
457 * @orig_node: originator announcing gateway capabilities 474 * @orig_node: originator announcing gateway capabilities
458 * 475 *
459 * Returns gateway node if found or NULL otherwise. 476 * Return: gateway node if found or NULL otherwise.
460 */ 477 */
461static struct batadv_gw_node * 478static struct batadv_gw_node *
462batadv_gw_node_get(struct batadv_priv *bat_priv, 479batadv_gw_node_get(struct batadv_priv *bat_priv,
@@ -469,7 +486,7 @@ batadv_gw_node_get(struct batadv_priv *bat_priv,
469 if (gw_node_tmp->orig_node != orig_node) 486 if (gw_node_tmp->orig_node != orig_node)
470 continue; 487 continue;
471 488
472 if (!atomic_inc_not_zero(&gw_node_tmp->refcount)) 489 if (!kref_get_unless_zero(&gw_node_tmp->refcount))
473 continue; 490 continue;
474 491
475 gw_node = gw_node_tmp; 492 gw_node = gw_node_tmp;
@@ -529,7 +546,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
529 spin_lock_bh(&bat_priv->gw.list_lock); 546 spin_lock_bh(&bat_priv->gw.list_lock);
530 if (!hlist_unhashed(&gw_node->list)) { 547 if (!hlist_unhashed(&gw_node->list)) {
531 hlist_del_init_rcu(&gw_node->list); 548 hlist_del_init_rcu(&gw_node->list);
532 batadv_gw_node_free_ref(gw_node); 549 batadv_gw_node_put(gw_node);
533 } 550 }
534 spin_unlock_bh(&bat_priv->gw.list_lock); 551 spin_unlock_bh(&bat_priv->gw.list_lock);
535 552
@@ -538,12 +555,12 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
538 batadv_gw_reselect(bat_priv); 555 batadv_gw_reselect(bat_priv);
539 556
540 if (curr_gw) 557 if (curr_gw)
541 batadv_gw_node_free_ref(curr_gw); 558 batadv_gw_node_put(curr_gw);
542 } 559 }
543 560
544out: 561out:
545 if (gw_node) 562 if (gw_node)
546 batadv_gw_node_free_ref(gw_node); 563 batadv_gw_node_put(gw_node);
547} 564}
548 565
549void batadv_gw_node_delete(struct batadv_priv *bat_priv, 566void batadv_gw_node_delete(struct batadv_priv *bat_priv,
@@ -566,7 +583,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
566 hlist_for_each_entry_safe(gw_node, node_tmp, 583 hlist_for_each_entry_safe(gw_node, node_tmp,
567 &bat_priv->gw.list, list) { 584 &bat_priv->gw.list, list) {
568 hlist_del_init_rcu(&gw_node->list); 585 hlist_del_init_rcu(&gw_node->list);
569 batadv_gw_node_free_ref(gw_node); 586 batadv_gw_node_put(gw_node);
570 } 587 }
571 spin_unlock_bh(&bat_priv->gw.list_lock); 588 spin_unlock_bh(&bat_priv->gw.list_lock);
572} 589}
@@ -603,12 +620,12 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
603 ret = seq_has_overflowed(seq) ? -1 : 0; 620 ret = seq_has_overflowed(seq) ? -1 : 0;
604 621
605 if (curr_gw) 622 if (curr_gw)
606 batadv_gw_node_free_ref(curr_gw); 623 batadv_gw_node_put(curr_gw);
607out: 624out:
608 if (router_ifinfo) 625 if (router_ifinfo)
609 batadv_neigh_ifinfo_free_ref(router_ifinfo); 626 batadv_neigh_ifinfo_put(router_ifinfo);
610 if (router) 627 if (router)
611 batadv_neigh_node_free_ref(router); 628 batadv_neigh_node_put(router);
612 return ret; 629 return ret;
613} 630}
614 631
@@ -645,7 +662,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
645 662
646out: 663out:
647 if (primary_if) 664 if (primary_if)
648 batadv_hardif_free_ref(primary_if); 665 batadv_hardif_put(primary_if);
649 return 0; 666 return 0;
650} 667}
651 668
@@ -656,13 +673,13 @@ out:
656 * @chaddr: buffer where the client address will be stored. Valid 673 * @chaddr: buffer where the client address will be stored. Valid
657 * only if the function returns BATADV_DHCP_TO_CLIENT 674 * only if the function returns BATADV_DHCP_TO_CLIENT
658 * 675 *
659 * Returns: 676 * This function may re-allocate the data buffer of the skb passed as argument.
677 *
678 * Return:
660 * - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error 679 * - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error
661 * while parsing it 680 * while parsing it
662 * - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server 681 * - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server
663 * - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client 682 * - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client
664 *
665 * This function may re-allocate the data buffer of the skb passed as argument.
666 */ 683 */
667enum batadv_dhcp_recipient 684enum batadv_dhcp_recipient
668batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, 685batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
@@ -777,11 +794,11 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
777 * server. Due to topology changes it may be the case that the GW server 794 * server. Due to topology changes it may be the case that the GW server
778 * previously selected is not the best one anymore. 795 * previously selected is not the best one anymore.
779 * 796 *
780 * Returns true if the packet destination is unicast and it is not the best gw,
781 * false otherwise.
782 *
783 * This call might reallocate skb data. 797 * This call might reallocate skb data.
784 * Must be invoked only when the DHCP packet is going TO a DHCP SERVER. 798 * Must be invoked only when the DHCP packet is going TO a DHCP SERVER.
799 *
800 * Return: true if the packet destination is unicast and it is not the best gw,
801 * false otherwise.
785 */ 802 */
786bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, 803bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
787 struct sk_buff *skb) 804 struct sk_buff *skb)
@@ -839,7 +856,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
839 goto out; 856 goto out;
840 857
841 curr_tq_avg = curr_ifinfo->bat_iv.tq_avg; 858 curr_tq_avg = curr_ifinfo->bat_iv.tq_avg;
842 batadv_neigh_ifinfo_free_ref(curr_ifinfo); 859 batadv_neigh_ifinfo_put(curr_ifinfo);
843 860
844 break; 861 break;
845 case BATADV_GW_MODE_OFF: 862 case BATADV_GW_MODE_OFF:
@@ -857,18 +874,18 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
857 874
858 if ((curr_tq_avg - old_ifinfo->bat_iv.tq_avg) > BATADV_GW_THRESHOLD) 875 if ((curr_tq_avg - old_ifinfo->bat_iv.tq_avg) > BATADV_GW_THRESHOLD)
859 out_of_range = true; 876 out_of_range = true;
860 batadv_neigh_ifinfo_free_ref(old_ifinfo); 877 batadv_neigh_ifinfo_put(old_ifinfo);
861 878
862out: 879out:
863 if (orig_dst_node) 880 if (orig_dst_node)
864 batadv_orig_node_free_ref(orig_dst_node); 881 batadv_orig_node_put(orig_dst_node);
865 if (curr_gw) 882 if (curr_gw)
866 batadv_gw_node_free_ref(curr_gw); 883 batadv_gw_node_put(curr_gw);
867 if (gw_node) 884 if (gw_node)
868 batadv_gw_node_free_ref(gw_node); 885 batadv_gw_node_put(gw_node);
869 if (neigh_old) 886 if (neigh_old)
870 batadv_neigh_node_free_ref(neigh_old); 887 batadv_neigh_node_put(neigh_old);
871 if (neigh_curr) 888 if (neigh_curr)
872 batadv_neigh_node_free_ref(neigh_curr); 889 batadv_neigh_node_put(neigh_curr);
873 return out_of_range; 890 return out_of_range;
874} 891}
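
The reordered kerneldoc above spells out the three results of batadv_gw_dhcp_recipient_get() and warns that the call may reallocate the skb data. Purely as an illustration of how a transmit path might consume those documented values (the local variables are assumptions, not code from this patch):

	enum batadv_dhcp_recipient rcp;
	unsigned int header_len = 0;
	u8 chaddr[ETH_ALEN];

	rcp = batadv_gw_dhcp_recipient_get(skb, &header_len, chaddr);
	/* skb->data may have been reallocated: re-read any cached header
	 * pointers after this call
	 */

	switch (rcp) {
	case BATADV_DHCP_TO_SERVER:
		/* candidate for gateway handling; batadv_gw_out_of_range(),
		 * documented in the hunks above, can be consulted here
		 */
		break;
	case BATADV_DHCP_TO_CLIENT:
		/* chaddr now holds the MAC of the client the reply is for */
		break;
	case BATADV_DHCP_NO:
	default:
		/* not DHCP, or unparsable: treat as a regular payload frame */
		break;
	}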
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index fa9527785ed3..582dd8c413c8 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b51bface8bdd..4423047889e1 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -38,10 +38,10 @@
38 * @description: text shown when throughput string cannot be parsed 38 * @description: text shown when throughput string cannot be parsed
39 * @throughput: pointer holding the returned throughput information 39 * @throughput: pointer holding the returned throughput information
40 * 40 *
41 * Returns false on parse error and true otherwise. 41 * Return: false on parse error and true otherwise.
42 */ 42 */
43static bool batadv_parse_throughput(struct net_device *net_dev, char *buff, 43bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
44 const char *description, u32 *throughput) 44 const char *description, u32 *throughput)
45{ 45{
46 enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT; 46 enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT;
47 u64 lthroughput; 47 u64 lthroughput;
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index ab893e318229..8a5e1ddf1175 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -49,5 +49,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
49void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv); 49void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv);
50void batadv_gw_init(struct batadv_priv *bat_priv); 50void batadv_gw_init(struct batadv_priv *bat_priv);
51void batadv_gw_free(struct batadv_priv *bat_priv); 51void batadv_gw_free(struct batadv_priv *bat_priv);
52bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
53 const char *description, u32 *throughput);
52 54
53#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */ 55#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */
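
The header change above turns batadv_parse_throughput() into a shared helper, presumably so code outside gateway_common.c can reuse the parser. A hedged usage sketch; the description string and the error handling are illustrative only:

	u32 throughput;

	/* buff is a user-supplied string such as "10mbit"; on a parse error
	 * the helper itself logs a message quoting the description text
	 */
	if (!batadv_parse_throughput(net_dev, buff, "throughput_override",
				     &throughput))
		return -EINVAL;

	/* throughput now holds the parsed value in the module's internal unit */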
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 57f7107169f5..0a7deaf2670a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -18,6 +18,7 @@
18#include "hard-interface.h" 18#include "hard-interface.h"
19#include "main.h" 19#include "main.h"
20 20
21#include <linux/atomic.h>
21#include <linux/bug.h> 22#include <linux/bug.h>
22#include <linux/byteorder/generic.h> 23#include <linux/byteorder/generic.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
@@ -26,6 +27,7 @@
26#include <linux/if_ether.h> 27#include <linux/if_ether.h>
27#include <linux/if.h> 28#include <linux/if.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
30#include <linux/kref.h>
29#include <linux/list.h> 31#include <linux/list.h>
30#include <linux/netdevice.h> 32#include <linux/netdevice.h>
31#include <linux/printk.h> 33#include <linux/printk.h>
@@ -47,13 +49,19 @@
47#include "sysfs.h" 49#include "sysfs.h"
48#include "translation-table.h" 50#include "translation-table.h"
49 51
50void batadv_hardif_free_rcu(struct rcu_head *rcu) 52/**
53 * batadv_hardif_release - release hard interface from lists and queue for
54 * free after rcu grace period
55 * @ref: kref pointer of the hard interface
56 */
57void batadv_hardif_release(struct kref *ref)
51{ 58{
52 struct batadv_hard_iface *hard_iface; 59 struct batadv_hard_iface *hard_iface;
53 60
54 hard_iface = container_of(rcu, struct batadv_hard_iface, rcu); 61 hard_iface = container_of(ref, struct batadv_hard_iface, refcount);
55 dev_put(hard_iface->net_dev); 62 dev_put(hard_iface->net_dev);
56 kfree(hard_iface); 63
64 kfree_rcu(hard_iface, rcu);
57} 65}
58 66
59struct batadv_hard_iface * 67struct batadv_hard_iface *
@@ -64,7 +72,7 @@ batadv_hardif_get_by_netdev(const struct net_device *net_dev)
64 rcu_read_lock(); 72 rcu_read_lock();
65 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { 73 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
66 if (hard_iface->net_dev == net_dev && 74 if (hard_iface->net_dev == net_dev &&
67 atomic_inc_not_zero(&hard_iface->refcount)) 75 kref_get_unless_zero(&hard_iface->refcount))
68 goto out; 76 goto out;
69 } 77 }
70 78
@@ -107,7 +115,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
107 * This function recursively checks all the fathers of the device passed as 115 * This function recursively checks all the fathers of the device passed as
108 * argument looking for a batman-adv soft interface. 116 * argument looking for a batman-adv soft interface.
109 * 117 *
110 * Returns true if the device is descendant of a batman-adv mesh interface (or 118 * Return: true if the device is descendant of a batman-adv mesh interface (or
111 * if it is a batman-adv interface itself), false otherwise 119 * if it is a batman-adv interface itself), false otherwise
112 */ 120 */
113static bool batadv_is_on_batman_iface(const struct net_device *net_dev) 121static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
@@ -161,7 +169,7 @@ static int batadv_is_valid_iface(const struct net_device *net_dev)
161 * interface 169 * interface
162 * @net_device: the device to check 170 * @net_device: the device to check
163 * 171 *
164 * Returns true if the net device is a 802.11 wireless device, false otherwise. 172 * Return: true if the net device is a 802.11 wireless device, false otherwise.
165 */ 173 */
166bool batadv_is_wifi_netdev(struct net_device *net_device) 174bool batadv_is_wifi_netdev(struct net_device *net_device)
167{ 175{
@@ -194,7 +202,7 @@ batadv_hardif_get_active(const struct net_device *soft_iface)
194 continue; 202 continue;
195 203
196 if (hard_iface->if_status == BATADV_IF_ACTIVE && 204 if (hard_iface->if_status == BATADV_IF_ACTIVE &&
197 atomic_inc_not_zero(&hard_iface->refcount)) 205 kref_get_unless_zero(&hard_iface->refcount))
198 goto out; 206 goto out;
199 } 207 }
200 208
@@ -218,7 +226,7 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
218 batadv_bla_update_orig_address(bat_priv, primary_if, oldif); 226 batadv_bla_update_orig_address(bat_priv, primary_if, oldif);
219out: 227out:
220 if (primary_if) 228 if (primary_if)
221 batadv_hardif_free_ref(primary_if); 229 batadv_hardif_put(primary_if);
222} 230}
223 231
224static void batadv_primary_if_select(struct batadv_priv *bat_priv, 232static void batadv_primary_if_select(struct batadv_priv *bat_priv,
@@ -228,7 +236,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
228 236
229 ASSERT_RTNL(); 237 ASSERT_RTNL();
230 238
231 if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount)) 239 if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount))
232 new_hard_iface = NULL; 240 new_hard_iface = NULL;
233 241
234 curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); 242 curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1);
@@ -242,7 +250,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
242 250
243out: 251out:
244 if (curr_hard_iface) 252 if (curr_hard_iface)
245 batadv_hardif_free_ref(curr_hard_iface); 253 batadv_hardif_put(curr_hard_iface);
246} 254}
247 255
248static bool 256static bool
@@ -399,9 +407,12 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
399 407
400 batadv_update_min_mtu(hard_iface->soft_iface); 408 batadv_update_min_mtu(hard_iface->soft_iface);
401 409
410 if (bat_priv->bat_algo_ops->bat_iface_activate)
411 bat_priv->bat_algo_ops->bat_iface_activate(hard_iface);
412
402out: 413out:
403 if (primary_if) 414 if (primary_if)
404 batadv_hardif_free_ref(primary_if); 415 batadv_hardif_put(primary_if);
405} 416}
406 417
407static void 418static void
@@ -426,7 +437,8 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
426 * 437 *
427 * Invoke ndo_del_slave on master passing slave as argument. In this way slave 438 * Invoke ndo_del_slave on master passing slave as argument. In this way slave
428 * is free'd and master can correctly change its internal state. 439 * is free'd and master can correctly change its internal state.
429 * Return 0 on success, a negative value representing the error otherwise 440 *
441 * Return: 0 on success, a negative value representing the error otherwise
430 */ 442 */
431static int batadv_master_del_slave(struct batadv_hard_iface *slave, 443static int batadv_master_del_slave(struct batadv_hard_iface *slave,
432 struct net_device *master) 444 struct net_device *master)
@@ -455,7 +467,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
455 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) 467 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
456 goto out; 468 goto out;
457 469
458 if (!atomic_inc_not_zero(&hard_iface->refcount)) 470 if (!kref_get_unless_zero(&hard_iface->refcount))
459 goto out; 471 goto out;
460 472
461 soft_iface = dev_get_by_name(&init_net, iface_name); 473 soft_iface = dev_get_by_name(&init_net, iface_name);
@@ -553,7 +565,7 @@ err_dev:
553 hard_iface->soft_iface = NULL; 565 hard_iface->soft_iface = NULL;
554 dev_put(soft_iface); 566 dev_put(soft_iface);
555err: 567err:
556 batadv_hardif_free_ref(hard_iface); 568 batadv_hardif_put(hard_iface);
557 return ret; 569 return ret;
558} 570}
559 571
@@ -563,8 +575,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
563 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 575 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
564 struct batadv_hard_iface *primary_if = NULL; 576 struct batadv_hard_iface *primary_if = NULL;
565 577
566 if (hard_iface->if_status == BATADV_IF_ACTIVE) 578 batadv_hardif_deactivate_interface(hard_iface);
567 batadv_hardif_deactivate_interface(hard_iface);
568 579
569 if (hard_iface->if_status != BATADV_IF_INACTIVE) 580 if (hard_iface->if_status != BATADV_IF_INACTIVE)
570 goto out; 581 goto out;
@@ -584,7 +595,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
584 batadv_primary_if_select(bat_priv, new_if); 595 batadv_primary_if_select(bat_priv, new_if);
585 596
586 if (new_if) 597 if (new_if)
587 batadv_hardif_free_ref(new_if); 598 batadv_hardif_put(new_if);
588 } 599 }
589 600
590 bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); 601 bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
@@ -607,11 +618,11 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
607 } 618 }
608 619
609 hard_iface->soft_iface = NULL; 620 hard_iface->soft_iface = NULL;
610 batadv_hardif_free_ref(hard_iface); 621 batadv_hardif_put(hard_iface);
611 622
612out: 623out:
613 if (primary_if) 624 if (primary_if)
614 batadv_hardif_free_ref(primary_if); 625 batadv_hardif_put(primary_if);
615} 626}
616 627
617/** 628/**
@@ -630,7 +641,7 @@ static void batadv_hardif_remove_interface_finish(struct work_struct *work)
630 641
631 batadv_debugfs_del_hardif(hard_iface); 642 batadv_debugfs_del_hardif(hard_iface);
632 batadv_sysfs_del_hardif(&hard_iface->hardif_obj); 643 batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
633 batadv_hardif_free_ref(hard_iface); 644 batadv_hardif_put(hard_iface);
634} 645}
635 646
636static struct batadv_hard_iface * 647static struct batadv_hard_iface *
@@ -676,7 +687,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
676 hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; 687 hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
677 688
678 /* extra reference for return */ 689 /* extra reference for return */
679 atomic_set(&hard_iface->refcount, 2); 690 kref_init(&hard_iface->refcount);
691 kref_get(&hard_iface->refcount);
680 692
681 batadv_check_known_mac_addr(hard_iface->net_dev); 693 batadv_check_known_mac_addr(hard_iface->net_dev);
682 list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); 694 list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
@@ -784,10 +796,10 @@ static int batadv_hard_if_event(struct notifier_block *this,
784 } 796 }
785 797
786hardif_put: 798hardif_put:
787 batadv_hardif_free_ref(hard_iface); 799 batadv_hardif_put(hard_iface);
788out: 800out:
789 if (primary_if) 801 if (primary_if)
790 batadv_hardif_free_ref(primary_if); 802 batadv_hardif_put(primary_if);
791 return NOTIFY_DONE; 803 return NOTIFY_DONE;
792} 804}
793 805
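
One detail worth noting in batadv_hardif_add_interface() above: the old atomic_set(&refcount, 2) becomes kref_init() followed by an explicit kref_get(), since kref_init() always starts the counter at one. The same allocation pattern, shown on the hypothetical struct foo from earlier (the list head and lock are part of the sketch):

static struct foo *foo_create(struct hlist_head *head, spinlock_t *lock)
{
	struct foo *obj;

	obj = kzalloc(sizeof(*obj), GFP_ATOMIC);
	if (!obj)
		return NULL;

	/* one reference owned by the list ... */
	kref_init(&obj->refcount);
	/* ... and one for the pointer handed back to the caller */
	kref_get(&obj->refcount);

	spin_lock_bh(lock);
	hlist_add_head_rcu(&obj->list, head);
	spin_unlock_bh(lock);

	return obj;
}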
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 7b12ea8ea29d..d74f1983f33e 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -20,8 +20,8 @@
20 20
21#include "main.h" 21#include "main.h"
22 22
23#include <linux/atomic.h>
24#include <linux/compiler.h> 23#include <linux/compiler.h>
24#include <linux/kref.h>
25#include <linux/notifier.h> 25#include <linux/notifier.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/stddef.h> 27#include <linux/stddef.h>
@@ -61,18 +61,16 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
61void batadv_hardif_remove_interfaces(void); 61void batadv_hardif_remove_interfaces(void);
62int batadv_hardif_min_mtu(struct net_device *soft_iface); 62int batadv_hardif_min_mtu(struct net_device *soft_iface);
63void batadv_update_min_mtu(struct net_device *soft_iface); 63void batadv_update_min_mtu(struct net_device *soft_iface);
64void batadv_hardif_free_rcu(struct rcu_head *rcu); 64void batadv_hardif_release(struct kref *ref);
65 65
66/** 66/**
67 * batadv_hardif_free_ref - decrement the hard interface refcounter and 67 * batadv_hardif_put - decrement the hard interface refcounter and possibly
68 * possibly free it 68 * release it
69 * @hard_iface: the hard interface to free 69 * @hard_iface: the hard interface to free
70 */ 70 */
71static inline void 71static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
72batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
73{ 72{
74 if (atomic_dec_and_test(&hard_iface->refcount)) 73 kref_put(&hard_iface->refcount, batadv_hardif_release);
75 call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu);
76} 74}
77 75
78static inline struct batadv_hard_iface * 76static inline struct batadv_hard_iface *
@@ -85,7 +83,7 @@ batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
85 if (!hard_iface) 83 if (!hard_iface)
86 goto out; 84 goto out;
87 85
88 if (!atomic_inc_not_zero(&hard_iface->refcount)) 86 if (!kref_get_unless_zero(&hard_iface->refcount))
89 hard_iface = NULL; 87 hard_iface = NULL;
90 88
91out: 89out:
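
The rename from *_free_ref() to *_put() in this header matches the kernel-wide kref naming and makes the caller-side discipline easier to spot. The shape that repeats throughout the .c hunks above, as a standalone fragment:

	struct batadv_hard_iface *primary_if;

	primary_if = batadv_primary_if_get_selected(bat_priv);
	if (!primary_if)
		goto out;

	/* ... use primary_if; it cannot go away while the reference is held ... */

out:
	if (primary_if)
		batadv_hardif_put(primary_if);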
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 2ea6a18d793f..a0a0fdb85805 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich, Marek Lindner 3 * Simon Wunderlich, Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 377626250ac7..9bb57b87447c 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich, Marek Lindner 3 * Simon Wunderlich, Marek Lindner
4 * 4 *
@@ -30,14 +30,17 @@
30struct lock_class_key; 30struct lock_class_key;
31 31
32/* callback to a compare function. should compare 2 elements' data for their 32/* callback to a compare function. should compare 2 elements' data for their
33 * keys, return 0 if same and not 0 if not same 33 * keys
34 *
35 * Return: 0 if same and not 0 if not same
34 */ 36 */
35typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *, 37typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *,
36 const void *); 38 const void *);
37 39
38/* the hashfunction, should return an index 40/* the hashfunction
39 * based on the key in the data of the first 41 *
40 * argument and the size the second 42 * Return: an index based on the key in the data of the first argument and the
43 * size the second
41 */ 44 */
42typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32); 45typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32);
43typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *); 46typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *);
@@ -96,7 +99,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash,
96 * @data: data passed to the aforementioned callbacks as argument 99 * @data: data passed to the aforementioned callbacks as argument
97 * @data_node: to be added element 100 * @data_node: to be added element
98 * 101 *
99 * Returns 0 on success, 1 if the element already is in the hash 102 * Return: 0 on success, 1 if the element already is in the hash
100 * and -1 on error. 103 * and -1 on error.
101 */ 104 */
102static inline int batadv_hash_add(struct batadv_hashtable *hash, 105static inline int batadv_hash_add(struct batadv_hashtable *hash,
@@ -139,10 +142,11 @@ out:
139 return ret; 142 return ret;
140} 143}
141 144
142/* removes data from hash, if found. returns pointer do data on success, so you 145/* removes data from hash, if found. data could be the structure you use with
143 * can remove the used structure yourself, or NULL on error . data could be the 146 * just the key filled, we just need the key for comparing.
144 * structure you use with just the key filled, we just need the key for 147 *
145 * comparing. 148 * Return: pointer to data on success, so you can remove the used
149 * structure yourself, or NULL on error
146 */ 150 */
147static inline void *batadv_hash_remove(struct batadv_hashtable *hash, 151static inline void *batadv_hash_remove(struct batadv_hashtable *hash,
148 batadv_hashdata_compare_cb compare, 152 batadv_hashdata_compare_cb compare,
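
The reworded comments above are the whole contract for the two hash callbacks: compare() reports whether two keys match, choose() maps a key to a bucket index. A sketch of a matching pair for a MAC-keyed entry, following the 0-means-equal convention documented above; the struct layout and the use of jhash are assumptions for illustration, not code from this patch:

#include <linux/if_ether.h>
#include <linux/jhash.h>

struct my_entry {
	struct hlist_node hash_entry;
	u8 addr[ETH_ALEN];
};

/* Return: 0 if both entries carry the same key, non-zero otherwise */
static int my_entry_cmp(const struct hlist_node *node, const void *data2)
{
	const struct my_entry *e1 = container_of(node, struct my_entry,
						 hash_entry);
	const struct my_entry *e2 = data2;

	return !batadv_compare_eth(e1->addr, e2->addr);
}

/* Return: bucket index derived from the key, always smaller than size */
static u32 my_entry_choose(const void *data, u32 size)
{
	const struct my_entry *entry = data;

	return jhash(entry->addr, ETH_ALEN, 0) % size;
}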
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bcabb5e3f4d3..14d0013b387e 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -278,7 +278,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
278 278
279 ether_addr_copy(icmp_header->orig, primary_if->net_dev->dev_addr); 279 ether_addr_copy(icmp_header->orig, primary_if->net_dev->dev_addr);
280 280
281 batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); 281 batadv_send_unicast_skb(skb, neigh_node);
282 goto out; 282 goto out;
283 283
284dst_unreach: 284dst_unreach:
@@ -288,11 +288,11 @@ free_skb:
288 kfree_skb(skb); 288 kfree_skb(skb);
289out: 289out:
290 if (primary_if) 290 if (primary_if)
291 batadv_hardif_free_ref(primary_if); 291 batadv_hardif_put(primary_if);
292 if (neigh_node) 292 if (neigh_node)
293 batadv_neigh_node_free_ref(neigh_node); 293 batadv_neigh_node_put(neigh_node);
294 if (orig_node) 294 if (orig_node)
295 batadv_orig_node_free_ref(orig_node); 295 batadv_orig_node_put(orig_node);
296 return len; 296 return len;
297} 297}
298 298
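
The change above (also seen in fragmentation.c earlier) replaces the open-coded batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr) with batadv_send_unicast_skb(skb, neigh_node). send.c is not part of this excerpt, so take the following as the minimal plausible shape of that wrapper rather than the exact upstream body:

int batadv_send_unicast_skb(struct sk_buff *skb,
			    struct batadv_neigh_node *neigh)
{
	/* the neighbour already carries both the outgoing hard interface
	 * and the next-hop MAC address
	 */
	return batadv_send_skb_packet(skb, neigh->if_incoming, neigh->addr);
}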
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index e937143f0b10..618d5de06f20 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 4b5d61fbadb1..d64ddb961979 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -29,6 +29,7 @@
29#include <linux/ip.h> 29#include <linux/ip.h>
30#include <linux/ipv6.h> 30#include <linux/ipv6.h>
31#include <linux/kernel.h> 31#include <linux/kernel.h>
32#include <linux/kref.h>
32#include <linux/list.h> 33#include <linux/list.h>
33#include <linux/lockdep.h> 34#include <linux/lockdep.h>
34#include <linux/module.h> 35#include <linux/module.h>
@@ -86,6 +87,7 @@ static int __init batadv_init(void)
86 87
87 batadv_recv_handler_init(); 88 batadv_recv_handler_init();
88 89
90 batadv_v_init();
89 batadv_iv_init(); 91 batadv_iv_init();
90 batadv_nc_init(); 92 batadv_nc_init();
91 93
@@ -158,6 +160,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
158 INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); 160 INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list);
159 INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); 161 INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
160 162
163 ret = batadv_v_mesh_init(bat_priv);
164 if (ret < 0)
165 goto err;
166
161 ret = batadv_originator_init(bat_priv); 167 ret = batadv_originator_init(bat_priv);
162 if (ret < 0) 168 if (ret < 0)
163 goto err; 169 goto err;
@@ -200,6 +206,8 @@ void batadv_mesh_free(struct net_device *soft_iface)
200 batadv_purge_outstanding_packets(bat_priv, NULL); 206 batadv_purge_outstanding_packets(bat_priv, NULL);
201 207
202 batadv_gw_node_free(bat_priv); 208 batadv_gw_node_free(bat_priv);
209
210 batadv_v_mesh_free(bat_priv);
203 batadv_nc_mesh_free(bat_priv); 211 batadv_nc_mesh_free(bat_priv);
204 batadv_dat_free(bat_priv); 212 batadv_dat_free(bat_priv);
205 batadv_bla_free(bat_priv); 213 batadv_bla_free(bat_priv);
@@ -233,7 +241,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
233 * @bat_priv: the bat priv with all the soft interface information 241 * @bat_priv: the bat priv with all the soft interface information
234 * @addr: the address to check 242 * @addr: the address to check
235 * 243 *
236 * Returns 'true' if the mac address was found, false otherwise. 244 * Return: 'true' if the mac address was found, false otherwise.
237 */ 245 */
238bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) 246bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
239{ 247{
@@ -262,7 +270,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
262 * function that requires the primary interface 270 * function that requires the primary interface
263 * @seq: debugfs table seq_file struct 271 * @seq: debugfs table seq_file struct
264 * 272 *
265 * Returns primary interface if found or NULL otherwise. 273 * Return: primary interface if found or NULL otherwise.
266 */ 274 */
267struct batadv_hard_iface * 275struct batadv_hard_iface *
268batadv_seq_print_text_primary_if_get(struct seq_file *seq) 276batadv_seq_print_text_primary_if_get(struct seq_file *seq)
@@ -286,7 +294,7 @@ batadv_seq_print_text_primary_if_get(struct seq_file *seq)
286 seq_printf(seq, 294 seq_printf(seq,
287 "BATMAN mesh %s disabled - primary interface not active\n", 295 "BATMAN mesh %s disabled - primary interface not active\n",
288 net_dev->name); 296 net_dev->name);
289 batadv_hardif_free_ref(primary_if); 297 batadv_hardif_put(primary_if);
290 primary_if = NULL; 298 primary_if = NULL;
291 299
292out: 300out:
@@ -297,7 +305,7 @@ out:
297 * batadv_max_header_len - calculate maximum encapsulation overhead for a 305 * batadv_max_header_len - calculate maximum encapsulation overhead for a
298 * payload packet 306 * payload packet
299 * 307 *
300 * Return the maximum encapsulation overhead in bytes. 308 * Return: the maximum encapsulation overhead in bytes.
301 */ 309 */
302int batadv_max_header_len(void) 310int batadv_max_header_len(void)
303{ 311{
@@ -599,6 +607,8 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
599 * 607 *
600 * payload_ptr must always point to an address in the skb head buffer and not to 608 * payload_ptr must always point to an address in the skb head buffer and not to
601 * a fragment. 609 * a fragment.
610 *
611 * Return: big endian crc32c of the checksummed data
602 */ 612 */
603__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) 613__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
604{ 614{
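
The hunk above only adds the Return: line; the body of batadv_skb_crc32() is unchanged and not shown. As background, a sketch of how a CRC32C over the tail of a possibly non-linear skb is typically computed with the core kernel's skb_seq_read() iterator; the exact upstream body may differ:

#include <linux/crc32c.h>
#include <linux/skbuff.h>

static __be32 example_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
{
	unsigned int consumed = 0, len, from, to = skb->len;
	struct skb_seq_state st;
	const u8 *data;
	u32 crc = 0;

	/* checksum everything from payload_ptr to the end of the skb */
	from = (unsigned int)(payload_ptr - skb->data);

	skb_prepare_seq_read(skb, from, to, &st);
	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		crc = crc32c(crc, data, len);
		consumed += len;
	}

	return htonl(crc);
}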
@@ -622,15 +632,26 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
622} 632}
623 633
624/** 634/**
625 * batadv_tvlv_handler_free_ref - decrement the tvlv handler refcounter and 635 * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
626 * possibly free it 636 * free after rcu grace period
637 * @ref: kref pointer of the tvlv
638 */
639static void batadv_tvlv_handler_release(struct kref *ref)
640{
641 struct batadv_tvlv_handler *tvlv_handler;
642
643 tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
644 kfree_rcu(tvlv_handler, rcu);
645}
646
647/**
648 * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
649 * possibly release it
627 * @tvlv_handler: the tvlv handler to free 650 * @tvlv_handler: the tvlv handler to free
628 */ 651 */
629static void 652static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
630batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler)
631{ 653{
632 if (atomic_dec_and_test(&tvlv_handler->refcount)) 654 kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
633 kfree_rcu(tvlv_handler, rcu);
634} 655}
635 656
636/** 657/**
@@ -640,7 +661,7 @@ batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler)
640 * @type: tvlv handler type to look for 661 * @type: tvlv handler type to look for
641 * @version: tvlv handler version to look for 662 * @version: tvlv handler version to look for
642 * 663 *
643 * Returns tvlv handler if found or NULL otherwise. 664 * Return: tvlv handler if found or NULL otherwise.
644 */ 665 */
645static struct batadv_tvlv_handler 666static struct batadv_tvlv_handler
646*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) 667*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
@@ -656,7 +677,7 @@ static struct batadv_tvlv_handler
656 if (tvlv_handler_tmp->version != version) 677 if (tvlv_handler_tmp->version != version)
657 continue; 678 continue;
658 679
659 if (!atomic_inc_not_zero(&tvlv_handler_tmp->refcount)) 680 if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
660 continue; 681 continue;
661 682
662 tvlv_handler = tvlv_handler_tmp; 683 tvlv_handler = tvlv_handler_tmp;
@@ -668,14 +689,25 @@ static struct batadv_tvlv_handler
668} 689}
669 690
670/** 691/**
671 * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and 692 * batadv_tvlv_container_release - release tvlv from lists and free
672 * possibly free it 693 * @ref: kref pointer of the tvlv
694 */
695static void batadv_tvlv_container_release(struct kref *ref)
696{
697 struct batadv_tvlv_container *tvlv;
698
699 tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
700 kfree(tvlv);
701}
702
703/**
704 * batadv_tvlv_container_put - decrement the tvlv container refcounter and
705 * possibly release it
673 * @tvlv: the tvlv container to free 706 * @tvlv: the tvlv container to free
674 */ 707 */
675static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) 708static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
676{ 709{
677 if (atomic_dec_and_test(&tvlv->refcount)) 710 kref_put(&tvlv->refcount, batadv_tvlv_container_release);
678 kfree(tvlv);
679} 711}
680 712
681/** 713/**
@@ -688,13 +720,15 @@ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv)
688 * Has to be called with the appropriate locks being acquired 720 * Has to be called with the appropriate locks being acquired
689 * (tvlv.container_list_lock). 721 * (tvlv.container_list_lock).
690 * 722 *
691 * Returns tvlv container if found or NULL otherwise. 723 * Return: tvlv container if found or NULL otherwise.
692 */ 724 */
693static struct batadv_tvlv_container 725static struct batadv_tvlv_container
694*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) 726*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
695{ 727{
696 struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; 728 struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
697 729
730 lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
731
698 hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) { 732 hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
699 if (tvlv_tmp->tvlv_hdr.type != type) 733 if (tvlv_tmp->tvlv_hdr.type != type)
700 continue; 734 continue;
@@ -702,7 +736,7 @@ static struct batadv_tvlv_container
702 if (tvlv_tmp->tvlv_hdr.version != version) 736 if (tvlv_tmp->tvlv_hdr.version != version)
703 continue; 737 continue;
704 738
705 if (!atomic_inc_not_zero(&tvlv_tmp->refcount)) 739 if (!kref_get_unless_zero(&tvlv_tmp->refcount))
706 continue; 740 continue;
707 741
708 tvlv = tvlv_tmp; 742 tvlv = tvlv_tmp;
@@ -720,13 +754,15 @@ static struct batadv_tvlv_container
720 * Has to be called with the appropriate locks being acquired 754 * Has to be called with the appropriate locks being acquired
721 * (tvlv.container_list_lock). 755 * (tvlv.container_list_lock).
722 * 756 *
723 * Returns size of all currently registered tvlv containers in bytes. 757 * Return: size of all currently registered tvlv containers in bytes.
724 */ 758 */
725static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) 759static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
726{ 760{
727 struct batadv_tvlv_container *tvlv; 761 struct batadv_tvlv_container *tvlv;
728 u16 tvlv_len = 0; 762 u16 tvlv_len = 0;
729 763
764 lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
765
730 hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { 766 hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
731 tvlv_len += sizeof(struct batadv_tvlv_hdr); 767 tvlv_len += sizeof(struct batadv_tvlv_hdr);
732 tvlv_len += ntohs(tvlv->tvlv_hdr.len); 768 tvlv_len += ntohs(tvlv->tvlv_hdr.len);
@@ -755,8 +791,8 @@ static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
755 hlist_del(&tvlv->list); 791 hlist_del(&tvlv->list);
756 792
757 /* first call to decrement the counter, second call to free */ 793 /* first call to decrement the counter, second call to free */
758 batadv_tvlv_container_free_ref(tvlv); 794 batadv_tvlv_container_put(tvlv);
759 batadv_tvlv_container_free_ref(tvlv); 795 batadv_tvlv_container_put(tvlv);
760} 796}
761 797
762/** 798/**
@@ -808,7 +844,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
808 844
809 memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len)); 845 memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
810 INIT_HLIST_NODE(&tvlv_new->list); 846 INIT_HLIST_NODE(&tvlv_new->list);
811 atomic_set(&tvlv_new->refcount, 1); 847 kref_init(&tvlv_new->refcount);
812 848
813 spin_lock_bh(&bat_priv->tvlv.container_list_lock); 849 spin_lock_bh(&bat_priv->tvlv.container_list_lock);
814 tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); 850 tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
@@ -826,7 +862,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
826 * @additional_packet_len: requested additional packet size on top of minimum 862 * @additional_packet_len: requested additional packet size on top of minimum
827 * size 863 * size
828 * 864 *
829 * Returns true if the packet buffer could be changed to the requested size, 865 * Return: true if the packet buffer could be changed to the requested size,
830 * false otherwise. 866 * false otherwise.
831 */ 867 */
832static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, 868static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
@@ -862,7 +898,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
862 * The ogm packet might be enlarged or shrunk depending on the current size 898 * The ogm packet might be enlarged or shrunk depending on the current size
863 * and the size of the to-be-appended tvlv containers. 899 * and the size of the to-be-appended tvlv containers.
864 * 900 *
865 * Returns size of all appended tvlv containers in bytes. 901 * Return: size of all appended tvlv containers in bytes.
866 */ 902 */
867u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, 903u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
868 unsigned char **packet_buff, 904 unsigned char **packet_buff,
@@ -915,7 +951,7 @@ end:
915 * @tvlv_value: tvlv content 951 * @tvlv_value: tvlv content
916 * @tvlv_value_len: tvlv content length 952 * @tvlv_value_len: tvlv content length
917 * 953 *
918 * Returns success if handler was not found or the return value of the handler 954 * Return: success if handler was not found or the return value of the handler
919 * callback. 955 * callback.
920 */ 956 */
921static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, 957static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
@@ -968,7 +1004,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
968 * @tvlv_value: tvlv content 1004 * @tvlv_value: tvlv content
969 * @tvlv_value_len: tvlv content length 1005 * @tvlv_value_len: tvlv content length
970 * 1006 *
971 * Returns success when processing an OGM or the return value of all called 1007 * Return: success when processing an OGM or the return value of all called
972 * handler callbacks. 1008 * handler callbacks.
973 */ 1009 */
974int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, 1010int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
@@ -1001,7 +1037,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
1001 src, dst, tvlv_value, 1037 src, dst, tvlv_value,
1002 tvlv_value_cont_len); 1038 tvlv_value_cont_len);
1003 if (tvlv_handler) 1039 if (tvlv_handler)
1004 batadv_tvlv_handler_free_ref(tvlv_handler); 1040 batadv_tvlv_handler_put(tvlv_handler);
1005 tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; 1041 tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
1006 tvlv_value_len -= tvlv_value_cont_len; 1042 tvlv_value_len -= tvlv_value_cont_len;
1007 } 1043 }
@@ -1081,7 +1117,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
1081 1117
1082 tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); 1118 tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
1083 if (tvlv_handler) { 1119 if (tvlv_handler) {
1084 batadv_tvlv_handler_free_ref(tvlv_handler); 1120 batadv_tvlv_handler_put(tvlv_handler);
1085 return; 1121 return;
1086 } 1122 }
1087 1123
@@ -1094,7 +1130,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
1094 tvlv_handler->type = type; 1130 tvlv_handler->type = type;
1095 tvlv_handler->version = version; 1131 tvlv_handler->version = version;
1096 tvlv_handler->flags = flags; 1132 tvlv_handler->flags = flags;
1097 atomic_set(&tvlv_handler->refcount, 1); 1133 kref_init(&tvlv_handler->refcount);
1098 INIT_HLIST_NODE(&tvlv_handler->list); 1134 INIT_HLIST_NODE(&tvlv_handler->list);
1099 1135
1100 spin_lock_bh(&bat_priv->tvlv.handler_list_lock); 1136 spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
@@ -1118,11 +1154,11 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
1118 if (!tvlv_handler) 1154 if (!tvlv_handler)
1119 return; 1155 return;
1120 1156
1121 batadv_tvlv_handler_free_ref(tvlv_handler); 1157 batadv_tvlv_handler_put(tvlv_handler);
1122 spin_lock_bh(&bat_priv->tvlv.handler_list_lock); 1158 spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
1123 hlist_del_rcu(&tvlv_handler->list); 1159 hlist_del_rcu(&tvlv_handler->list);
1124 spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); 1160 spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
1125 batadv_tvlv_handler_free_ref(tvlv_handler); 1161 batadv_tvlv_handler_put(tvlv_handler);
1126} 1162}
1127 1163
1128/** 1164/**
@@ -1182,7 +1218,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
1182 if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) 1218 if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
1183 kfree_skb(skb); 1219 kfree_skb(skb);
1184out: 1220out:
1185 batadv_orig_node_free_ref(orig_node); 1221 batadv_orig_node_put(orig_node);
1186} 1222}
1187 1223
1188/** 1224/**
@@ -1190,8 +1226,8 @@ out:
1190 * @skb: the buffer containing the packet 1226 * @skb: the buffer containing the packet
1191 * @header_len: length of the batman header preceding the ethernet header 1227 * @header_len: length of the batman header preceding the ethernet header
1192 * 1228 *
1193 * If the packet embedded in the skb is vlan tagged this function returns the 1229 * Return: VID with the BATADV_VLAN_HAS_TAG flag when the packet embedded in the
1194 * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned. 1230 * skb is vlan tagged. Otherwise BATADV_NO_FLAGS.
1195 */ 1231 */
1196unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) 1232unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
1197{ 1233{
@@ -1218,7 +1254,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
1218 * @vid: the VLAN identifier for which the AP isolation attribute has to be 1254 * @vid: the VLAN identifier for which the AP isolation attribute has to be
1219 * looked up 1255 * looked up
1220 * 1256 *
1221 * Returns true if AP isolation is on for the VLAN identified by vid, false 1257 * Return: true if AP isolation is on for the VLAN identified by vid, false
1222 * otherwise 1258 * otherwise
1223 */ 1259 */
1224bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) 1260bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
@@ -1232,7 +1268,7 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
1232 vlan = batadv_softif_vlan_get(bat_priv, vid); 1268 vlan = batadv_softif_vlan_get(bat_priv, vid);
1233 if (vlan) { 1269 if (vlan) {
1234 ap_isolation_enabled = atomic_read(&vlan->ap_isolation); 1270 ap_isolation_enabled = atomic_read(&vlan->ap_isolation);
1235 batadv_softif_vlan_free_ref(vlan); 1271 batadv_softif_vlan_put(vlan);
1236 } 1272 }
1237 1273
1238 return ap_isolation_enabled; 1274 return ap_isolation_enabled;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 9dbd9107e7e1..db4533631834 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -24,17 +24,21 @@
24#define BATADV_DRIVER_DEVICE "batman-adv" 24#define BATADV_DRIVER_DEVICE "batman-adv"
25 25
26#ifndef BATADV_SOURCE_VERSION 26#ifndef BATADV_SOURCE_VERSION
27#define BATADV_SOURCE_VERSION "2016.0" 27#define BATADV_SOURCE_VERSION "2016.1"
28#endif 28#endif
29 29
30/* B.A.T.M.A.N. parameters */ 30/* B.A.T.M.A.N. parameters */
31 31
32#define BATADV_TQ_MAX_VALUE 255 32#define BATADV_TQ_MAX_VALUE 255
33#define BATADV_THROUGHPUT_MAX_VALUE 0xFFFFFFFF
33#define BATADV_JITTER 20 34#define BATADV_JITTER 20
34 35
35/* Time To Live of broadcast messages */ 36/* Time To Live of broadcast messages */
36#define BATADV_TTL 50 37#define BATADV_TTL 50
37 38
39/* maximum sequence number age of broadcast messages */
40#define BATADV_BCAST_MAX_AGE 64
41
38/* purge originators after time in seconds if no valid packet comes in 42/* purge originators after time in seconds if no valid packet comes in
39 * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE 43 * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE
40 */ 44 */
@@ -57,6 +61,15 @@
57#define BATADV_TQ_LOCAL_BIDRECT_RECV_MINIMUM 1 61#define BATADV_TQ_LOCAL_BIDRECT_RECV_MINIMUM 1
58#define BATADV_TQ_TOTAL_BIDRECT_LIMIT 1 62#define BATADV_TQ_TOTAL_BIDRECT_LIMIT 1
59 63
64/* B.A.T.M.A.N. V */
65#define BATADV_THROUGHPUT_DEFAULT_VALUE 10 /* 1 Mbps */
66#define BATADV_ELP_PROBES_PER_NODE 2
67#define BATADV_ELP_MIN_PROBE_SIZE 200 /* bytes */
68#define BATADV_ELP_PROBE_MAX_TX_DIFF 100 /* milliseconds */
69#define BATADV_ELP_MAX_AGE 64
70#define BATADV_OGM_MAX_ORIGDIFF 5
71#define BATADV_OGM_MAX_AGE 64
72
60/* number of OGMs sent with the last tt diff */ 73/* number of OGMs sent with the last tt diff */
61#define BATADV_TT_OGM_APPEND_MAX 3 74#define BATADV_TT_OGM_APPEND_MAX 3
62 75
@@ -97,11 +110,6 @@
97 */ 110 */
98#define BATADV_TQ_SIMILARITY_THRESHOLD 50 111#define BATADV_TQ_SIMILARITY_THRESHOLD 50
99 112
100/* how much worse secondary interfaces may be to be considered as bonding
101 * candidates
102 */
103#define BATADV_BONDING_TQ_THRESHOLD 50
104
105/* should not be bigger than 512 bytes or change the size of 113/* should not be bigger than 512 bytes or change the size of
106 * forw_packet->direct_link_flags 114 * forw_packet->direct_link_flags
107 */ 115 */
@@ -273,9 +281,14 @@ static inline void _batadv_dbg(int type __always_unused,
273 pr_err("%s: " fmt, _netdev->name, ## arg); \ 281 pr_err("%s: " fmt, _netdev->name, ## arg); \
274 } while (0) 282 } while (0)
275 283
276/* returns 1 if they are the same ethernet addr 284/**
285 * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses
286 * @data1: Pointer to a six-byte array containing the Ethernet address
287 * @data2: Pointer to a second six-byte array containing the Ethernet address
277 * 288 *
278 * note: can't use ether_addr_equal() as it requires aligned memory 289 * note: can't use ether_addr_equal() as it requires aligned memory
290 *
291 * Return: 1 if they are the same ethernet addr
279 */ 292 */
280static inline bool batadv_compare_eth(const void *data1, const void *data2) 293static inline bool batadv_compare_eth(const void *data1, const void *data2)
281{ 294{
@@ -287,7 +300,7 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2)
287 * @timestamp: base value to compare with (in jiffies) 300 * @timestamp: base value to compare with (in jiffies)
288 * @timeout: added to base value before comparing (in milliseconds) 301 * @timeout: added to base value before comparing (in milliseconds)
289 * 302 *
290 * Returns true if current time is after timestamp + timeout 303 * Return: true if current time is after timestamp + timeout
291 */ 304 */
292static inline bool batadv_has_timed_out(unsigned long timestamp, 305static inline bool batadv_has_timed_out(unsigned long timestamp,
293 unsigned int timeout) 306 unsigned int timeout)
@@ -326,7 +339,13 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
326 339
327#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) 340#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
328 341
329/* Sum and return the cpu-local counters for index 'idx' */ 342/**
343 * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
344 * @bat_priv: the bat priv with all the soft interface information
345 * @idx: index of counter to sum up
346 *
347 * Return: sum of all cpu-local counters
348 */
330static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) 349static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
331{ 350{
332 u64 *counters, sum = 0; 351 u64 *counters, sum = 0;
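
The main.h hunk above adds kernel-doc for two small helpers: batadv_has_timed_out(), which reports whether timestamp + timeout (in milliseconds) already lies in the past, and batadv_sum_counter(), which adds up the cpu-local counters for a given index. As a rough illustration only, here is a minimal user-space sketch of the wraparound-safe timeout test that such jiffies-based helpers rely on; the function name, tick units and the main() driver are made up for the example and are not taken from the patch:

#include <stdbool.h>
#include <stdio.h>

/* Wraparound-safe "has timed out" test, modelled on the jiffies idiom:
 * the signed difference stays meaningful even after the counter wraps.
 * All names here are illustrative and not taken from the patch.
 */
static bool has_timed_out(unsigned long now, unsigned long timestamp,
                          unsigned long timeout_ticks)
{
        return (long)(now - (timestamp + timeout_ticks)) >= 0;
}

int main(void)
{
        unsigned long near_wrap = (unsigned long)-5; /* just below ULONG_MAX */

        /* ten ticks later the counter has wrapped, yet the eight-tick
         * timeout is still correctly reported as expired
         */
        printf("%d\n", has_timed_out(near_wrap + 10, near_wrap, 8)); /* 1 */
        printf("%d\n", has_timed_out(near_wrap + 3,  near_wrap, 8)); /* 0 */
        return 0;
}

The signed cast is what keeps the comparison correct across counter wraparound, which is why such checks go through a helper rather than a plain '>' on the raw values.
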
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 75fa5013af72..8caa2c72efa3 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2014-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Linus Lüssing 3 * Linus Lüssing
4 * 4 *
@@ -30,6 +30,7 @@
30#include <linux/in.h> 30#include <linux/in.h>
31#include <linux/ip.h> 31#include <linux/ip.h>
32#include <linux/ipv6.h> 32#include <linux/ipv6.h>
33#include <linux/kref.h>
33#include <linux/list.h> 34#include <linux/list.h>
34#include <linux/lockdep.h> 35#include <linux/lockdep.h>
35#include <linux/netdevice.h> 36#include <linux/netdevice.h>
@@ -55,7 +56,7 @@
55 * Collect multicast addresses of the local multicast listeners 56 * Collect multicast addresses of the local multicast listeners
56 * on the given soft interface, dev, in the given mcast_list. 57 * on the given soft interface, dev, in the given mcast_list.
57 * 58 *
58 * Returns -ENOMEM on memory allocation error or the number of 59 * Return: -ENOMEM on memory allocation error or the number of
59 * items added to the mcast_list otherwise. 60 * items added to the mcast_list otherwise.
60 */ 61 */
61static int batadv_mcast_mla_softif_get(struct net_device *dev, 62static int batadv_mcast_mla_softif_get(struct net_device *dev,
@@ -87,7 +88,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
87 * @mcast_addr: the multicast address to check 88 * @mcast_addr: the multicast address to check
88 * @mcast_list: the list with multicast addresses to search in 89 * @mcast_list: the list with multicast addresses to search in
89 * 90 *
90 * Returns true if the given address is already in the given list. 91 * Return: true if the given address is already in the given list.
91 * Otherwise returns false. 92 * Otherwise returns false.
92 */ 93 */
93static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, 94static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
@@ -195,8 +196,9 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
195 * batadv_mcast_has_bridge - check whether the soft-iface is bridged 196 * batadv_mcast_has_bridge - check whether the soft-iface is bridged
196 * @bat_priv: the bat priv with all the soft interface information 197 * @bat_priv: the bat priv with all the soft interface information
197 * 198 *
198 * Checks whether there is a bridge on top of our soft interface. Returns 199 * Checks whether there is a bridge on top of our soft interface.
199 * true if so, false otherwise. 200 *
201 * Return: true if there is a bridge, false otherwise.
200 */ 202 */
201static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv) 203static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
202{ 204{
@@ -218,7 +220,7 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
218 * Updates the own multicast tvlv with our current multicast related settings, 220 * Updates the own multicast tvlv with our current multicast related settings,
219 * capabilities and inabilities. 221 * capabilities and inabilities.
220 * 222 *
221 * Returns true if the tvlv container is registered afterwards. Otherwise 223 * Return: true if the tvlv container is registered afterwards. Otherwise
222 * returns false. 224 * returns false.
223 */ 225 */
224static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) 226static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
@@ -289,8 +291,8 @@ out:
289 * Checks whether the given IPv4 packet has the potential to be forwarded with a 291 * Checks whether the given IPv4 packet has the potential to be forwarded with a
290 * mode more optimal than classic flooding. 292 * mode more optimal than classic flooding.
291 * 293 *
292 * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM in case of 294 * Return: If so then 0. Otherwise -EINVAL or -ENOMEM in case of memory
293 * memory allocation failure. 295 * allocation failure.
294 */ 296 */
295static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, 297static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
296 struct sk_buff *skb, 298 struct sk_buff *skb,
@@ -327,8 +329,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
327 * Checks whether the given IPv6 packet has the potential to be forwarded with a 329 * Checks whether the given IPv6 packet has the potential to be forwarded with a
328 * mode more optimal than classic flooding. 330 * mode more optimal than classic flooding.
329 * 331 *
330 * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out 332 * Return: If so then 0. Otherwise -EINVAL is or -ENOMEM if we are out of memory
331 * of memory.
332 */ 333 */
333static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, 334static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
334 struct sk_buff *skb, 335 struct sk_buff *skb,
@@ -366,8 +367,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
366 * Checks whether the given multicast ethernet frame has the potential to be 367 * Checks whether the given multicast ethernet frame has the potential to be
367 * forwarded with a mode more optimal than classic flooding. 368 * forwarded with a mode more optimal than classic flooding.
368 * 369 *
369 * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out 370 * Return: If so then 0. Otherwise -EINVAL is or -ENOMEM if we are out of memory
370 * of memory.
371 */ 371 */
372static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, 372static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
373 struct sk_buff *skb, 373 struct sk_buff *skb,
@@ -398,7 +398,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
398 * @bat_priv: the bat priv with all the soft interface information 398 * @bat_priv: the bat priv with all the soft interface information
399 * @ethhdr: ethernet header of a packet 399 * @ethhdr: ethernet header of a packet
400 * 400 *
401 * Returns the number of nodes which want all IPv4 multicast traffic if the 401 * Return: the number of nodes which want all IPv4 multicast traffic if the
402 * given ethhdr is from an IPv4 packet or the number of nodes which want all 402 * given ethhdr is from an IPv4 packet or the number of nodes which want all
403 * IPv6 traffic if it matches an IPv6 packet. 403 * IPv6 traffic if it matches an IPv6 packet.
404 */ 404 */
@@ -421,7 +421,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv,
421 * @bat_priv: the bat priv with all the soft interface information 421 * @bat_priv: the bat priv with all the soft interface information
422 * @ethhdr: the ether header containing the multicast destination 422 * @ethhdr: the ether header containing the multicast destination
423 * 423 *
424 * Returns an orig_node matching the multicast address provided by ethhdr 424 * Return: an orig_node matching the multicast address provided by ethhdr
425 * via a translation table lookup. This increases the returned nodes refcount. 425 * via a translation table lookup. This increases the returned nodes refcount.
426 */ 426 */
427static struct batadv_orig_node * 427static struct batadv_orig_node *
@@ -436,7 +436,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
436 * batadv_mcast_want_forw_ipv4_node_get - get a node with an ipv4 flag 436 * batadv_mcast_want_forw_ipv4_node_get - get a node with an ipv4 flag
437 * @bat_priv: the bat priv with all the soft interface information 437 * @bat_priv: the bat priv with all the soft interface information
438 * 438 *
439 * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and 439 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
440 * increases its refcount. 440 * increases its refcount.
441 */ 441 */
442static struct batadv_orig_node * 442static struct batadv_orig_node *
@@ -448,7 +448,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
448 hlist_for_each_entry_rcu(tmp_orig_node, 448 hlist_for_each_entry_rcu(tmp_orig_node,
449 &bat_priv->mcast.want_all_ipv4_list, 449 &bat_priv->mcast.want_all_ipv4_list,
450 mcast_want_all_ipv4_node) { 450 mcast_want_all_ipv4_node) {
451 if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) 451 if (!kref_get_unless_zero(&tmp_orig_node->refcount))
452 continue; 452 continue;
453 453
454 orig_node = tmp_orig_node; 454 orig_node = tmp_orig_node;
@@ -463,7 +463,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
463 * batadv_mcast_want_forw_ipv6_node_get - get a node with an ipv6 flag 463 * batadv_mcast_want_forw_ipv6_node_get - get a node with an ipv6 flag
464 * @bat_priv: the bat priv with all the soft interface information 464 * @bat_priv: the bat priv with all the soft interface information
465 * 465 *
466 * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set 466 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
467 * and increases its refcount. 467 * and increases its refcount.
468 */ 468 */
469static struct batadv_orig_node * 469static struct batadv_orig_node *
@@ -475,7 +475,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
475 hlist_for_each_entry_rcu(tmp_orig_node, 475 hlist_for_each_entry_rcu(tmp_orig_node,
476 &bat_priv->mcast.want_all_ipv6_list, 476 &bat_priv->mcast.want_all_ipv6_list,
477 mcast_want_all_ipv6_node) { 477 mcast_want_all_ipv6_node) {
478 if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) 478 if (!kref_get_unless_zero(&tmp_orig_node->refcount))
479 continue; 479 continue;
480 480
481 orig_node = tmp_orig_node; 481 orig_node = tmp_orig_node;
@@ -491,7 +491,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
491 * @bat_priv: the bat priv with all the soft interface information 491 * @bat_priv: the bat priv with all the soft interface information
492 * @ethhdr: an ethernet header to determine the protocol family from 492 * @ethhdr: an ethernet header to determine the protocol family from
493 * 493 *
494 * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or 494 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
495 * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and 495 * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and
496 * increases its refcount. 496 * increases its refcount.
497 */ 497 */
@@ -514,7 +514,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
514 * batadv_mcast_want_forw_unsnoop_node_get - get a node with an unsnoopable flag 514 * batadv_mcast_want_forw_unsnoop_node_get - get a node with an unsnoopable flag
515 * @bat_priv: the bat priv with all the soft interface information 515 * @bat_priv: the bat priv with all the soft interface information
516 * 516 *
517 * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag 517 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
518 * set and increases its refcount. 518 * set and increases its refcount.
519 */ 519 */
520static struct batadv_orig_node * 520static struct batadv_orig_node *
@@ -526,7 +526,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
526 hlist_for_each_entry_rcu(tmp_orig_node, 526 hlist_for_each_entry_rcu(tmp_orig_node,
527 &bat_priv->mcast.want_all_unsnoopables_list, 527 &bat_priv->mcast.want_all_unsnoopables_list,
528 mcast_want_all_unsnoopables_node) { 528 mcast_want_all_unsnoopables_node) {
529 if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) 529 if (!kref_get_unless_zero(&tmp_orig_node->refcount))
530 continue; 530 continue;
531 531
532 orig_node = tmp_orig_node; 532 orig_node = tmp_orig_node;
@@ -543,7 +543,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
543 * @skb: The multicast packet to check 543 * @skb: The multicast packet to check
544 * @orig: an originator to be set to forward the skb to 544 * @orig: an originator to be set to forward the skb to
545 * 545 *
546 * Returns the forwarding mode as enum batadv_forw_mode and in case of 546 * Return: the forwarding mode as enum batadv_forw_mode and in case of
547 * BATADV_FORW_SINGLE set the orig to the single originator the skb 547 * BATADV_FORW_SINGLE set the orig to the single originator the skb
548 * should be forwarded to. 548 * should be forwarded to.
549 */ 549 */
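
Throughout the multicast.c hunks the open-coded atomic_inc_not_zero(&...->refcount) calls become kref_get_unless_zero(&...->refcount): while walking an RCU-protected list, a reference is taken only if the object's count has not already dropped to zero and the object begun to be freed. A hedged, self-contained sketch of that "increment unless zero" semantics in plain C11 follows; the names are illustrative and this is not the kernel implementation:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the semantics of kref_get_unless_zero() /
 * atomic_inc_not_zero(): a CAS loop that refuses to resurrect an object
 * whose reference count has already reached zero.
 */
static bool get_unless_zero(atomic_uint *refcount)
{
        unsigned int old = atomic_load(refcount);

        while (old != 0) {
                if (atomic_compare_exchange_weak(refcount, &old, old + 1))
                        return true;  /* reference taken */
                /* 'old' was reloaded by the failed CAS; retry */
        }
        return false;  /* object is already on its way to being freed */
}

int main(void)
{
        atomic_uint live = 1, dying = 0;

        printf("%d %d\n", get_unless_zero(&live), get_unless_zero(&dying)); /* 1 0 */
        return 0;
}

In the RCU list walks above, a false return simply means "skip this entry and continue", which is exactly what the `continue` after the converted call does.
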
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 8f3cb04b9f13..80bceec55592 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2014-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Linus Lüssing 3 * Linus Lüssing
4 * 4 *
@@ -23,7 +23,7 @@
23struct sk_buff; 23struct sk_buff;
24 24
25/** 25/**
26 * batadv_forw_mode - the way a packet should be forwarded as 26 * enum batadv_forw_mode - the way a packet should be forwarded as
27 * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic 27 * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic
28 * flooding) 28 * flooding)
29 * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the 29 * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index cc63b44f0d2e..b41719b6487a 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen 3 * Martin Hundebøll, Jeppe Ledet-Pedersen
4 * 4 *
@@ -32,6 +32,7 @@
32#include <linux/jhash.h> 32#include <linux/jhash.h>
33#include <linux/jiffies.h> 33#include <linux/jiffies.h>
34#include <linux/kernel.h> 34#include <linux/kernel.h>
35#include <linux/kref.h>
35#include <linux/list.h> 36#include <linux/list.h>
36#include <linux/lockdep.h> 37#include <linux/lockdep.h>
37#include <linux/netdevice.h> 38#include <linux/netdevice.h>
@@ -64,6 +65,8 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
64 65
65/** 66/**
66 * batadv_nc_init - one-time initialization for network coding 67 * batadv_nc_init - one-time initialization for network coding
68 *
69 * Return: 0 on success or negative error number in case of failure
67 */ 70 */
68int __init batadv_nc_init(void) 71int __init batadv_nc_init(void)
69{ 72{
@@ -142,6 +145,8 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
142/** 145/**
143 * batadv_nc_mesh_init - initialise coding hash table and start house keeping 146 * batadv_nc_mesh_init - initialise coding hash table and start house keeping
144 * @bat_priv: the bat priv with all the soft interface information 147 * @bat_priv: the bat priv with all the soft interface information
148 *
149 * Return: 0 on success or negative error number in case of failure
145 */ 150 */
146int batadv_nc_mesh_init(struct batadv_priv *bat_priv) 151int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
147{ 152{
@@ -205,34 +210,50 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
205/** 210/**
206 * batadv_nc_node_release - release nc_node from lists and queue for free after 211 * batadv_nc_node_release - release nc_node from lists and queue for free after
207 * rcu grace period 212 * rcu grace period
208 * @nc_node: the nc node to free 213 * @ref: kref pointer of the nc_node
209 */ 214 */
210static void batadv_nc_node_release(struct batadv_nc_node *nc_node) 215static void batadv_nc_node_release(struct kref *ref)
211{ 216{
212 batadv_orig_node_free_ref(nc_node->orig_node); 217 struct batadv_nc_node *nc_node;
218
219 nc_node = container_of(ref, struct batadv_nc_node, refcount);
220
221 batadv_orig_node_put(nc_node->orig_node);
213 kfree_rcu(nc_node, rcu); 222 kfree_rcu(nc_node, rcu);
214} 223}
215 224
216/** 225/**
217 * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly 226 * batadv_nc_node_put - decrement the nc_node refcounter and possibly
218 * release it 227 * release it
219 * @nc_node: the nc node to free 228 * @nc_node: nc_node to be free'd
220 */ 229 */
221static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) 230static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
222{ 231{
223 if (atomic_dec_and_test(&nc_node->refcount)) 232 kref_put(&nc_node->refcount, batadv_nc_node_release);
224 batadv_nc_node_release(nc_node);
225} 233}
226 234
227/** 235/**
228 * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly 236 * batadv_nc_path_release - release nc_path from lists and queue for free after
229 * frees it 237 * rcu grace period
230 * @nc_path: the nc node to free 238 * @ref: kref pointer of the nc_path
231 */ 239 */
232static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) 240static void batadv_nc_path_release(struct kref *ref)
233{ 241{
234 if (atomic_dec_and_test(&nc_path->refcount)) 242 struct batadv_nc_path *nc_path;
235 kfree_rcu(nc_path, rcu); 243
244 nc_path = container_of(ref, struct batadv_nc_path, refcount);
245
246 kfree_rcu(nc_path, rcu);
247}
248
249/**
250 * batadv_nc_path_put - decrement the nc_path refcounter and possibly
251 * release it
252 * @nc_path: nc_path to be free'd
253 */
254static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
255{
256 kref_put(&nc_path->refcount, batadv_nc_path_release);
236} 257}
237 258
238/** 259/**
@@ -242,7 +263,7 @@ static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
242static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) 263static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
243{ 264{
244 kfree_skb(nc_packet->skb); 265 kfree_skb(nc_packet->skb);
245 batadv_nc_path_free_ref(nc_packet->nc_path); 266 batadv_nc_path_put(nc_packet->nc_path);
246 kfree(nc_packet); 267 kfree(nc_packet);
247} 268}
248 269
@@ -251,7 +272,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
251 * @bat_priv: the bat priv with all the soft interface information 272 * @bat_priv: the bat priv with all the soft interface information
252 * @nc_node: the nc node to check 273 * @nc_node: the nc node to check
253 * 274 *
254 * Returns true if the entry has to be purged now, false otherwise 275 * Return: true if the entry has to be purged now, false otherwise
255 */ 276 */
256static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, 277static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
257 struct batadv_nc_node *nc_node) 278 struct batadv_nc_node *nc_node)
@@ -267,7 +288,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
267 * @bat_priv: the bat priv with all the soft interface information 288 * @bat_priv: the bat priv with all the soft interface information
268 * @nc_path: the nc path to check 289 * @nc_path: the nc path to check
269 * 290 *
270 * Returns true if the entry has to be purged now, false otherwise 291 * Return: true if the entry has to be purged now, false otherwise
271 */ 292 */
272static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, 293static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
273 struct batadv_nc_path *nc_path) 294 struct batadv_nc_path *nc_path)
@@ -287,7 +308,7 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
287 * @bat_priv: the bat priv with all the soft interface information 308 * @bat_priv: the bat priv with all the soft interface information
288 * @nc_path: the nc path to check 309 * @nc_path: the nc path to check
289 * 310 *
290 * Returns true if the entry has to be purged now, false otherwise 311 * Return: true if the entry has to be purged now, false otherwise
291 */ 312 */
292static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, 313static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
293 struct batadv_nc_path *nc_path) 314 struct batadv_nc_path *nc_path)
@@ -335,7 +356,7 @@ batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
335 "Removing nc_node %pM -> %pM\n", 356 "Removing nc_node %pM -> %pM\n",
336 nc_node->addr, nc_node->orig_node->orig); 357 nc_node->addr, nc_node->orig_node->orig);
337 list_del_rcu(&nc_node->list); 358 list_del_rcu(&nc_node->list);
338 batadv_nc_node_free_ref(nc_node); 359 batadv_nc_node_put(nc_node);
339 } 360 }
340 spin_unlock_bh(lock); 361 spin_unlock_bh(lock);
341} 362}
@@ -446,7 +467,7 @@ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
446 "Remove nc_path %pM -> %pM\n", 467 "Remove nc_path %pM -> %pM\n",
447 nc_path->prev_hop, nc_path->next_hop); 468 nc_path->prev_hop, nc_path->next_hop);
448 hlist_del_rcu(&nc_path->hash_entry); 469 hlist_del_rcu(&nc_path->hash_entry);
449 batadv_nc_path_free_ref(nc_path); 470 batadv_nc_path_put(nc_path);
450 } 471 }
451 spin_unlock_bh(lock); 472 spin_unlock_bh(lock);
452 } 473 }
@@ -470,7 +491,7 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
470 * @data: data to hash 491 * @data: data to hash
471 * @size: size of the hash table 492 * @size: size of the hash table
472 * 493 *
473 * Returns the selected index in the hash table for the given data. 494 * Return: the selected index in the hash table for the given data.
474 */ 495 */
475static u32 batadv_nc_hash_choose(const void *data, u32 size) 496static u32 batadv_nc_hash_choose(const void *data, u32 size)
476{ 497{
@@ -489,7 +510,7 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size)
489 * @node: node in the local table 510 * @node: node in the local table
490 * @data2: second object to compare the node to 511 * @data2: second object to compare the node to
491 * 512 *
492 * Returns 1 if the two entry are the same, 0 otherwise 513 * Return: 1 if the two entry are the same, 0 otherwise
493 */ 514 */
494static int batadv_nc_hash_compare(const struct hlist_node *node, 515static int batadv_nc_hash_compare(const struct hlist_node *node,
495 const void *data2) 516 const void *data2)
@@ -516,7 +537,7 @@ static int batadv_nc_hash_compare(const struct hlist_node *node,
516 * @hash: hash table containing the nc path 537 * @hash: hash table containing the nc path
517 * @data: search key 538 * @data: search key
518 * 539 *
519 * Returns the nc_path if found, NULL otherwise. 540 * Return: the nc_path if found, NULL otherwise.
520 */ 541 */
521static struct batadv_nc_path * 542static struct batadv_nc_path *
522batadv_nc_hash_find(struct batadv_hashtable *hash, 543batadv_nc_hash_find(struct batadv_hashtable *hash,
@@ -537,7 +558,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash,
537 if (!batadv_nc_hash_compare(&nc_path->hash_entry, data)) 558 if (!batadv_nc_hash_compare(&nc_path->hash_entry, data))
538 continue; 559 continue;
539 560
540 if (!atomic_inc_not_zero(&nc_path->refcount)) 561 if (!kref_get_unless_zero(&nc_path->refcount))
541 continue; 562 continue;
542 563
543 nc_path_tmp = nc_path; 564 nc_path_tmp = nc_path;
@@ -554,9 +575,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash,
554 */ 575 */
555static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) 576static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
556{ 577{
557 batadv_send_skb_packet(nc_packet->skb, 578 batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node);
558 nc_packet->neigh_node->if_incoming,
559 nc_packet->nc_path->next_hop);
560 nc_packet->skb = NULL; 579 nc_packet->skb = NULL;
561 batadv_nc_packet_free(nc_packet); 580 batadv_nc_packet_free(nc_packet);
562} 581}
@@ -571,7 +590,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
571 * timeout. If so, the packet is no longer kept and the entry deleted from the 590 * timeout. If so, the packet is no longer kept and the entry deleted from the
572 * queue. Has to be called with the appropriate locks. 591 * queue. Has to be called with the appropriate locks.
573 * 592 *
574 * Returns false as soon as the entry in the fifo queue has not been timed out 593 * Return: false as soon as the entry in the fifo queue has not been timed out
575 * yet and true otherwise. 594 * yet and true otherwise.
576 */ 595 */
577static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, 596static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
@@ -610,7 +629,7 @@ out:
610 * packet is no longer delayed, immediately sent and the entry deleted from the 629 * packet is no longer delayed, immediately sent and the entry deleted from the
611 * queue. Has to be called with the appropriate locks. 630 * queue. Has to be called with the appropriate locks.
612 * 631 *
613 * Returns false as soon as the entry in the fifo queue has not been timed out 632 * Return: false as soon as the entry in the fifo queue has not been timed out
614 * yet and true otherwise. 633 * yet and true otherwise.
615 */ 634 */
616static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, 635static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
@@ -731,7 +750,7 @@ static void batadv_nc_worker(struct work_struct *work)
731 * @orig_node: neighboring orig node which may be used as nc candidate 750 * @orig_node: neighboring orig node which may be used as nc candidate
732 * @ogm_packet: incoming ogm packet also used for the checks 751 * @ogm_packet: incoming ogm packet also used for the checks
733 * 752 *
734 * Returns true if: 753 * Return: true if:
735 * 1) The OGM must have the most recent sequence number. 754 * 1) The OGM must have the most recent sequence number.
736 * 2) The TTL must be decremented by one and only one. 755 * 2) The TTL must be decremented by one and only one.
737 * 3) The OGM must be received from the first hop from orig_node. 756 * 3) The OGM must be received from the first hop from orig_node.
@@ -751,7 +770,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
751 770
752 last_ttl = orig_ifinfo->last_ttl; 771 last_ttl = orig_ifinfo->last_ttl;
753 last_real_seqno = orig_ifinfo->last_real_seqno; 772 last_real_seqno = orig_ifinfo->last_real_seqno;
754 batadv_orig_ifinfo_free_ref(orig_ifinfo); 773 batadv_orig_ifinfo_put(orig_ifinfo);
755 774
756 if (last_real_seqno != ntohl(ogm_packet->seqno)) 775 if (last_real_seqno != ntohl(ogm_packet->seqno))
757 return false; 776 return false;
@@ -772,7 +791,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
772 * (can be equal to orig_node) 791 * (can be equal to orig_node)
773 * @in_coding: traverse incoming or outgoing network coding list 792 * @in_coding: traverse incoming or outgoing network coding list
774 * 793 *
775 * Returns the nc_node if found, NULL otherwise. 794 * Return: the nc_node if found, NULL otherwise.
776 */ 795 */
777static struct batadv_nc_node 796static struct batadv_nc_node
778*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, 797*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
@@ -793,7 +812,7 @@ static struct batadv_nc_node
793 if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) 812 if (!batadv_compare_eth(nc_node->addr, orig_node->orig))
794 continue; 813 continue;
795 814
796 if (!atomic_inc_not_zero(&nc_node->refcount)) 815 if (!kref_get_unless_zero(&nc_node->refcount))
797 continue; 816 continue;
798 817
799 /* Found a match */ 818 /* Found a match */
@@ -814,7 +833,7 @@ static struct batadv_nc_node
814 * (can be equal to orig_node) 833 * (can be equal to orig_node)
815 * @in_coding: traverse incoming or outgoing network coding list 834 * @in_coding: traverse incoming or outgoing network coding list
816 * 835 *
817 * Returns the nc_node if found or created, NULL in case of an error. 836 * Return: the nc_node if found or created, NULL in case of an error.
818 */ 837 */
819static struct batadv_nc_node 838static struct batadv_nc_node
820*batadv_nc_get_nc_node(struct batadv_priv *bat_priv, 839*batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
@@ -837,14 +856,15 @@ static struct batadv_nc_node
837 if (!nc_node) 856 if (!nc_node)
838 return NULL; 857 return NULL;
839 858
840 if (!atomic_inc_not_zero(&orig_neigh_node->refcount)) 859 if (!kref_get_unless_zero(&orig_neigh_node->refcount))
841 goto free; 860 goto free;
842 861
843 /* Initialize nc_node */ 862 /* Initialize nc_node */
844 INIT_LIST_HEAD(&nc_node->list); 863 INIT_LIST_HEAD(&nc_node->list);
845 ether_addr_copy(nc_node->addr, orig_node->orig); 864 ether_addr_copy(nc_node->addr, orig_node->orig);
846 nc_node->orig_node = orig_neigh_node; 865 nc_node->orig_node = orig_neigh_node;
847 atomic_set(&nc_node->refcount, 2); 866 kref_init(&nc_node->refcount);
867 kref_get(&nc_node->refcount);
848 868
849 /* Select ingoing or outgoing coding node */ 869 /* Select ingoing or outgoing coding node */
850 if (in_coding) { 870 if (in_coding) {
@@ -920,9 +940,9 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
920 940
921out: 941out:
922 if (in_nc_node) 942 if (in_nc_node)
923 batadv_nc_node_free_ref(in_nc_node); 943 batadv_nc_node_put(in_nc_node);
924 if (out_nc_node) 944 if (out_nc_node)
925 batadv_nc_node_free_ref(out_nc_node); 945 batadv_nc_node_put(out_nc_node);
926} 946}
927 947
928/** 948/**
@@ -932,7 +952,7 @@ out:
932 * @src: ethernet source address - first half of the nc path search key 952 * @src: ethernet source address - first half of the nc path search key
933 * @dst: ethernet destination address - second half of the nc path search key 953 * @dst: ethernet destination address - second half of the nc path search key
934 * 954 *
935 * Returns pointer to nc_path if the path was found or created, returns NULL 955 * Return: pointer to nc_path if the path was found or created, returns NULL
936 * on error. 956 * on error.
937 */ 957 */
938static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, 958static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
@@ -963,7 +983,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
963 /* Initialize nc_path */ 983 /* Initialize nc_path */
964 INIT_LIST_HEAD(&nc_path->packet_list); 984 INIT_LIST_HEAD(&nc_path->packet_list);
965 spin_lock_init(&nc_path->packet_list_lock); 985 spin_lock_init(&nc_path->packet_list_lock);
966 atomic_set(&nc_path->refcount, 2); 986 kref_init(&nc_path->refcount);
987 kref_get(&nc_path->refcount);
967 nc_path->last_valid = jiffies; 988 nc_path->last_valid = jiffies;
968 ether_addr_copy(nc_path->next_hop, dst); 989 ether_addr_copy(nc_path->next_hop, dst);
969 ether_addr_copy(nc_path->prev_hop, src); 990 ether_addr_copy(nc_path->prev_hop, src);
@@ -989,6 +1010,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
989 * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair 1010 * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair
990 * selection of a receiver with slightly lower TQ than the other 1011 * selection of a receiver with slightly lower TQ than the other
991 * @tq: to be weighted tq value 1012 * @tq: to be weighted tq value
1013 *
1014 * Return: scaled tq value
992 */ 1015 */
993static u8 batadv_nc_random_weight_tq(u8 tq) 1016static u8 batadv_nc_random_weight_tq(u8 tq)
994{ 1017{
@@ -1029,7 +1052,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
1029 * @nc_packet: structure containing the packet to the skb can be coded with 1052 * @nc_packet: structure containing the packet to the skb can be coded with
1030 * @neigh_node: next hop to forward packet to 1053 * @neigh_node: next hop to forward packet to
1031 * 1054 *
1032 * Returns true if both packets are consumed, false otherwise. 1055 * Return: true if both packets are consumed, false otherwise.
1033 */ 1056 */
1034static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, 1057static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1035 struct sk_buff *skb, 1058 struct sk_buff *skb,
@@ -1042,11 +1065,11 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1042 struct batadv_unicast_packet *packet1; 1065 struct batadv_unicast_packet *packet1;
1043 struct batadv_unicast_packet *packet2; 1066 struct batadv_unicast_packet *packet2;
1044 struct batadv_coded_packet *coded_packet; 1067 struct batadv_coded_packet *coded_packet;
1045 struct batadv_neigh_node *neigh_tmp, *router_neigh; 1068 struct batadv_neigh_node *neigh_tmp, *router_neigh, *first_dest;
1046 struct batadv_neigh_node *router_coding = NULL; 1069 struct batadv_neigh_node *router_coding = NULL, *second_dest;
1047 struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL; 1070 struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL;
1048 struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL; 1071 struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL;
1049 u8 *first_source, *first_dest, *second_source, *second_dest; 1072 u8 *first_source, *second_source;
1050 __be32 packet_id1, packet_id2; 1073 __be32 packet_id1, packet_id2;
1051 size_t count; 1074 size_t count;
1052 bool res = false; 1075 bool res = false;
@@ -1089,9 +1112,9 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1089 */ 1112 */
1090 if (tq_weighted_neigh >= tq_weighted_coding) { 1113 if (tq_weighted_neigh >= tq_weighted_coding) {
1091 /* Destination from nc_packet is selected for MAC-header */ 1114 /* Destination from nc_packet is selected for MAC-header */
1092 first_dest = nc_packet->nc_path->next_hop; 1115 first_dest = nc_packet->neigh_node;
1093 first_source = nc_packet->nc_path->prev_hop; 1116 first_source = nc_packet->nc_path->prev_hop;
1094 second_dest = neigh_node->addr; 1117 second_dest = neigh_node;
1095 second_source = ethhdr->h_source; 1118 second_source = ethhdr->h_source;
1096 packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data; 1119 packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data;
1097 packet2 = (struct batadv_unicast_packet *)skb->data; 1120 packet2 = (struct batadv_unicast_packet *)skb->data;
@@ -1100,9 +1123,9 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1100 skb->data + sizeof(*packet2)); 1123 skb->data + sizeof(*packet2));
1101 } else { 1124 } else {
1102 /* Destination for skb is selected for MAC-header */ 1125 /* Destination for skb is selected for MAC-header */
1103 first_dest = neigh_node->addr; 1126 first_dest = neigh_node;
1104 first_source = ethhdr->h_source; 1127 first_source = ethhdr->h_source;
1105 second_dest = nc_packet->nc_path->next_hop; 1128 second_dest = nc_packet->neigh_node;
1106 second_source = nc_packet->nc_path->prev_hop; 1129 second_source = nc_packet->nc_path->prev_hop;
1107 packet1 = (struct batadv_unicast_packet *)skb->data; 1130 packet1 = (struct batadv_unicast_packet *)skb->data;
1108 packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data; 1131 packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data;
@@ -1144,7 +1167,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1144 coded_packet->first_ttvn = packet1->ttvn; 1167 coded_packet->first_ttvn = packet1->ttvn;
1145 1168
1146 /* Info about second unicast packet */ 1169 /* Info about second unicast packet */
1147 ether_addr_copy(coded_packet->second_dest, second_dest); 1170 ether_addr_copy(coded_packet->second_dest, second_dest->addr);
1148 ether_addr_copy(coded_packet->second_source, second_source); 1171 ether_addr_copy(coded_packet->second_source, second_source);
1149 ether_addr_copy(coded_packet->second_orig_dest, packet2->dest); 1172 ether_addr_copy(coded_packet->second_orig_dest, packet2->dest);
1150 coded_packet->second_crc = packet_id2; 1173 coded_packet->second_crc = packet_id2;
@@ -1199,17 +1222,17 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1199 batadv_nc_packet_free(nc_packet); 1222 batadv_nc_packet_free(nc_packet);
1200 1223
1201 /* Send the coded packet and return true */ 1224 /* Send the coded packet and return true */
1202 batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest); 1225 batadv_send_unicast_skb(skb_dest, first_dest);
1203 res = true; 1226 res = true;
1204out: 1227out:
1205 if (router_neigh) 1228 if (router_neigh)
1206 batadv_neigh_node_free_ref(router_neigh); 1229 batadv_neigh_node_put(router_neigh);
1207 if (router_coding) 1230 if (router_coding)
1208 batadv_neigh_node_free_ref(router_coding); 1231 batadv_neigh_node_put(router_coding);
1209 if (router_neigh_ifinfo) 1232 if (router_neigh_ifinfo)
1210 batadv_neigh_ifinfo_free_ref(router_neigh_ifinfo); 1233 batadv_neigh_ifinfo_put(router_neigh_ifinfo);
1211 if (router_coding_ifinfo) 1234 if (router_coding_ifinfo)
1212 batadv_neigh_ifinfo_free_ref(router_coding_ifinfo); 1235 batadv_neigh_ifinfo_put(router_coding_ifinfo);
1213 return res; 1236 return res;
1214} 1237}
1215 1238
@@ -1228,7 +1251,7 @@ out:
1228 * Since the source encoded the packet we can be certain it has all necessary 1251 * Since the source encoded the packet we can be certain it has all necessary
1229 * decode information. 1252 * decode information.
1230 * 1253 *
1231 * Returns true if coding of a decoded packet is allowed. 1254 * Return: true if coding of a decoded packet is allowed.
1232 */ 1255 */
1233static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) 1256static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
1234{ 1257{
@@ -1246,7 +1269,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
1246 * @skb: data skb to forward 1269 * @skb: data skb to forward
1247 * @eth_dst: next hop mac address of skb 1270 * @eth_dst: next hop mac address of skb
1248 * 1271 *
1249 * Returns true if coding of a decoded skb is allowed. 1272 * Return: true if coding of a decoded skb is allowed.
1250 */ 1273 */
1251static struct batadv_nc_packet * 1274static struct batadv_nc_packet *
1252batadv_nc_path_search(struct batadv_priv *bat_priv, 1275batadv_nc_path_search(struct batadv_priv *bat_priv,
@@ -1314,7 +1337,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
1314 * @eth_src: source mac address of skb 1337 * @eth_src: source mac address of skb
1315 * @in_nc_node: pointer to skb next hop's neighbor nc node 1338 * @in_nc_node: pointer to skb next hop's neighbor nc node
1316 * 1339 *
1317 * Returns an nc packet if a suitable coding packet was found, NULL otherwise. 1340 * Return: an nc packet if a suitable coding packet was found, NULL otherwise.
1318 */ 1341 */
1319static struct batadv_nc_packet * 1342static struct batadv_nc_packet *
1320batadv_nc_skb_src_search(struct batadv_priv *bat_priv, 1343batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
@@ -1347,7 +1370,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
1347 } 1370 }
1348 rcu_read_unlock(); 1371 rcu_read_unlock();
1349 1372
1350 batadv_orig_node_free_ref(orig_node); 1373 batadv_orig_node_put(orig_node);
1351 return nc_packet; 1374 return nc_packet;
1352} 1375}
1353 1376
@@ -1397,7 +1420,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
1397 * next hop that potentially sent a packet which our next hop also received 1420 * next hop that potentially sent a packet which our next hop also received
1398 * (overheard) and has stored for later decoding. 1421 * (overheard) and has stored for later decoding.
1399 * 1422 *
1400 * Returns true if the skb was consumed (encoded packet sent) or false otherwise 1423 * Return: true if the skb was consumed (encoded packet sent) or false otherwise
1401 */ 1424 */
1402static bool batadv_nc_skb_dst_search(struct sk_buff *skb, 1425static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
1403 struct batadv_neigh_node *neigh_node, 1426 struct batadv_neigh_node *neigh_node,
@@ -1451,7 +1474,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
1451 * @neigh_node: next hop to forward packet to 1474 * @neigh_node: next hop to forward packet to
1452 * @packet_id: checksum to identify packet 1475 * @packet_id: checksum to identify packet
1453 * 1476 *
1454 * Returns true if the packet was buffered or false in case of an error. 1477 * Return: true if the packet was buffered or false in case of an error.
1455 */ 1478 */
1456static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, 1479static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
1457 struct batadv_nc_path *nc_path, 1480 struct batadv_nc_path *nc_path,
@@ -1485,7 +1508,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
1485 * @skb: data skb to forward 1508 * @skb: data skb to forward
1486 * @neigh_node: next hop to forward packet to 1509 * @neigh_node: next hop to forward packet to
1487 * 1510 *
1488 * Returns true if the skb was consumed (encoded packet sent) or false otherwise 1511 * Return: true if the skb was consumed (encoded packet sent) or false otherwise
1489 */ 1512 */
1490bool batadv_nc_skb_forward(struct sk_buff *skb, 1513bool batadv_nc_skb_forward(struct sk_buff *skb,
1491 struct batadv_neigh_node *neigh_node) 1514 struct batadv_neigh_node *neigh_node)
@@ -1530,7 +1553,7 @@ bool batadv_nc_skb_forward(struct sk_buff *skb,
1530 return true; 1553 return true;
1531 1554
1532free_nc_path: 1555free_nc_path:
1533 batadv_nc_path_free_ref(nc_path); 1556 batadv_nc_path_put(nc_path);
1534out: 1557out:
1535 /* Packet is not consumed */ 1558 /* Packet is not consumed */
1536 return false; 1559 return false;
@@ -1592,7 +1615,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
1592free_skb: 1615free_skb:
1593 kfree_skb(skb); 1616 kfree_skb(skb);
1594free_nc_path: 1617free_nc_path:
1595 batadv_nc_path_free_ref(nc_path); 1618 batadv_nc_path_put(nc_path);
1596out: 1619out:
1597 return; 1620 return;
1598} 1621}
@@ -1624,7 +1647,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
1624 * @skb: unicast skb to decode 1647 * @skb: unicast skb to decode
1625 * @nc_packet: decode data needed to decode the skb 1648 * @nc_packet: decode data needed to decode the skb
1626 * 1649 *
1627 * Returns pointer to decoded unicast packet if the packet was decoded or NULL 1650 * Return: pointer to decoded unicast packet if the packet was decoded or NULL
1628 * in case of an error. 1651 * in case of an error.
1629 */ 1652 */
1630static struct batadv_unicast_packet * 1653static struct batadv_unicast_packet *
@@ -1718,7 +1741,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
1718 * @ethhdr: pointer to the ethernet header inside the coded packet 1741 * @ethhdr: pointer to the ethernet header inside the coded packet
1719 * @coded: coded packet we try to find decode data for 1742 * @coded: coded packet we try to find decode data for
1720 * 1743 *
1721 * Returns pointer to nc packet if the needed data was found or NULL otherwise. 1744 * Return: pointer to nc packet if the needed data was found or NULL otherwise.
1722 */ 1745 */
1723static struct batadv_nc_packet * 1746static struct batadv_nc_packet *
1724batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, 1747batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
@@ -1781,6 +1804,9 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
1781 * resulting unicast packet 1804 * resulting unicast packet
1782 * @skb: incoming coded packet 1805 * @skb: incoming coded packet
1783 * @recv_if: pointer to interface this packet was received on 1806 * @recv_if: pointer to interface this packet was received on
1807 *
1808 * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
1809 * otherwise.
1784 */ 1810 */
1785static int batadv_nc_recv_coded_packet(struct sk_buff *skb, 1811static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1786 struct batadv_hard_iface *recv_if) 1812 struct batadv_hard_iface *recv_if)
@@ -1865,6 +1891,8 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
1865 * batadv_nc_nodes_seq_print_text - print the nc node information 1891 * batadv_nc_nodes_seq_print_text - print the nc node information
1866 * @seq: seq file to print on 1892 * @seq: seq file to print on
1867 * @offset: not used 1893 * @offset: not used
1894 *
1895 * Return: always 0
1868 */ 1896 */
1869int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) 1897int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
1870{ 1898{
@@ -1920,13 +1948,15 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
1920 1948
1921out: 1949out:
1922 if (primary_if) 1950 if (primary_if)
1923 batadv_hardif_free_ref(primary_if); 1951 batadv_hardif_put(primary_if);
1924 return 0; 1952 return 0;
1925} 1953}
1926 1954
1927/** 1955/**
1928 * batadv_nc_init_debugfs - create nc folder and related files in debugfs 1956 * batadv_nc_init_debugfs - create nc folder and related files in debugfs
1929 * @bat_priv: the bat priv with all the soft interface information 1957 * @bat_priv: the bat priv with all the soft interface information
1958 *
1959 * Return: 0 on success or negative error number in case of failure
1930 */ 1960 */
1931int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) 1961int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
1932{ 1962{
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 8f6d4ad8778a..d6d7fb4ec5d5 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen 3 * Martin Hundebøll, Jeppe Ledet-Pedersen
4 * 4 *
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index fe578f75c391..c355a824713c 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -18,11 +18,13 @@
18#include "originator.h" 18#include "originator.h"
19#include "main.h" 19#include "main.h"
20 20
21#include <linux/atomic.h>
21#include <linux/errno.h> 22#include <linux/errno.h>
22#include <linux/etherdevice.h> 23#include <linux/etherdevice.h>
23#include <linux/fs.h> 24#include <linux/fs.h>
24#include <linux/jiffies.h> 25#include <linux/jiffies.h>
25#include <linux/kernel.h> 26#include <linux/kernel.h>
27#include <linux/kref.h>
26#include <linux/list.h> 28#include <linux/list.h>
27#include <linux/lockdep.h> 29#include <linux/lockdep.h>
28#include <linux/netdevice.h> 30#include <linux/netdevice.h>
@@ -47,7 +49,13 @@ static struct lock_class_key batadv_orig_hash_lock_class_key;
47 49
48static void batadv_purge_orig(struct work_struct *work); 50static void batadv_purge_orig(struct work_struct *work);
49 51
50/* returns 1 if they are the same originator */ 52/**
53 * batadv_compare_orig - comparing function used in the originator hash table
54 * @node: node in the local table
55 * @data2: second object to compare the node to
56 *
57 * Return: 1 if they are the same originator
58 */
51int batadv_compare_orig(const struct hlist_node *node, const void *data2) 59int batadv_compare_orig(const struct hlist_node *node, const void *data2)
52{ 60{
53 const void *data1 = container_of(node, struct batadv_orig_node, 61 const void *data1 = container_of(node, struct batadv_orig_node,
@@ -61,7 +69,7 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2)
61 * @orig_node: the originator serving the VLAN 69 * @orig_node: the originator serving the VLAN
62 * @vid: the VLAN identifier 70 * @vid: the VLAN identifier
63 * 71 *
64 * Returns the vlan object identified by vid and belonging to orig_node or NULL 72 * Return: the vlan object identified by vid and belonging to orig_node or NULL
65 * if it does not exist. 73 * if it does not exist.
66 */ 74 */
67struct batadv_orig_node_vlan * 75struct batadv_orig_node_vlan *
@@ -75,7 +83,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
75 if (tmp->vid != vid) 83 if (tmp->vid != vid)
76 continue; 84 continue;
77 85
78 if (!atomic_inc_not_zero(&tmp->refcount)) 86 if (!kref_get_unless_zero(&tmp->refcount))
79 continue; 87 continue;
80 88
81 vlan = tmp; 89 vlan = tmp;
@@ -93,7 +101,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
93 * @orig_node: the originator serving the VLAN 101 * @orig_node: the originator serving the VLAN
94 * @vid: the VLAN identifier 102 * @vid: the VLAN identifier
95 * 103 *
96 * Returns NULL in case of failure or the vlan object identified by vid and 104 * Return: NULL in case of failure or the vlan object identified by vid and
97 * belonging to orig_node otherwise. The object is created and added to the list 105 * belonging to orig_node otherwise. The object is created and added to the list
98 * if it does not exist. 106 * if it does not exist.
99 * 107 *
@@ -116,7 +124,8 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
116 if (!vlan) 124 if (!vlan)
117 goto out; 125 goto out;
118 126
119 atomic_set(&vlan->refcount, 2); 127 kref_init(&vlan->refcount);
128 kref_get(&vlan->refcount);
120 vlan->vid = vid; 129 vlan->vid = vid;
121 130
122 hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list); 131 hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list);
@@ -128,14 +137,27 @@ out:
128} 137}
129 138
130/** 139/**
131 * batadv_orig_node_vlan_free_ref - decrement the refcounter and possibly free 140 * batadv_orig_node_vlan_release - release originator-vlan object from lists
141 * and queue for free after rcu grace period
142 * @ref: kref pointer of the originator-vlan object
143 */
144static void batadv_orig_node_vlan_release(struct kref *ref)
145{
146 struct batadv_orig_node_vlan *orig_vlan;
147
148 orig_vlan = container_of(ref, struct batadv_orig_node_vlan, refcount);
149
150 kfree_rcu(orig_vlan, rcu);
151}
152
153/**
154 * batadv_orig_node_vlan_put - decrement the refcounter and possibly release
132 * the originator-vlan object 155 * the originator-vlan object
133 * @orig_vlan: the originator-vlan object to release 156 * @orig_vlan: the originator-vlan object to release
134 */ 157 */
135void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan) 158void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
136{ 159{
137 if (atomic_dec_and_test(&orig_vlan->refcount)) 160 kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
138 kfree_rcu(orig_vlan, rcu);
139} 161}
140 162
141int batadv_originator_init(struct batadv_priv *bat_priv) 163int batadv_originator_init(struct batadv_priv *bat_priv)
@@ -165,99 +187,98 @@ err:
165/** 187/**
166 * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for 188 * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for
167 * free after rcu grace period 189 * free after rcu grace period
168 * @neigh_ifinfo: the neigh_ifinfo object to release 190 * @ref: kref pointer of the neigh_ifinfo
169 */ 191 */
170static void 192static void batadv_neigh_ifinfo_release(struct kref *ref)
171batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo)
172{ 193{
194 struct batadv_neigh_ifinfo *neigh_ifinfo;
195
196 neigh_ifinfo = container_of(ref, struct batadv_neigh_ifinfo, refcount);
197
173 if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) 198 if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
174 batadv_hardif_free_ref(neigh_ifinfo->if_outgoing); 199 batadv_hardif_put(neigh_ifinfo->if_outgoing);
175 200
176 kfree_rcu(neigh_ifinfo, rcu); 201 kfree_rcu(neigh_ifinfo, rcu);
177} 202}
178 203
179/** 204/**
180 * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release 205 * batadv_neigh_ifinfo_put - decrement the refcounter and possibly release
181 * the neigh_ifinfo 206 * the neigh_ifinfo
182 * @neigh_ifinfo: the neigh_ifinfo object to release 207 * @neigh_ifinfo: the neigh_ifinfo object to release
183 */ 208 */
184void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) 209void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
185{ 210{
186 if (atomic_dec_and_test(&neigh_ifinfo->refcount)) 211 kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
187 batadv_neigh_ifinfo_release(neigh_ifinfo);
188} 212}
189 213
190/** 214/**
191 * batadv_hardif_neigh_release - release hardif neigh node from lists and 215 * batadv_hardif_neigh_release - release hardif neigh node from lists and
192 * queue for free after rcu grace period 216 * queue for free after rcu grace period
193 * @hardif_neigh: hardif neigh neighbor to free 217 * @ref: kref pointer of the neigh_node
194 */ 218 */
195static void 219static void batadv_hardif_neigh_release(struct kref *ref)
196batadv_hardif_neigh_release(struct batadv_hardif_neigh_node *hardif_neigh)
197{ 220{
221 struct batadv_hardif_neigh_node *hardif_neigh;
222
223 hardif_neigh = container_of(ref, struct batadv_hardif_neigh_node,
224 refcount);
225
198 spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock); 226 spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock);
199 hlist_del_init_rcu(&hardif_neigh->list); 227 hlist_del_init_rcu(&hardif_neigh->list);
200 spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock); 228 spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock);
201 229
202 batadv_hardif_free_ref(hardif_neigh->if_incoming); 230 batadv_hardif_put(hardif_neigh->if_incoming);
203 kfree_rcu(hardif_neigh, rcu); 231 kfree_rcu(hardif_neigh, rcu);
204} 232}
205 233
206/** 234/**
207 * batadv_hardif_neigh_free_ref - decrement the hardif neighbors refcounter 235 * batadv_hardif_neigh_put - decrement the hardif neighbors refcounter
208 * and possibly release it 236 * and possibly release it
209 * @hardif_neigh: hardif neigh neighbor to free 237 * @hardif_neigh: hardif neigh neighbor to free
210 */ 238 */
211void batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh) 239void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
212{ 240{
213 if (atomic_dec_and_test(&hardif_neigh->refcount)) 241 kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
214 batadv_hardif_neigh_release(hardif_neigh);
215} 242}
216 243
217/** 244/**
218 * batadv_neigh_node_release - release neigh_node from lists and queue for 245 * batadv_neigh_node_release - release neigh_node from lists and queue for
219 * free after rcu grace period 246 * free after rcu grace period
220 * @neigh_node: neigh neighbor to free 247 * @ref: kref pointer of the neigh_node
221 */ 248 */
222static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) 249static void batadv_neigh_node_release(struct kref *ref)
223{ 250{
224 struct hlist_node *node_tmp; 251 struct hlist_node *node_tmp;
225 struct batadv_hardif_neigh_node *hardif_neigh; 252 struct batadv_neigh_node *neigh_node;
226 struct batadv_neigh_ifinfo *neigh_ifinfo; 253 struct batadv_neigh_ifinfo *neigh_ifinfo;
227 struct batadv_algo_ops *bao; 254 struct batadv_algo_ops *bao;
228 255
256 neigh_node = container_of(ref, struct batadv_neigh_node, refcount);
229 bao = neigh_node->orig_node->bat_priv->bat_algo_ops; 257 bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
230 258
231 hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, 259 hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
232 &neigh_node->ifinfo_list, list) { 260 &neigh_node->ifinfo_list, list) {
233 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 261 batadv_neigh_ifinfo_put(neigh_ifinfo);
234 } 262 }
235 263
236 hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming, 264 batadv_hardif_neigh_put(neigh_node->hardif_neigh);
237 neigh_node->addr);
238 if (hardif_neigh) {
239 /* batadv_hardif_neigh_get() increases refcount too */
240 batadv_hardif_neigh_free_ref(hardif_neigh);
241 batadv_hardif_neigh_free_ref(hardif_neigh);
242 }
243 265
244 if (bao->bat_neigh_free) 266 if (bao->bat_neigh_free)
245 bao->bat_neigh_free(neigh_node); 267 bao->bat_neigh_free(neigh_node);
246 268
247 batadv_hardif_free_ref(neigh_node->if_incoming); 269 batadv_hardif_put(neigh_node->if_incoming);
248 270
249 kfree_rcu(neigh_node, rcu); 271 kfree_rcu(neigh_node, rcu);
250} 272}
251 273
252/** 274/**
253 * batadv_neigh_node_free_ref - decrement the neighbors refcounter 275 * batadv_neigh_node_put - decrement the neighbors refcounter and possibly
254 * and possibly release it 276 * release it
255 * @neigh_node: neigh neighbor to free 277 * @neigh_node: neigh neighbor to free
256 */ 278 */
257void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) 279void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
258{ 280{
259 if (atomic_dec_and_test(&neigh_node->refcount)) 281 kref_put(&neigh_node->refcount, batadv_neigh_node_release);
260 batadv_neigh_node_release(neigh_node);
261} 282}
262 283
263/** 284/**
@@ -266,7 +287,7 @@ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
266 * @if_outgoing: the interface where the payload packet has been received or 287 * @if_outgoing: the interface where the payload packet has been received or
267 * the OGM should be sent to 288 * the OGM should be sent to
268 * 289 *
269 * Returns the neighbor which should be router for this orig_node/iface. 290 * Return: the neighbor which should be router for this orig_node/iface.
270 * 291 *
271 * The object is returned with refcounter increased by 1. 292 * The object is returned with refcounter increased by 1.
272 */ 293 */
@@ -286,7 +307,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
286 break; 307 break;
287 } 308 }
288 309
289 if (router && !atomic_inc_not_zero(&router->refcount)) 310 if (router && !kref_get_unless_zero(&router->refcount))
290 router = NULL; 311 router = NULL;
291 312
292 rcu_read_unlock(); 313 rcu_read_unlock();
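Lookups like the one above run under rcu_read_lock() and may race with the last reference being dropped; kref_get_unless_zero() plays the same role atomic_inc_not_zero() did before and refuses to take a reference on an object whose counter already reached zero. A hedged sketch of the pattern with generic names (struct foo as in the previous sketch, extended by a struct hlist_node member called list):

#include <linux/kref.h>
#include <linux/rculist.h>

static struct foo *foo_list_get(struct hlist_head *head)
{
	struct foo *foo, *res = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(foo, head, list) {
		/* an entry whose counter already hit zero is being freed */
		if (!kref_get_unless_zero(&foo->refcount))
			continue;

		res = foo;
		break;
	}
	rcu_read_unlock();

	return res;
}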
@@ -298,7 +319,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
298 * @orig_node: the orig node to be queried 319 * @orig_node: the orig node to be queried
299 * @if_outgoing: the interface for which the ifinfo should be acquired 320 * @if_outgoing: the interface for which the ifinfo should be acquired
300 * 321 *
301 * Returns the requested orig_ifinfo or NULL if not found. 322 * Return: the requested orig_ifinfo or NULL if not found.
302 * 323 *
303 * The object is returned with refcounter increased by 1. 324 * The object is returned with refcounter increased by 1.
304 */ 325 */
@@ -314,7 +335,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
314 if (tmp->if_outgoing != if_outgoing) 335 if (tmp->if_outgoing != if_outgoing)
315 continue; 336 continue;
316 337
317 if (!atomic_inc_not_zero(&tmp->refcount)) 338 if (!kref_get_unless_zero(&tmp->refcount))
318 continue; 339 continue;
319 340
320 orig_ifinfo = tmp; 341 orig_ifinfo = tmp;
@@ -330,7 +351,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
330 * @orig_node: the orig node to be queried 351 * @orig_node: the orig node to be queried
331 * @if_outgoing: the interface for which the ifinfo should be acquired 352 * @if_outgoing: the interface for which the ifinfo should be acquired
332 * 353 *
333 * Returns NULL in case of failure or the orig_ifinfo object for the if_outgoing 354 * Return: NULL in case of failure or the orig_ifinfo object for the if_outgoing
334 * interface otherwise. The object is created and added to the list 355 * interface otherwise. The object is created and added to the list
335 * if it does not exist. 356 * if it does not exist.
336 * 357 *
@@ -354,7 +375,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
354 goto out; 375 goto out;
355 376
356 if (if_outgoing != BATADV_IF_DEFAULT && 377 if (if_outgoing != BATADV_IF_DEFAULT &&
357 !atomic_inc_not_zero(&if_outgoing->refcount)) { 378 !kref_get_unless_zero(&if_outgoing->refcount)) {
358 kfree(orig_ifinfo); 379 kfree(orig_ifinfo);
359 orig_ifinfo = NULL; 380 orig_ifinfo = NULL;
360 goto out; 381 goto out;
@@ -365,7 +386,8 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
365 orig_ifinfo->batman_seqno_reset = reset_time; 386 orig_ifinfo->batman_seqno_reset = reset_time;
366 orig_ifinfo->if_outgoing = if_outgoing; 387 orig_ifinfo->if_outgoing = if_outgoing;
367 INIT_HLIST_NODE(&orig_ifinfo->list); 388 INIT_HLIST_NODE(&orig_ifinfo->list);
368 atomic_set(&orig_ifinfo->refcount, 2); 389 kref_init(&orig_ifinfo->refcount);
390 kref_get(&orig_ifinfo->refcount);
369 hlist_add_head_rcu(&orig_ifinfo->list, 391 hlist_add_head_rcu(&orig_ifinfo->list,
370 &orig_node->ifinfo_list); 392 &orig_node->ifinfo_list);
371out: 393out:
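The kref_init()/kref_get() pair above replaces the old atomic_set(&refcount, 2): kref_init() starts the counter at one (the reference owned by the list the object is about to join) and the extra kref_get() accounts for the pointer returned to the caller. A small sketch of the same idea, again with invented names and the list locking omitted:

static struct foo *foo_new(struct hlist_head *head)
{
	struct foo *foo;

	foo = kzalloc(sizeof(*foo), GFP_ATOMIC);
	if (!foo)
		return NULL;

	/* kref_init() starts at 1: the reference the list will own */
	kref_init(&foo->refcount);
	/* extra reference for the pointer handed back to the caller */
	kref_get(&foo->refcount);

	hlist_add_head_rcu(&foo->list, head);	/* list lock omitted here */

	return foo;
}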
@@ -375,12 +397,12 @@ out:
375 397
376/** 398/**
377 * batadv_neigh_ifinfo_get - find the ifinfo from a neigh_node 399 * batadv_neigh_ifinfo_get - find the ifinfo from a neigh_node
378 * @neigh_node: the neigh node to be queried 400 * @neigh: the neigh node to be queried
379 * @if_outgoing: the interface for which the ifinfo should be acquired 401 * @if_outgoing: the interface for which the ifinfo should be acquired
380 * 402 *
381 * The object is returned with refcounter increased by 1. 403 * The object is returned with refcounter increased by 1.
382 * 404 *
383 * Returns the requested neigh_ifinfo or NULL if not found 405 * Return: the requested neigh_ifinfo or NULL if not found
384 */ 406 */
385struct batadv_neigh_ifinfo * 407struct batadv_neigh_ifinfo *
386batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, 408batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
@@ -395,7 +417,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
395 if (tmp_neigh_ifinfo->if_outgoing != if_outgoing) 417 if (tmp_neigh_ifinfo->if_outgoing != if_outgoing)
396 continue; 418 continue;
397 419
398 if (!atomic_inc_not_zero(&tmp_neigh_ifinfo->refcount)) 420 if (!kref_get_unless_zero(&tmp_neigh_ifinfo->refcount))
399 continue; 421 continue;
400 422
401 neigh_ifinfo = tmp_neigh_ifinfo; 423 neigh_ifinfo = tmp_neigh_ifinfo;
@@ -408,10 +430,10 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
408 430
409/** 431/**
410 * batadv_neigh_ifinfo_new - search and possibly create a neigh_ifinfo object 432 * batadv_neigh_ifinfo_new - search and possibly create a neigh_ifinfo object
411 * @neigh_node: the neigh node to be queried 433 * @neigh: the neigh node to be queried
412 * @if_outgoing: the interface for which the ifinfo should be acquired 434 * @if_outgoing: the interface for which the ifinfo should be acquired
413 * 435 *
414 * Returns NULL in case of failure or the neigh_ifinfo object for the 436 * Return: NULL in case of failure or the neigh_ifinfo object for the
415 * if_outgoing interface otherwise. The object is created and added to the list 437 * if_outgoing interface otherwise. The object is created and added to the list
416 * if it does not exist. 438 * if it does not exist.
417 * 439 *
@@ -433,14 +455,15 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
433 if (!neigh_ifinfo) 455 if (!neigh_ifinfo)
434 goto out; 456 goto out;
435 457
436 if (if_outgoing && !atomic_inc_not_zero(&if_outgoing->refcount)) { 458 if (if_outgoing && !kref_get_unless_zero(&if_outgoing->refcount)) {
437 kfree(neigh_ifinfo); 459 kfree(neigh_ifinfo);
438 neigh_ifinfo = NULL; 460 neigh_ifinfo = NULL;
439 goto out; 461 goto out;
440 } 462 }
441 463
442 INIT_HLIST_NODE(&neigh_ifinfo->list); 464 INIT_HLIST_NODE(&neigh_ifinfo->list);
443 atomic_set(&neigh_ifinfo->refcount, 2); 465 kref_init(&neigh_ifinfo->refcount);
466 kref_get(&neigh_ifinfo->refcount);
444 neigh_ifinfo->if_outgoing = if_outgoing; 467 neigh_ifinfo->if_outgoing = if_outgoing;
445 468
446 hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); 469 hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list);
@@ -459,7 +482,8 @@ out:
459 * 482 *
460 * Looks for and possibly returns a neighbour belonging to this originator list 483 * Looks for and possibly returns a neighbour belonging to this originator list
461 * which is connected through the provided hard interface. 484 * which is connected through the provided hard interface.
462 * Returns NULL if the neighbour is not found. 485 *
486 * Return: neighbor when found. Otherwise NULL
463 */ 487 */
464static struct batadv_neigh_node * 488static struct batadv_neigh_node *
465batadv_neigh_node_get(const struct batadv_orig_node *orig_node, 489batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
@@ -476,7 +500,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
476 if (tmp_neigh_node->if_incoming != hard_iface) 500 if (tmp_neigh_node->if_incoming != hard_iface)
477 continue; 501 continue;
478 502
479 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) 503 if (!kref_get_unless_zero(&tmp_neigh_node->refcount))
480 continue; 504 continue;
481 505
482 res = tmp_neigh_node; 506 res = tmp_neigh_node;
@@ -492,7 +516,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
492 * @hard_iface: the interface this neighbour is connected to 516 * @hard_iface: the interface this neighbour is connected to
493 * @neigh_addr: the interface address of the neighbour to retrieve 517 * @neigh_addr: the interface address of the neighbour to retrieve
494 * 518 *
495 * Returns the hardif neighbour node if found or created or NULL otherwise. 519 * Return: the hardif neighbour node if found or created or NULL otherwise.
496 */ 520 */
497static struct batadv_hardif_neigh_node * 521static struct batadv_hardif_neigh_node *
498batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, 522batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
@@ -508,12 +532,12 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
508 if (hardif_neigh) 532 if (hardif_neigh)
509 goto out; 533 goto out;
510 534
511 if (!atomic_inc_not_zero(&hard_iface->refcount)) 535 if (!kref_get_unless_zero(&hard_iface->refcount))
512 goto out; 536 goto out;
513 537
514 hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC); 538 hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC);
515 if (!hardif_neigh) { 539 if (!hardif_neigh) {
516 batadv_hardif_free_ref(hard_iface); 540 batadv_hardif_put(hard_iface);
517 goto out; 541 goto out;
518 } 542 }
519 543
@@ -522,7 +546,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
522 hardif_neigh->if_incoming = hard_iface; 546 hardif_neigh->if_incoming = hard_iface;
523 hardif_neigh->last_seen = jiffies; 547 hardif_neigh->last_seen = jiffies;
524 548
525 atomic_set(&hardif_neigh->refcount, 1); 549 kref_init(&hardif_neigh->refcount);
526 550
527 if (bat_priv->bat_algo_ops->bat_hardif_neigh_init) 551 if (bat_priv->bat_algo_ops->bat_hardif_neigh_init)
528 bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh); 552 bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh);
@@ -540,7 +564,7 @@ out:
540 * @hard_iface: the interface this neighbour is connected to 564 * @hard_iface: the interface this neighbour is connected to
541 * @neigh_addr: the interface address of the neighbour to retrieve 565 * @neigh_addr: the interface address of the neighbour to retrieve
542 * 566 *
543 * Returns the hardif neighbour node if found or created or NULL otherwise. 567 * Return: the hardif neighbour node if found or created or NULL otherwise.
544 */ 568 */
545static struct batadv_hardif_neigh_node * 569static struct batadv_hardif_neigh_node *
546batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, 570batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
@@ -562,7 +586,8 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
562 * @neigh_addr: the address of the neighbour 586 * @neigh_addr: the address of the neighbour
563 * 587 *
564 * Looks for and possibly returns a neighbour belonging to this hard interface. 588 * Looks for and possibly returns a neighbour belonging to this hard interface.
565 * Returns NULL if the neighbour is not found. 589 *
590 * Return: neighbor when found. Otherwise NULL
566 */ 591 */
567struct batadv_hardif_neigh_node * 592struct batadv_hardif_neigh_node *
568batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, 593batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
@@ -576,7 +601,7 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
576 if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr)) 601 if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr))
577 continue; 602 continue;
578 603
579 if (!atomic_inc_not_zero(&tmp_hardif_neigh->refcount)) 604 if (!kref_get_unless_zero(&tmp_hardif_neigh->refcount))
580 continue; 605 continue;
581 606
582 hardif_neigh = tmp_hardif_neigh; 607 hardif_neigh = tmp_hardif_neigh;
@@ -594,7 +619,8 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
594 * @neigh_addr: the mac address of the neighbour interface 619 * @neigh_addr: the mac address of the neighbour interface
595 * 620 *
596 * Allocates a new neigh_node object and initialises all the generic fields. 621 * Allocates a new neigh_node object and initialises all the generic fields.
597 * Returns the new object or NULL on failure. 622 *
623 * Return: neighbor when found. Otherwise NULL
598 */ 624 */
599struct batadv_neigh_node * 625struct batadv_neigh_node *
600batadv_neigh_node_new(struct batadv_orig_node *orig_node, 626batadv_neigh_node_new(struct batadv_orig_node *orig_node,
@@ -617,7 +643,7 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
617 if (!neigh_node) 643 if (!neigh_node)
618 goto out; 644 goto out;
619 645
620 if (!atomic_inc_not_zero(&hard_iface->refcount)) { 646 if (!kref_get_unless_zero(&hard_iface->refcount)) {
621 kfree(neigh_node); 647 kfree(neigh_node);
622 neigh_node = NULL; 648 neigh_node = NULL;
623 goto out; 649 goto out;
@@ -630,24 +656,27 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
630 ether_addr_copy(neigh_node->addr, neigh_addr); 656 ether_addr_copy(neigh_node->addr, neigh_addr);
631 neigh_node->if_incoming = hard_iface; 657 neigh_node->if_incoming = hard_iface;
632 neigh_node->orig_node = orig_node; 658 neigh_node->orig_node = orig_node;
659 neigh_node->last_seen = jiffies;
660
661 /* increment unique neighbor refcount */
662 kref_get(&hardif_neigh->refcount);
663 neigh_node->hardif_neigh = hardif_neigh;
633 664
634 /* extra reference for return */ 665 /* extra reference for return */
635 atomic_set(&neigh_node->refcount, 2); 666 kref_init(&neigh_node->refcount);
667 kref_get(&neigh_node->refcount);
636 668
637 spin_lock_bh(&orig_node->neigh_list_lock); 669 spin_lock_bh(&orig_node->neigh_list_lock);
638 hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); 670 hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
639 spin_unlock_bh(&orig_node->neigh_list_lock); 671 spin_unlock_bh(&orig_node->neigh_list_lock);
640 672
641 /* increment unique neighbor refcount */
642 atomic_inc(&hardif_neigh->refcount);
643
644 batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, 673 batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
645 "Creating new neighbor %pM for orig_node %pM on interface %s\n", 674 "Creating new neighbor %pM for orig_node %pM on interface %s\n",
646 neigh_addr, orig_node->orig, hard_iface->net_dev->name); 675 neigh_addr, orig_node->orig, hard_iface->net_dev->name);
647 676
648out: 677out:
649 if (hardif_neigh) 678 if (hardif_neigh)
650 batadv_hardif_neigh_free_ref(hardif_neigh); 679 batadv_hardif_neigh_put(hardif_neigh);
651 return neigh_node; 680 return neigh_node;
652} 681}
653 682
@@ -656,7 +685,7 @@ out:
656 * @seq: neighbour table seq_file struct 685 * @seq: neighbour table seq_file struct
657 * @offset: not used 686 * @offset: not used
658 * 687 *
659 * Always returns 0. 688 * Return: always 0
660 */ 689 */
661int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) 690int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
662{ 691{
@@ -673,7 +702,7 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
673 primary_if->net_dev->dev_addr, net_dev->name, 702 primary_if->net_dev->dev_addr, net_dev->name,
674 bat_priv->bat_algo_ops->name); 703 bat_priv->bat_algo_ops->name);
675 704
676 batadv_hardif_free_ref(primary_if); 705 batadv_hardif_put(primary_if);
677 706
678 if (!bat_priv->bat_algo_ops->bat_neigh_print) { 707 if (!bat_priv->bat_algo_ops->bat_neigh_print) {
679 seq_puts(seq, 708 seq_puts(seq,
@@ -688,32 +717,34 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
688/** 717/**
689 * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for 718 * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
690 * free after rcu grace period 719 * free after rcu grace period
691 * @orig_ifinfo: the orig_ifinfo object to release 720 * @ref: kref pointer of the orig_ifinfo
692 */ 721 */
693static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo) 722static void batadv_orig_ifinfo_release(struct kref *ref)
694{ 723{
724 struct batadv_orig_ifinfo *orig_ifinfo;
695 struct batadv_neigh_node *router; 725 struct batadv_neigh_node *router;
696 726
727 orig_ifinfo = container_of(ref, struct batadv_orig_ifinfo, refcount);
728
697 if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) 729 if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
698 batadv_hardif_free_ref(orig_ifinfo->if_outgoing); 730 batadv_hardif_put(orig_ifinfo->if_outgoing);
699 731
700 /* this is the last reference to this object */ 732 /* this is the last reference to this object */
701 router = rcu_dereference_protected(orig_ifinfo->router, true); 733 router = rcu_dereference_protected(orig_ifinfo->router, true);
702 if (router) 734 if (router)
703 batadv_neigh_node_free_ref(router); 735 batadv_neigh_node_put(router);
704 736
705 kfree_rcu(orig_ifinfo, rcu); 737 kfree_rcu(orig_ifinfo, rcu);
706} 738}
707 739
708/** 740/**
709 * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release 741 * batadv_orig_ifinfo_put - decrement the refcounter and possibly release
710 * the orig_ifinfo 742 * the orig_ifinfo
711 * @orig_ifinfo: the orig_ifinfo object to release 743 * @orig_ifinfo: the orig_ifinfo object to release
712 */ 744 */
713void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) 745void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
714{ 746{
715 if (atomic_dec_and_test(&orig_ifinfo->refcount)) 747 kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
716 batadv_orig_ifinfo_release(orig_ifinfo);
717} 748}
718 749
719/** 750/**
@@ -740,27 +771,30 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
740/** 771/**
741 * batadv_orig_node_release - release orig_node from lists and queue for 772 * batadv_orig_node_release - release orig_node from lists and queue for
742 * free after rcu grace period 773 * free after rcu grace period
743 * @orig_node: the orig node to free 774 * @ref: kref pointer of the orig_node
744 */ 775 */
745static void batadv_orig_node_release(struct batadv_orig_node *orig_node) 776static void batadv_orig_node_release(struct kref *ref)
746{ 777{
747 struct hlist_node *node_tmp; 778 struct hlist_node *node_tmp;
748 struct batadv_neigh_node *neigh_node; 779 struct batadv_neigh_node *neigh_node;
780 struct batadv_orig_node *orig_node;
749 struct batadv_orig_ifinfo *orig_ifinfo; 781 struct batadv_orig_ifinfo *orig_ifinfo;
750 782
783 orig_node = container_of(ref, struct batadv_orig_node, refcount);
784
751 spin_lock_bh(&orig_node->neigh_list_lock); 785 spin_lock_bh(&orig_node->neigh_list_lock);
752 786
753 /* for all neighbors towards this originator ... */ 787 /* for all neighbors towards this originator ... */
754 hlist_for_each_entry_safe(neigh_node, node_tmp, 788 hlist_for_each_entry_safe(neigh_node, node_tmp,
755 &orig_node->neigh_list, list) { 789 &orig_node->neigh_list, list) {
756 hlist_del_rcu(&neigh_node->list); 790 hlist_del_rcu(&neigh_node->list);
757 batadv_neigh_node_free_ref(neigh_node); 791 batadv_neigh_node_put(neigh_node);
758 } 792 }
759 793
760 hlist_for_each_entry_safe(orig_ifinfo, node_tmp, 794 hlist_for_each_entry_safe(orig_ifinfo, node_tmp,
761 &orig_node->ifinfo_list, list) { 795 &orig_node->ifinfo_list, list) {
762 hlist_del_rcu(&orig_ifinfo->list); 796 hlist_del_rcu(&orig_ifinfo->list);
763 batadv_orig_ifinfo_free_ref(orig_ifinfo); 797 batadv_orig_ifinfo_put(orig_ifinfo);
764 } 798 }
765 spin_unlock_bh(&orig_node->neigh_list_lock); 799 spin_unlock_bh(&orig_node->neigh_list_lock);
766 800
@@ -771,14 +805,13 @@ static void batadv_orig_node_release(struct batadv_orig_node *orig_node)
771} 805}
772 806
773/** 807/**
774 * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly 808 * batadv_orig_node_put - decrement the orig node refcounter and possibly
775 * release it 809 * release it
776 * @orig_node: the orig node to free 810 * @orig_node: the orig node to free
777 */ 811 */
778void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) 812void batadv_orig_node_put(struct batadv_orig_node *orig_node)
779{ 813{
780 if (atomic_dec_and_test(&orig_node->refcount)) 814 kref_put(&orig_node->refcount, batadv_orig_node_release);
781 batadv_orig_node_release(orig_node);
782} 815}
783 816
784void batadv_originator_free(struct batadv_priv *bat_priv) 817void batadv_originator_free(struct batadv_priv *bat_priv)
@@ -805,7 +838,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
805 hlist_for_each_entry_safe(orig_node, node_tmp, 838 hlist_for_each_entry_safe(orig_node, node_tmp,
806 head, hash_entry) { 839 head, hash_entry) {
807 hlist_del_rcu(&orig_node->hash_entry); 840 hlist_del_rcu(&orig_node->hash_entry);
808 batadv_orig_node_free_ref(orig_node); 841 batadv_orig_node_put(orig_node);
809 } 842 }
810 spin_unlock_bh(list_lock); 843 spin_unlock_bh(list_lock);
811 } 844 }
@@ -820,7 +853,8 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
820 * 853 *
821 * Creates a new originator object and initialises all the generic fields. 854 * Creates a new originator object and initialises all the generic fields.
822 * The new object is not added to the originator list. 855 * The new object is not added to the originator list.
823 * Returns the newly created object or NULL on failure. 856 *
857 * Return: the newly created object or NULL on failure.
824 */ 858 */
825struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, 859struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
826 const u8 *addr) 860 const u8 *addr)
@@ -849,7 +883,8 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
849 batadv_nc_init_orig(orig_node); 883 batadv_nc_init_orig(orig_node);
850 884
851 /* extra reference for return */ 885 /* extra reference for return */
852 atomic_set(&orig_node->refcount, 2); 886 kref_init(&orig_node->refcount);
887 kref_get(&orig_node->refcount);
853 888
854 orig_node->bat_priv = bat_priv; 889 orig_node->bat_priv = bat_priv;
855 ether_addr_copy(orig_node->orig, addr); 890 ether_addr_copy(orig_node->orig, addr);
@@ -877,7 +912,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
877 * Immediately release vlan since it is not needed anymore in this 912 * Immediately release vlan since it is not needed anymore in this
878 * context 913 * context
879 */ 914 */
880 batadv_orig_node_vlan_free_ref(vlan); 915 batadv_orig_node_vlan_put(vlan);
881 916
882 for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { 917 for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
883 INIT_HLIST_HEAD(&orig_node->fragments[i].head); 918 INIT_HLIST_HEAD(&orig_node->fragments[i].head);
@@ -926,7 +961,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
926 neigh->addr, if_outgoing->net_dev->name); 961 neigh->addr, if_outgoing->net_dev->name);
927 962
928 hlist_del_rcu(&neigh_ifinfo->list); 963 hlist_del_rcu(&neigh_ifinfo->list);
929 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 964 batadv_neigh_ifinfo_put(neigh_ifinfo);
930 } 965 }
931 966
932 spin_unlock_bh(&neigh->ifinfo_lock); 967 spin_unlock_bh(&neigh->ifinfo_lock);
@@ -937,7 +972,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
937 * @bat_priv: the bat priv with all the soft interface information 972 * @bat_priv: the bat priv with all the soft interface information
938 * @orig_node: orig node which is to be checked 973 * @orig_node: orig node which is to be checked
939 * 974 *
940 * Returns true if any ifinfo entry was purged, false otherwise. 975 * Return: true if any ifinfo entry was purged, false otherwise.
941 */ 976 */
942static bool 977static bool
943batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, 978batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
@@ -972,10 +1007,10 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
972 ifinfo_purged = true; 1007 ifinfo_purged = true;
973 1008
974 hlist_del_rcu(&orig_ifinfo->list); 1009 hlist_del_rcu(&orig_ifinfo->list);
975 batadv_orig_ifinfo_free_ref(orig_ifinfo); 1010 batadv_orig_ifinfo_put(orig_ifinfo);
976 if (orig_node->last_bonding_candidate == orig_ifinfo) { 1011 if (orig_node->last_bonding_candidate == orig_ifinfo) {
977 orig_node->last_bonding_candidate = NULL; 1012 orig_node->last_bonding_candidate = NULL;
978 batadv_orig_ifinfo_free_ref(orig_ifinfo); 1013 batadv_orig_ifinfo_put(orig_ifinfo);
979 } 1014 }
980 } 1015 }
981 1016
@@ -989,7 +1024,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
989 * @bat_priv: the bat priv with all the soft interface information 1024 * @bat_priv: the bat priv with all the soft interface information
990 * @orig_node: orig node which is to be checked 1025 * @orig_node: orig node which is to be checked
991 * 1026 *
992 * Returns true if any neighbor was purged, false otherwise 1027 * Return: true if any neighbor was purged, false otherwise
993 */ 1028 */
994static bool 1029static bool
995batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, 1030batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
@@ -1029,7 +1064,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
1029 neigh_purged = true; 1064 neigh_purged = true;
1030 1065
1031 hlist_del_rcu(&neigh_node->list); 1066 hlist_del_rcu(&neigh_node->list);
1032 batadv_neigh_node_free_ref(neigh_node); 1067 batadv_neigh_node_put(neigh_node);
1033 } else { 1068 } else {
1034 /* only necessary if not the whole neighbor is to be 1069 /* only necessary if not the whole neighbor is to be
1035 * deleted, but some interface has been removed. 1070 * deleted, but some interface has been removed.
@@ -1048,7 +1083,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
1048 * @orig_node: orig node which is to be checked 1083 * @orig_node: orig node which is to be checked
1049 * @if_outgoing: the interface for which the metric should be compared 1084 * @if_outgoing: the interface for which the metric should be compared
1050 * 1085 *
1051 * Returns the current best neighbor, with refcount increased. 1086 * Return: the current best neighbor, with refcount increased.
1052 */ 1087 */
1053static struct batadv_neigh_node * 1088static struct batadv_neigh_node *
1054batadv_find_best_neighbor(struct batadv_priv *bat_priv, 1089batadv_find_best_neighbor(struct batadv_priv *bat_priv,
@@ -1064,11 +1099,11 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
1064 best, if_outgoing) <= 0)) 1099 best, if_outgoing) <= 0))
1065 continue; 1100 continue;
1066 1101
1067 if (!atomic_inc_not_zero(&neigh->refcount)) 1102 if (!kref_get_unless_zero(&neigh->refcount))
1068 continue; 1103 continue;
1069 1104
1070 if (best) 1105 if (best)
1071 batadv_neigh_node_free_ref(best); 1106 batadv_neigh_node_put(best);
1072 1107
1073 best = neigh; 1108 best = neigh;
1074 } 1109 }
@@ -1085,7 +1120,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
1085 * This function checks if the orig_node or substructures of it have become 1120 * This function checks if the orig_node or substructures of it have become
1086 * obsolete, and purges this information if that's the case. 1121 * obsolete, and purges this information if that's the case.
1087 * 1122 *
1088 * Returns true if the orig_node is to be removed, false otherwise. 1123 * Return: true if the orig_node is to be removed, false otherwise.
1089 */ 1124 */
1090static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, 1125static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
1091 struct batadv_orig_node *orig_node) 1126 struct batadv_orig_node *orig_node)
@@ -1114,7 +1149,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
1114 batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT, 1149 batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT,
1115 best_neigh_node); 1150 best_neigh_node);
1116 if (best_neigh_node) 1151 if (best_neigh_node)
1117 batadv_neigh_node_free_ref(best_neigh_node); 1152 batadv_neigh_node_put(best_neigh_node);
1118 1153
1119 /* ... then for all other interfaces. */ 1154 /* ... then for all other interfaces. */
1120 rcu_read_lock(); 1155 rcu_read_lock();
@@ -1131,7 +1166,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
1131 batadv_update_route(bat_priv, orig_node, hard_iface, 1166 batadv_update_route(bat_priv, orig_node, hard_iface,
1132 best_neigh_node); 1167 best_neigh_node);
1133 if (best_neigh_node) 1168 if (best_neigh_node)
1134 batadv_neigh_node_free_ref(best_neigh_node); 1169 batadv_neigh_node_put(best_neigh_node);
1135 } 1170 }
1136 rcu_read_unlock(); 1171 rcu_read_unlock();
1137 1172
@@ -1164,7 +1199,7 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv)
1164 batadv_tt_global_del_orig(orig_node->bat_priv, 1199 batadv_tt_global_del_orig(orig_node->bat_priv,
1165 orig_node, -1, 1200 orig_node, -1,
1166 "originator timed out"); 1201 "originator timed out");
1167 batadv_orig_node_free_ref(orig_node); 1202 batadv_orig_node_put(orig_node);
1168 continue; 1203 continue;
1169 } 1204 }
1170 1205
@@ -1210,7 +1245,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
1210 primary_if->net_dev->dev_addr, net_dev->name, 1245 primary_if->net_dev->dev_addr, net_dev->name,
1211 bat_priv->bat_algo_ops->name); 1246 bat_priv->bat_algo_ops->name);
1212 1247
1213 batadv_hardif_free_ref(primary_if); 1248 batadv_hardif_put(primary_if);
1214 1249
1215 if (!bat_priv->bat_algo_ops->bat_orig_print) { 1250 if (!bat_priv->bat_algo_ops->bat_orig_print) {
1216 seq_puts(seq, 1251 seq_puts(seq,
@@ -1230,7 +1265,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
1230 * @seq: debugfs table seq_file struct 1265 * @seq: debugfs table seq_file struct
1231 * @offset: not used 1266 * @offset: not used
1232 * 1267 *
1233 * Returns 0 1268 * Return: 0
1234 */ 1269 */
1235int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) 1270int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
1236{ 1271{
@@ -1266,7 +1301,7 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
1266 1301
1267out: 1302out:
1268 if (hard_iface) 1303 if (hard_iface)
1269 batadv_hardif_free_ref(hard_iface); 1304 batadv_hardif_put(hard_iface);
1270 return 0; 1305 return 0;
1271} 1306}
1272 1307
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index cf0730414ed2..4e8b67f11051 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -20,10 +20,10 @@
20 20
21#include "main.h" 21#include "main.h"
22 22
23#include <linux/atomic.h>
24#include <linux/compiler.h> 23#include <linux/compiler.h>
25#include <linux/if_ether.h> 24#include <linux/if_ether.h>
26#include <linux/jhash.h> 25#include <linux/jhash.h>
26#include <linux/kref.h>
27#include <linux/rculist.h> 27#include <linux/rculist.h>
28#include <linux/rcupdate.h> 28#include <linux/rcupdate.h>
29#include <linux/stddef.h> 29#include <linux/stddef.h>
@@ -37,19 +37,19 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2);
37int batadv_originator_init(struct batadv_priv *bat_priv); 37int batadv_originator_init(struct batadv_priv *bat_priv);
38void batadv_originator_free(struct batadv_priv *bat_priv); 38void batadv_originator_free(struct batadv_priv *bat_priv);
39void batadv_purge_orig_ref(struct batadv_priv *bat_priv); 39void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
40void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); 40void batadv_orig_node_put(struct batadv_orig_node *orig_node);
41struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, 41struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
42 const u8 *addr); 42 const u8 *addr);
43struct batadv_hardif_neigh_node * 43struct batadv_hardif_neigh_node *
44batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, 44batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
45 const u8 *neigh_addr); 45 const u8 *neigh_addr);
46void 46void
47batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh); 47batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
48struct batadv_neigh_node * 48struct batadv_neigh_node *
49batadv_neigh_node_new(struct batadv_orig_node *orig_node, 49batadv_neigh_node_new(struct batadv_orig_node *orig_node,
50 struct batadv_hard_iface *hard_iface, 50 struct batadv_hard_iface *hard_iface,
51 const u8 *neigh_addr); 51 const u8 *neigh_addr);
52void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); 52void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
53struct batadv_neigh_node * 53struct batadv_neigh_node *
54batadv_orig_router_get(struct batadv_orig_node *orig_node, 54batadv_orig_router_get(struct batadv_orig_node *orig_node,
55 const struct batadv_hard_iface *if_outgoing); 55 const struct batadv_hard_iface *if_outgoing);
@@ -59,7 +59,7 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
59struct batadv_neigh_ifinfo * 59struct batadv_neigh_ifinfo *
60batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, 60batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
61 struct batadv_hard_iface *if_outgoing); 61 struct batadv_hard_iface *if_outgoing);
62void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo); 62void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
63 63
64int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); 64int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
65 65
@@ -69,7 +69,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
69struct batadv_orig_ifinfo * 69struct batadv_orig_ifinfo *
70batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, 70batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
71 struct batadv_hard_iface *if_outgoing); 71 struct batadv_hard_iface *if_outgoing);
72void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo); 72void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
73 73
74int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); 74int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
75int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); 75int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
@@ -83,7 +83,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
83struct batadv_orig_node_vlan * 83struct batadv_orig_node_vlan *
84batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, 84batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
85 unsigned short vid); 85 unsigned short vid);
86void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan); 86void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
87 87
88/* hashfunction to choose an entry in a hash table of given size 88/* hashfunction to choose an entry in a hash table of given size
89 * hash algorithm from http://en.wikipedia.org/wiki/Hash_table 89 * hash algorithm from http://en.wikipedia.org/wiki/Hash_table
@@ -115,7 +115,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data)
115 if (!batadv_compare_eth(orig_node, data)) 115 if (!batadv_compare_eth(orig_node, data))
116 continue; 116 continue;
117 117
118 if (!atomic_inc_not_zero(&orig_node->refcount)) 118 if (!kref_get_unless_zero(&orig_node->refcount))
119 continue; 119 continue;
120 120
121 orig_node_tmp = orig_node; 121 orig_node_tmp = orig_node;
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 0558e3237e0e..8a8d7ca1a5cf 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -26,6 +26,8 @@
26 * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV 26 * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
27 * @BATADV_BCAST: broadcast packets carrying broadcast payload 27 * @BATADV_BCAST: broadcast packets carrying broadcast payload
28 * @BATADV_CODED: network coded packets 28 * @BATADV_CODED: network coded packets
29 * @BATADV_ELP: echo location packets for B.A.T.M.A.N. V
30 * @BATADV_OGM2: originator messages for B.A.T.M.A.N. V
29 * 31 *
30 * @BATADV_UNICAST: unicast packets carrying unicast payload traffic 32 * @BATADV_UNICAST: unicast packets carrying unicast payload traffic
31 * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original 33 * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original
@@ -40,6 +42,8 @@ enum batadv_packettype {
40 BATADV_IV_OGM = 0x00, 42 BATADV_IV_OGM = 0x00,
41 BATADV_BCAST = 0x01, 43 BATADV_BCAST = 0x01,
42 BATADV_CODED = 0x02, 44 BATADV_CODED = 0x02,
45 BATADV_ELP = 0x03,
46 BATADV_OGM2 = 0x04,
43 /* 0x40 - 0x7f: unicast */ 47 /* 0x40 - 0x7f: unicast */
44#define BATADV_UNICAST_MIN 0x40 48#define BATADV_UNICAST_MIN 0x40
45 BATADV_UNICAST = 0x40, 49 BATADV_UNICAST = 0x40,
@@ -158,7 +162,7 @@ enum batadv_tt_client_flags {
158}; 162};
159 163
160/** 164/**
161 * batadv_vlan_flags - flags for the four MSB of any vlan ID field 165 * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
162 * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not 166 * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
163 */ 167 */
164enum batadv_vlan_flags { 168enum batadv_vlan_flags {
@@ -209,6 +213,11 @@ struct batadv_bla_claim_dst {
209 * @version: batman-adv protocol version, part of the general header 213 * @version: batman-adv protocol version, part of the general header
210 * @ttl: time to live for this packet, part of the general header 214 * @ttl: time to live for this packet, part of the general header
211 * @flags: contains routing relevant flags - see enum batadv_iv_flags 215 * @flags: contains routing relevant flags - see enum batadv_iv_flags
216 * @seqno: sequence identification
217 * @orig: address of the source node
218 * @prev_sender: address of the previous sender
219 * @reserved: reserved byte for alignment
220 * @tq: transmission quality
212 * @tvlv_len: length of tvlv data following the ogm header 221 * @tvlv_len: length of tvlv data following the ogm header
213 */ 222 */
214struct batadv_ogm_packet { 223struct batadv_ogm_packet {
@@ -230,7 +239,52 @@ struct batadv_ogm_packet {
230#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) 239#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
231 240
232/** 241/**
233 * batadv_icmp_header - common members among all the ICMP packets 242 * struct batadv_ogm2_packet - ogm2 (routing protocol) packet
243 * @packet_type: batman-adv packet type, part of the general header
244 * @version: batman-adv protocol version, part of the general header
245 * @ttl: time to live for this packet, part of the general header
246 * @flags: reserved for routing relevant flags - currently always 0
247 * @seqno: sequence number
248 * @orig: originator mac address
249 * @tvlv_len: length of the appended tvlv buffer (in bytes)
250 * @throughput: the currently flooded path throughput
251 */
252struct batadv_ogm2_packet {
253 u8 packet_type;
254 u8 version;
255 u8 ttl;
256 u8 flags;
257 __be32 seqno;
258 u8 orig[ETH_ALEN];
259 __be16 tvlv_len;
260 __be32 throughput;
261 /* __packed is not needed as the struct size is divisible by 4,
262 * and the largest data type in this struct has a size of 4.
263 */
264};
265
266#define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
267
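The OGM2 header above is 20 bytes with no implicit padding (hence no __packed), and seqno, tvlv_len and throughput travel big endian on the wire. The following stand-alone user-space sketch decodes such a header; the mirrored struct, the helper names, the sample values and the assumed compat version 15 are illustrative only and not taken from batman-adv:

/* gcc -Wall ogm2_peek.c -o ogm2_peek */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>	/* ntohs(), ntohl() */

#define ETH_ALEN 6

/* user-space mirror of struct batadv_ogm2_packet (20 bytes, no padding) */
struct ogm2_hdr {
	uint8_t  packet_type;
	uint8_t  version;
	uint8_t  ttl;
	uint8_t  flags;
	uint32_t seqno;		/* big endian on the wire */
	uint8_t  orig[ETH_ALEN];
	uint16_t tvlv_len;	/* big endian on the wire */
	uint32_t throughput;	/* big endian on the wire */
};

static void ogm2_peek(const void *buf, size_t len)
{
	struct ogm2_hdr hdr;

	if (len < sizeof(hdr))
		return;

	memcpy(&hdr, buf, sizeof(hdr));
	printf("OGM2 from %02x:%02x:%02x:%02x:%02x:%02x seqno=%u tvlv_len=%u throughput=%u\n",
	       hdr.orig[0], hdr.orig[1], hdr.orig[2],
	       hdr.orig[3], hdr.orig[4], hdr.orig[5],
	       (unsigned int)ntohl(hdr.seqno),
	       (unsigned int)ntohs(hdr.tvlv_len),
	       (unsigned int)ntohl(hdr.throughput));
}

int main(void)
{
	/* hand-built example: type 0x04 (OGM2), assumed version 15, ttl 50 */
	uint8_t pkt[20] = {
		0x04, 15, 50, 0x00,
		0x00, 0x00, 0x00, 0x2a,			/* seqno 42 */
		0x02, 0x11, 0x22, 0x33, 0x44, 0x55,	/* orig */
		0x00, 0x00,				/* tvlv_len 0 */
		0x00, 0x01, 0x86, 0xa0,			/* throughput 100000 */
	};

	ogm2_peek(pkt, sizeof(pkt));
	return 0;
}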
268/**
269 * struct batadv_elp_packet - elp (neighbor discovery) packet
270 * @packet_type: batman-adv packet type, part of the general header
271 * @version: batman-adv protocol version, part of the general header
272 * @orig: originator mac address
273 * @seqno: sequence number
274 * @elp_interval: currently used ELP sending interval in ms
275 */
276struct batadv_elp_packet {
277 u8 packet_type;
278 u8 version;
279 u8 orig[ETH_ALEN];
280 __be32 seqno;
281 __be32 elp_interval;
282};
283
284#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
285
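struct batadv_elp_packet above is a 16-byte header whose seqno and elp_interval (in milliseconds) are stored big endian. A hedged user-space sketch of filling such a header follows; the packet type value 0x03 matches BATADV_ELP from the enum earlier in this file, while the version value and the helper name are assumptions:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl() */

#define ETH_ALEN 6

/* user-space mirror of struct batadv_elp_packet (16 bytes, no padding) */
struct elp_hdr {
	uint8_t  packet_type;
	uint8_t  version;
	uint8_t  orig[ETH_ALEN];
	uint32_t seqno;		/* big endian on the wire */
	uint32_t elp_interval;	/* big endian on the wire, in ms */
};

static void elp_fill(struct elp_hdr *hdr, const uint8_t *own_mac,
		     uint32_t seqno, uint32_t interval_ms)
{
	hdr->packet_type = 0x03;	/* BATADV_ELP, see enum batadv_packettype */
	hdr->version = 15;		/* assumption: current compat version */
	memcpy(hdr->orig, own_mac, ETH_ALEN);
	hdr->seqno = htonl(seqno);
	hdr->elp_interval = htonl(interval_ms);
}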
286/**
287 * struct batadv_icmp_header - common members among all the ICMP packets
234 * @packet_type: batman-adv packet type, part of the general header 288 * @packet_type: batman-adv packet type, part of the general header
235 * @version: batman-adv protocol version, part of the general header 289 * @version: batman-adv protocol version, part of the general header
236 * @ttl: time to live for this packet, part of the general header 290 * @ttl: time to live for this packet, part of the general header
@@ -256,7 +310,7 @@ struct batadv_icmp_header {
256}; 310};
257 311
258/** 312/**
259 * batadv_icmp_packet - ICMP packet 313 * struct batadv_icmp_packet - ICMP packet
260 * @packet_type: batman-adv packet type, part of the general header 314 * @packet_type: batman-adv packet type, part of the general header
261 * @version: batman-adv protocol version, part of the general header 315 * @version: batman-adv protocol version, part of the general header
262 * @ttl: time to live for this packet, part of the general header 316 * @ttl: time to live for this packet, part of the general header
@@ -282,7 +336,7 @@ struct batadv_icmp_packet {
282#define BATADV_RR_LEN 16 336#define BATADV_RR_LEN 16
283 337
284/** 338/**
285 * batadv_icmp_packet_rr - ICMP RouteRecord packet 339 * struct batadv_icmp_packet_rr - ICMP RouteRecord packet
286 * @packet_type: batman-adv packet type, part of the general header 340 * @packet_type: batman-adv packet type, part of the general header
287 * @version: batman-adv protocol version, part of the general header 341 * @version: batman-adv protocol version, part of the general header
288 * @ttl: time to live for this packet, part of the general header 342 * @ttl: time to live for this packet, part of the general header
@@ -345,6 +399,7 @@ struct batadv_unicast_packet {
345 * @u: common unicast packet header 399 * @u: common unicast packet header
346 * @src: address of the source 400 * @src: address of the source
347 * @subtype: packet subtype 401 * @subtype: packet subtype
402 * @reserved: reserved byte for alignment
348 */ 403 */
349struct batadv_unicast_4addr_packet { 404struct batadv_unicast_4addr_packet {
350 struct batadv_unicast_packet u; 405 struct batadv_unicast_packet u;
@@ -413,7 +468,6 @@ struct batadv_bcast_packet {
413 * @packet_type: batman-adv packet type, part of the general header 468 * @packet_type: batman-adv packet type, part of the general header
414 * @version: batman-adv protocol version, part of the general header 469 * @version: batman-adv protocol version, part of the general header
415 * @ttl: time to live for this packet, part of the general header 470 * @ttl: time to live for this packet, part of the general header
416 * @reserved: Align following fields to 2-byte boundaries
417 * @first_source: original source of first included packet 471 * @first_source: original source of first included packet
418 * @first_orig_dest: original destination of first included packet 472 * @first_orig_dest: original destination of first included packet
419 * @first_crc: checksum of first included packet 473 * @first_crc: checksum of first included packet
@@ -495,7 +549,7 @@ struct batadv_tvlv_gateway_data {
495 * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container 549 * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
496 * @flags: translation table flags (see batadv_tt_data_flags) 550 * @flags: translation table flags (see batadv_tt_data_flags)
497 * @ttvn: translation table version number 551 * @ttvn: translation table version number
498 * @vlan_num: number of announced VLANs. In the TVLV this struct is followed by 552 * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by
499 * one batadv_tvlv_tt_vlan_data object per announced vlan 553 * one batadv_tvlv_tt_vlan_data object per announced vlan
500 */ 554 */
501struct batadv_tvlv_tt_data { 555struct batadv_tvlv_tt_data {
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e4f2646d9246..b781bf753250 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -25,6 +25,7 @@
25#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
26#include <linux/if_ether.h> 26#include <linux/if_ether.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28#include <linux/kref.h>
28#include <linux/netdevice.h> 29#include <linux/netdevice.h>
29#include <linux/printk.h> 30#include <linux/printk.h>
30#include <linux/rculist.h> 31#include <linux/rculist.h>
@@ -72,7 +73,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
72 73
73 rcu_read_lock(); 74 rcu_read_lock();
74 curr_router = rcu_dereference(orig_ifinfo->router); 75 curr_router = rcu_dereference(orig_ifinfo->router);
75 if (curr_router && !atomic_inc_not_zero(&curr_router->refcount)) 76 if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
76 curr_router = NULL; 77 curr_router = NULL;
77 rcu_read_unlock(); 78 rcu_read_unlock();
78 79
@@ -97,20 +98,29 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
97 } 98 }
98 99
99 if (curr_router) 100 if (curr_router)
100 batadv_neigh_node_free_ref(curr_router); 101 batadv_neigh_node_put(curr_router);
101 102
102 /* increase refcount of new best neighbor */ 103 /* increase refcount of new best neighbor */
103 if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount)) 104 if (neigh_node && !kref_get_unless_zero(&neigh_node->refcount))
104 neigh_node = NULL; 105 neigh_node = NULL;
105 106
106 spin_lock_bh(&orig_node->neigh_list_lock); 107 spin_lock_bh(&orig_node->neigh_list_lock);
108 /* curr_router used earlier may not be the current orig_ifinfo->router
109 * anymore because it was dereferenced outside of the neigh_list_lock
110 * protected region. After the new best neighbor has replaced the current
111 * best neighbor, the reference counter needs to be decreased. Consequently,
112 * the code needs to ensure the curr_router variable contains a pointer
113 * to the replaced best neighbor.
114 */
115 curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
116
107 rcu_assign_pointer(orig_ifinfo->router, neigh_node); 117 rcu_assign_pointer(orig_ifinfo->router, neigh_node);
108 spin_unlock_bh(&orig_node->neigh_list_lock); 118 spin_unlock_bh(&orig_node->neigh_list_lock);
109 batadv_orig_ifinfo_free_ref(orig_ifinfo); 119 batadv_orig_ifinfo_put(orig_ifinfo);
110 120
111 /* decrease refcount of previous best neighbor */ 121 /* decrease refcount of previous best neighbor */
112 if (curr_router) 122 if (curr_router)
113 batadv_neigh_node_free_ref(curr_router); 123 batadv_neigh_node_put(curr_router);
114} 124}
115 125
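The comment added above spells out why orig_ifinfo->router is re-read under neigh_list_lock before being replaced. Stripped of batman-adv specifics, the pattern is: take the lock, fetch the currently published pointer with rcu_dereference_protected(), publish the replacement with rcu_assign_pointer(), drop the lock, then put the reference of whatever was actually swapped out. A kernel-style sketch with invented names (struct foo and foo_put() as in the earlier sketches; the new pointer is assumed to carry a reference taken by the caller):

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct owner {
	spinlock_t lock;
	struct foo __rcu *router;
};

static void owner_router_replace(struct owner *owner, struct foo *new_router)
{
	struct foo *old_router;

	spin_lock_bh(&owner->lock);

	/* re-read under the lock: this is the pointer actually replaced */
	old_router = rcu_dereference_protected(owner->router, true);
	rcu_assign_pointer(owner->router, new_router);

	spin_unlock_bh(&owner->lock);

	/* RCU readers may still see old_router; only drop our reference */
	if (old_router)
		foo_put(old_router);
}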
116/** 126/**
@@ -137,24 +147,38 @@ void batadv_update_route(struct batadv_priv *bat_priv,
137 147
138out: 148out:
139 if (router) 149 if (router)
140 batadv_neigh_node_free_ref(router); 150 batadv_neigh_node_put(router);
141} 151}
142 152
143/* checks whether the host restarted and is in the protection time. 153/**
144 * returns: 154 * batadv_window_protected - checks whether the host restarted and is in the
145 * 0 if the packet is to be accepted 155 * protection time.
156 * @bat_priv: the bat priv with all the soft interface information
157 * @seq_num_diff: difference between the current/received sequence number and
158 * the last sequence number
159 * @seq_old_max_diff: maximum age of sequence number not considered as restart
160 * @last_reset: jiffies timestamp of the last reset, will be updated when reset
161 * is detected
162 * @protection_started: is set to true if the protection window was started,
163 * doesn't change otherwise.
164 *
165 * Return:
166 * 0 if the packet is to be accepted.
146 * 1 if the packet is to be ignored. 167 * 1 if the packet is to be ignored.
147 */ 168 */
148int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, 169int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
149 unsigned long *last_reset) 170 s32 seq_old_max_diff, unsigned long *last_reset,
171 bool *protection_started)
150{ 172{
151 if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE || 173 if (seq_num_diff <= -seq_old_max_diff ||
152 seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) { 174 seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) {
153 if (!batadv_has_timed_out(*last_reset, 175 if (!batadv_has_timed_out(*last_reset,
154 BATADV_RESET_PROTECTION_MS)) 176 BATADV_RESET_PROTECTION_MS))
155 return 1; 177 return 1;
156 178
157 *last_reset = jiffies; 179 *last_reset = jiffies;
180 if (protection_started)
181 *protection_started = true;
158 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 182 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
159 "old packet received, start protection\n"); 183 "old packet received, start protection\n");
160 } 184 }
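With the new signature, a caller picks the restart window size via seq_old_max_diff and may pass a bool pointer (or NULL, as the code above tolerates) to learn whether a protection window was just started. A hedged sketch of a hypothetical caller; the orig-node field names used here are assumptions chosen purely for illustration:

/* hypothetical caller; the field names on 'orig' are illustrative only */
static bool seqno_is_protected(struct batadv_priv *bat_priv,
			       struct batadv_orig_node *orig, u32 seqno)
{
	bool started = false;
	s32 seq_diff = seqno - orig->last_real_seqno;

	if (batadv_window_protected(bat_priv, seq_diff,
				    BATADV_TQ_LOCAL_WINDOW_SIZE,
				    &orig->batman_seqno_reset, &started))
		return true;	/* inside the protection window, ignore packet */

	if (started)
		pr_debug("restart protection window started\n");

	return false;
}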
@@ -198,7 +222,7 @@ bool batadv_check_management_packet(struct sk_buff *skb,
198 * @bat_priv: the bat priv with all the soft interface information 222 * @bat_priv: the bat priv with all the soft interface information
199 * @skb: icmp packet to process 223 * @skb: icmp packet to process
200 * 224 *
201 * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP 225 * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
202 * otherwise. 226 * otherwise.
203 */ 227 */
204static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, 228static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
@@ -254,9 +278,9 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
254 } 278 }
255out: 279out:
256 if (primary_if) 280 if (primary_if)
257 batadv_hardif_free_ref(primary_if); 281 batadv_hardif_put(primary_if);
258 if (orig_node) 282 if (orig_node)
259 batadv_orig_node_free_ref(orig_node); 283 batadv_orig_node_put(orig_node);
260 return ret; 284 return ret;
261} 285}
262 286
@@ -302,9 +326,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
302 326
303out: 327out:
304 if (primary_if) 328 if (primary_if)
305 batadv_hardif_free_ref(primary_if); 329 batadv_hardif_put(primary_if);
306 if (orig_node) 330 if (orig_node)
307 batadv_orig_node_free_ref(orig_node); 331 batadv_orig_node_put(orig_node);
308 return ret; 332 return ret;
309} 333}
310 334
@@ -388,7 +412,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
388 412
389out: 413out:
390 if (orig_node) 414 if (orig_node)
391 batadv_orig_node_free_ref(orig_node); 415 batadv_orig_node_put(orig_node);
392 return ret; 416 return ret;
393} 417}
394 418
@@ -398,10 +422,11 @@ out:
398 * @skb: packet to check 422 * @skb: packet to check
399 * @hdr_size: size of header to pull 423 * @hdr_size: size of header to pull
400 * 424 *
401 * Check for short header and bad addresses in given packet. Returns negative 425 * Check for short header and bad addresses in given packet.
402 * value when check fails and 0 otherwise. The negative value depends on the 426 *
403 * reason: -ENODATA for bad header, -EBADR for broadcast destination or source, 427 * Return: negative value when check fails and 0 otherwise. The negative value
404 * and -EREMOTE for non-local (other host) destination. 428 * depends on the reason: -ENODATA for bad header, -EBADR for broadcast
429 * destination or source, and -EREMOTE for non-local (other host) destination.
405 */ 430 */
406static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, 431static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
407 struct sk_buff *skb, int hdr_size) 432 struct sk_buff *skb, int hdr_size)
@@ -435,7 +460,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
435 * @orig_node: the destination node 460 * @orig_node: the destination node
436 * @recv_if: pointer to interface this packet was received on 461 * @recv_if: pointer to interface this packet was received on
437 * 462 *
438 * Returns the router which should be used for this orig_node on 463 * Return: the router which should be used for this orig_node on
439 * this interface, or NULL if not available. 464 * this interface, or NULL if not available.
440 */ 465 */
441struct batadv_neigh_node * 466struct batadv_neigh_node *
@@ -482,14 +507,14 @@ batadv_find_router(struct batadv_priv *bat_priv,
482 507
483 hlist_for_each_entry_rcu(cand, &orig_node->ifinfo_list, list) { 508 hlist_for_each_entry_rcu(cand, &orig_node->ifinfo_list, list) {
484 /* acquire some structures and references ... */ 509 /* acquire some structures and references ... */
485 if (!atomic_inc_not_zero(&cand->refcount)) 510 if (!kref_get_unless_zero(&cand->refcount))
486 continue; 511 continue;
487 512
488 cand_router = rcu_dereference(cand->router); 513 cand_router = rcu_dereference(cand->router);
489 if (!cand_router) 514 if (!cand_router)
490 goto next; 515 goto next;
491 516
492 if (!atomic_inc_not_zero(&cand_router->refcount)) { 517 if (!kref_get_unless_zero(&cand_router->refcount)) {
493 cand_router = NULL; 518 cand_router = NULL;
494 goto next; 519 goto next;
495 } 520 }
@@ -508,8 +533,8 @@ batadv_find_router(struct batadv_priv *bat_priv,
508 533
509 /* mark the first possible candidate */ 534 /* mark the first possible candidate */
510 if (!first_candidate) { 535 if (!first_candidate) {
511 atomic_inc(&cand_router->refcount); 536 kref_get(&cand_router->refcount);
512 atomic_inc(&cand->refcount); 537 kref_get(&cand->refcount);
513 first_candidate = cand; 538 first_candidate = cand;
514 first_candidate_router = cand_router; 539 first_candidate_router = cand_router;
515 } 540 }
@@ -529,16 +554,16 @@ batadv_find_router(struct batadv_priv *bat_priv,
529next: 554next:
530 /* free references */ 555 /* free references */
531 if (cand_router) { 556 if (cand_router) {
532 batadv_neigh_node_free_ref(cand_router); 557 batadv_neigh_node_put(cand_router);
533 cand_router = NULL; 558 cand_router = NULL;
534 } 559 }
535 batadv_orig_ifinfo_free_ref(cand); 560 batadv_orig_ifinfo_put(cand);
536 } 561 }
537 rcu_read_unlock(); 562 rcu_read_unlock();
538 563
539 /* last_bonding_candidate is reset below, remove the old reference. */ 564 /* last_bonding_candidate is reset below, remove the old reference. */
540 if (orig_node->last_bonding_candidate) 565 if (orig_node->last_bonding_candidate)
541 batadv_orig_ifinfo_free_ref(orig_node->last_bonding_candidate); 566 batadv_orig_ifinfo_put(orig_node->last_bonding_candidate);
542 567
543 /* After finding candidates, handle the three cases: 568 /* After finding candidates, handle the three cases:
544 * 1) there is a next candidate, use that 569 * 1) there is a next candidate, use that
@@ -546,17 +571,17 @@ next:
546 * 3) there is no candidate at all, return the default router 571 * 3) there is no candidate at all, return the default router
547 */ 572 */
548 if (next_candidate) { 573 if (next_candidate) {
549 batadv_neigh_node_free_ref(router); 574 batadv_neigh_node_put(router);
550 575
551 /* remove references to first candidate, we don't need it. */ 576 /* remove references to first candidate, we don't need it. */
552 if (first_candidate) { 577 if (first_candidate) {
553 batadv_neigh_node_free_ref(first_candidate_router); 578 batadv_neigh_node_put(first_candidate_router);
554 batadv_orig_ifinfo_free_ref(first_candidate); 579 batadv_orig_ifinfo_put(first_candidate);
555 } 580 }
556 router = next_candidate_router; 581 router = next_candidate_router;
557 orig_node->last_bonding_candidate = next_candidate; 582 orig_node->last_bonding_candidate = next_candidate;
558 } else if (first_candidate) { 583 } else if (first_candidate) {
559 batadv_neigh_node_free_ref(router); 584 batadv_neigh_node_put(router);
560 585
561 /* refcounting has already been done in the loop above. */ 586 /* refcounting has already been done in the loop above. */
562 router = first_candidate_router; 587 router = first_candidate_router;
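
The hunks above swap atomic_inc_not_zero() for kref_get_unless_zero() when taking references on candidates found during an RCU list walk. A minimal sketch of that pattern, using hypothetical names rather than the batman-adv structures, assuming the object is freed through kfree_rcu() once its kref reaches zero:

#include <linux/kref.h>
#include <linux/rculist.h>

struct item {
        struct hlist_node list;
        struct kref refcount;
        struct rcu_head rcu;
};

/* Return the first list entry we could still take a reference on,
 * or NULL if every entry was already on its way to being freed.
 */
static struct item *item_get_first(struct hlist_head *head)
{
        struct item *it, *found = NULL;

        rcu_read_lock();
        hlist_for_each_entry_rcu(it, head, list) {
                /* fails if the refcount already dropped to zero */
                if (!kref_get_unless_zero(&it->refcount))
                        continue;

                found = it;
                break;
        }
        rcu_read_unlock();

        return found;
}
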
@@ -633,7 +658,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
633 658
634out: 659out:
635 if (orig_node) 660 if (orig_node)
636 batadv_orig_node_free_ref(orig_node); 661 batadv_orig_node_put(orig_node);
637 return ret; 662 return ret;
638} 663}
639 664
@@ -648,7 +673,7 @@ out:
648 * the new corresponding information (originator address where the destination 673 * the new corresponding information (originator address where the destination
649 * client currently is and its known TTVN) 674 * client currently is and its known TTVN)
650 * 675 *
651 * Returns true if the packet header has been updated, false otherwise 676 * Return: true if the packet header has been updated, false otherwise
652 */ 677 */
653static bool 678static bool
654batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, 679batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
@@ -686,9 +711,9 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
686 ret = true; 711 ret = true;
687out: 712out:
688 if (primary_if) 713 if (primary_if)
689 batadv_hardif_free_ref(primary_if); 714 batadv_hardif_put(primary_if);
690 if (orig_node) 715 if (orig_node)
691 batadv_orig_node_free_ref(orig_node); 716 batadv_orig_node_put(orig_node);
692 717
693 return ret; 718 return ret;
694} 719}
@@ -752,7 +777,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
752 return 0; 777 return 0;
753 778
754 curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn); 779 curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn);
755 batadv_orig_node_free_ref(orig_node); 780 batadv_orig_node_put(orig_node);
756 } 781 }
757 782
758 /* check if the TTVN contained in the packet is fresher than what the 783 /* check if the TTVN contained in the packet is fresher than what the
@@ -792,7 +817,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
792 817
793 ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr); 818 ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
794 819
795 batadv_hardif_free_ref(primary_if); 820 batadv_hardif_put(primary_if);
796 821
797 unicast_packet->ttvn = curr_ttvn; 822 unicast_packet->ttvn = curr_ttvn;
798 823
@@ -805,7 +830,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
805 * @skb: unicast tvlv packet to process 830 * @skb: unicast tvlv packet to process
806 * @recv_if: pointer to interface this packet was received on 831 * @recv_if: pointer to interface this packet was received on
807 * 832 *
808 * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP 833 * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
809 * otherwise. 834 * otherwise.
810 */ 835 */
811int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, 836int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
@@ -892,7 +917,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
892 917
893rx_success: 918rx_success:
894 if (orig_node) 919 if (orig_node)
895 batadv_orig_node_free_ref(orig_node); 920 batadv_orig_node_put(orig_node);
896 921
897 return NET_RX_SUCCESS; 922 return NET_RX_SUCCESS;
898 } 923 }
@@ -904,9 +929,8 @@ rx_success:
904 * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets 929 * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets
905 * @skb: unicast tvlv packet to process 930 * @skb: unicast tvlv packet to process
906 * @recv_if: pointer to interface this packet was received on 931 * @recv_if: pointer to interface this packet was received on
907 * @dst_addr: the payload destination
908 * 932 *
909 * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP 933 * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
910 * otherwise. 934 * otherwise.
911 */ 935 */
912int batadv_recv_unicast_tvlv(struct sk_buff *skb, 936int batadv_recv_unicast_tvlv(struct sk_buff *skb,
@@ -960,7 +984,7 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
960 * the assembled packet will exceed our MTU; 2) Buffer fragment, if we still 984 * the assembled packet will exceed our MTU; 2) Buffer fragment, if we still
961 * lack further fragments; 3) Merge fragments, if we have all needed parts. 985 * lack further fragments; 3) Merge fragments, if we have all needed parts.
962 * 986 *
963 * Return NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. 987 * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
964 */ 988 */
965int batadv_recv_frag_packet(struct sk_buff *skb, 989int batadv_recv_frag_packet(struct sk_buff *skb,
966 struct batadv_hard_iface *recv_if) 990 struct batadv_hard_iface *recv_if)
@@ -1004,7 +1028,7 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
1004 1028
1005out: 1029out:
1006 if (orig_node_src) 1030 if (orig_node_src)
1007 batadv_orig_node_free_ref(orig_node_src); 1031 batadv_orig_node_put(orig_node_src);
1008 1032
1009 return ret; 1033 return ret;
1010} 1034}
@@ -1065,7 +1089,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1065 1089
1066 /* check whether the packet is old and the host just restarted. */ 1090 /* check whether the packet is old and the host just restarted. */
1067 if (batadv_window_protected(bat_priv, seq_diff, 1091 if (batadv_window_protected(bat_priv, seq_diff,
1068 &orig_node->bcast_seqno_reset)) 1092 BATADV_BCAST_MAX_AGE,
1093 &orig_node->bcast_seqno_reset, NULL))
1069 goto spin_unlock; 1094 goto spin_unlock;
1070 1095
1071 /* mark broadcast in flood history, update window position 1096 /* mark broadcast in flood history, update window position
@@ -1108,6 +1133,6 @@ spin_unlock:
1108 spin_unlock_bh(&orig_node->bcast_seqno_lock); 1133 spin_unlock_bh(&orig_node->bcast_seqno_lock);
1109out: 1134out:
1110 if (orig_node) 1135 if (orig_node)
1111 batadv_orig_node_free_ref(orig_node); 1136 batadv_orig_node_put(orig_node);
1112 return ret; 1137 return ret;
1113} 1138}
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 204bbe4952a6..02a5caa84127 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -52,6 +52,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
52 struct batadv_orig_node *orig_node, 52 struct batadv_orig_node *orig_node,
53 struct batadv_hard_iface *recv_if); 53 struct batadv_hard_iface *recv_if);
54int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, 54int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
55 unsigned long *last_reset); 55 s32 seq_old_max_diff, unsigned long *last_reset,
56 bool *protection_started);
56 57
57#endif /* _NET_BATMAN_ADV_ROUTING_H_ */ 58#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
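
For orientation: batadv_window_protected() now takes the maximum tolerated sequence-number age from the caller (the broadcast path earlier in this patch passes BATADV_BCAST_MAX_AGE) plus an optional output flag reporting whether the protection window was just started; NULL is accepted when that information is not needed. A hedged sketch of the call shape, mirroring the broadcast receive path:

/* this caller does not care whether protection was just (re)started,
 * so the last argument is NULL; a caller that does care passes a
 * bool pointer and reads it back after the call
 */
if (batadv_window_protected(bat_priv, seq_diff, BATADV_BCAST_MAX_AGE,
                            &orig_node->bcast_seqno_reset, NULL))
        goto spin_unlock;
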
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 782fa33ec296..76417850d3fc 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -49,16 +49,30 @@
49 49
50static void batadv_send_outstanding_bcast_packet(struct work_struct *work); 50static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
51 51
52/* send out an already prepared packet to the given address via the 52/**
53 * specified batman interface 53 * batadv_send_skb_packet - send an already prepared packet
54 * @skb: the packet to send
55 * @hard_iface: the interface to use to send the broadcast packet
56 * @dst_addr: the payload destination
57 *
58 * Send out an already prepared packet to the given neighbor or broadcast it
59 * using the specified interface. Either hard_iface or neigh_node must not be
60 * NULL.
61 * If neigh_node is NULL, then the packet is broadcasted using hard_iface,
62 * otherwise it is sent as unicast to the given neighbor.
63 *
64 * Return: NET_TX_DROP in case of error or the result of dev_queue_xmit(skb)
65 * otherwise
54 */ 66 */
55int batadv_send_skb_packet(struct sk_buff *skb, 67int batadv_send_skb_packet(struct sk_buff *skb,
56 struct batadv_hard_iface *hard_iface, 68 struct batadv_hard_iface *hard_iface,
57 const u8 *dst_addr) 69 const u8 *dst_addr)
58{ 70{
59 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 71 struct batadv_priv *bat_priv;
60 struct ethhdr *ethhdr; 72 struct ethhdr *ethhdr;
61 73
74 bat_priv = netdev_priv(hard_iface->soft_iface);
75
62 if (hard_iface->if_status != BATADV_IF_ACTIVE) 76 if (hard_iface->if_status != BATADV_IF_ACTIVE)
63 goto send_skb_err; 77 goto send_skb_err;
64 78
@@ -100,6 +114,35 @@ send_skb_err:
100 return NET_XMIT_DROP; 114 return NET_XMIT_DROP;
101} 115}
102 116
117int batadv_send_broadcast_skb(struct sk_buff *skb,
118 struct batadv_hard_iface *hard_iface)
119{
120 return batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr);
121}
122
123int batadv_send_unicast_skb(struct sk_buff *skb,
124 struct batadv_neigh_node *neigh)
125{
126#ifdef CONFIG_BATMAN_ADV_BATMAN_V
127 struct batadv_hardif_neigh_node *hardif_neigh;
128#endif
129 int ret;
130
131 ret = batadv_send_skb_packet(skb, neigh->if_incoming, neigh->addr);
132
133#ifdef CONFIG_BATMAN_ADV_BATMAN_V
134 hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
135
136 if ((hardif_neigh) && (ret != NET_XMIT_DROP))
137 hardif_neigh->bat_v.last_unicast_tx = jiffies;
138
139 if (hardif_neigh)
140 batadv_hardif_neigh_put(hardif_neigh);
141#endif
142
143 return ret;
144}
145
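
The two wrappers added above cover the common cases of batadv_send_skb_packet(): flooding a frame on one interface versus sending it to a known neighbor, with the unicast variant also refreshing the B.A.T.M.A.N. V ELP timestamp when that algorithm is compiled in. A sketch of how a caller picks between them; the helper itself is hypothetical, the real call sites appear in the hunks below:

static int example_forward(struct sk_buff *skb,
                           struct batadv_hard_iface *hard_iface,
                           struct batadv_neigh_node *neigh_node)
{
        /* known next hop: unicast to that neighbor on its interface */
        if (neigh_node)
                return batadv_send_unicast_skb(skb, neigh_node);

        /* no next hop known: broadcast on the given interface */
        return batadv_send_broadcast_skb(skb, hard_iface);
}
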
103/** 146/**
104 * batadv_send_skb_to_orig - Lookup next-hop and transmit skb. 147 * batadv_send_skb_to_orig - Lookup next-hop and transmit skb.
105 * @skb: Packet to be transmitted. 148 * @skb: Packet to be transmitted.
@@ -111,7 +154,7 @@ send_skb_err:
111 * host, NULL can be passed as recv_if and no interface alternating is 154 * host, NULL can be passed as recv_if and no interface alternating is
112 * attempted. 155 * attempted.
113 * 156 *
114 * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or 157 * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
115 * NET_XMIT_POLICED if the skb is buffered for later transmit. 158 * NET_XMIT_POLICED if the skb is buffered for later transmit.
116 */ 159 */
117int batadv_send_skb_to_orig(struct sk_buff *skb, 160int batadv_send_skb_to_orig(struct sk_buff *skb,
@@ -146,14 +189,13 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
146 if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { 189 if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) {
147 ret = NET_XMIT_POLICED; 190 ret = NET_XMIT_POLICED;
148 } else { 191 } else {
149 batadv_send_skb_packet(skb, neigh_node->if_incoming, 192 batadv_send_unicast_skb(skb, neigh_node);
150 neigh_node->addr);
151 ret = NET_XMIT_SUCCESS; 193 ret = NET_XMIT_SUCCESS;
152 } 194 }
153 195
154out: 196out:
155 if (neigh_node) 197 if (neigh_node)
156 batadv_neigh_node_free_ref(neigh_node); 198 batadv_neigh_node_put(neigh_node);
157 199
158 return ret; 200 return ret;
159} 201}
@@ -165,7 +207,7 @@ out:
165 * @hdr_size: amount of bytes to push at the beginning of the skb 207 * @hdr_size: amount of bytes to push at the beginning of the skb
166 * @orig_node: the destination node 208 * @orig_node: the destination node
167 * 209 *
168 * Returns false if the buffer extension was not possible or true otherwise. 210 * Return: false if the buffer extension was not possible or true otherwise.
169 */ 211 */
170static bool 212static bool
171batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, 213batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
@@ -196,7 +238,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
196 * @skb: the skb containing the payload to encapsulate 238 * @skb: the skb containing the payload to encapsulate
197 * @orig_node: the destination node 239 * @orig_node: the destination node
198 * 240 *
199 * Returns false if the payload could not be encapsulated or true otherwise. 241 * Return: false if the payload could not be encapsulated or true otherwise.
200 */ 242 */
201static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, 243static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
202 struct batadv_orig_node *orig_node) 244 struct batadv_orig_node *orig_node)
@@ -211,10 +253,10 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
211 * unicast 4addr header 253 * unicast 4addr header
212 * @bat_priv: the bat priv with all the soft interface information 254 * @bat_priv: the bat priv with all the soft interface information
213 * @skb: the skb containing the payload to encapsulate 255 * @skb: the skb containing the payload to encapsulate
214 * @orig_node: the destination node 256 * @orig: the destination node
215 * @packet_subtype: the unicast 4addr packet subtype to use 257 * @packet_subtype: the unicast 4addr packet subtype to use
216 * 258 *
217 * Returns false if the payload could not be encapsulated or true otherwise. 259 * Return: false if the payload could not be encapsulated or true otherwise.
218 */ 260 */
219bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, 261bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
220 struct sk_buff *skb, 262 struct sk_buff *skb,
@@ -246,7 +288,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
246 ret = true; 288 ret = true;
247out: 289out:
248 if (primary_if) 290 if (primary_if)
249 batadv_hardif_free_ref(primary_if); 291 batadv_hardif_put(primary_if);
250 return ret; 292 return ret;
251} 293}
252 294
@@ -265,7 +307,7 @@ out:
265 * as packet_type. Then send this frame to the given orig_node and release a 307 * as packet_type. Then send this frame to the given orig_node and release a
266 * reference to this orig_node. 308 * reference to this orig_node.
267 * 309 *
268 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 310 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
269 */ 311 */
270int batadv_send_skb_unicast(struct batadv_priv *bat_priv, 312int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
271 struct sk_buff *skb, int packet_type, 313 struct sk_buff *skb, int packet_type,
@@ -317,7 +359,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
317 359
318out: 360out:
319 if (orig_node) 361 if (orig_node)
320 batadv_orig_node_free_ref(orig_node); 362 batadv_orig_node_put(orig_node);
321 if (ret == NET_XMIT_DROP) 363 if (ret == NET_XMIT_DROP)
322 kfree_skb(skb); 364 kfree_skb(skb);
323 return ret; 365 return ret;
@@ -339,7 +381,7 @@ out:
339 * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame 381 * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame
340 * to the according destination node. 382 * to the according destination node.
341 * 383 *
342 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 384 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
343 */ 385 */
344int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, 386int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
345 struct sk_buff *skb, int packet_type, 387 struct sk_buff *skb, int packet_type,
@@ -373,7 +415,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
373 * Look up the currently selected gateway. Wrap the given skb into a batman-adv 415 * Look up the currently selected gateway. Wrap the given skb into a batman-adv
374 * unicast header and send this frame to this gateway node. 416 * unicast header and send this frame to this gateway node.
375 * 417 *
376 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 418 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
377 */ 419 */
378int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, 420int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
379 unsigned short vid) 421 unsigned short vid)
@@ -409,9 +451,9 @@ static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
409{ 451{
410 kfree_skb(forw_packet->skb); 452 kfree_skb(forw_packet->skb);
411 if (forw_packet->if_incoming) 453 if (forw_packet->if_incoming)
412 batadv_hardif_free_ref(forw_packet->if_incoming); 454 batadv_hardif_put(forw_packet->if_incoming);
413 if (forw_packet->if_outgoing) 455 if (forw_packet->if_outgoing)
414 batadv_hardif_free_ref(forw_packet->if_outgoing); 456 batadv_hardif_put(forw_packet->if_outgoing);
415 kfree(forw_packet); 457 kfree(forw_packet);
416} 458}
417 459
@@ -430,14 +472,19 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
430 send_time); 472 send_time);
431} 473}
432 474
433/* add a broadcast packet to the queue and setup timers. broadcast packets 475/**
434 * are sent multiple times to increase probability for being received. 476 * batadv_add_bcast_packet_to_list - queue broadcast packet for multiple sends
477 * @bat_priv: the bat priv with all the soft interface information
478 * @skb: broadcast packet to add
479 * @delay: number of jiffies to wait before sending
435 * 480 *
436 * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on 481 * add a broadcast packet to the queue and setup timers. broadcast packets
437 * errors. 482 * are sent multiple times to increase probability for being received.
438 * 483 *
439 * The skb is not consumed, so the caller should make sure that the 484 * The skb is not consumed, so the caller should make sure that the
440 * skb is freed. 485 * skb is freed.
486 *
487 * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
441 */ 488 */
442int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, 489int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
443 const struct sk_buff *skb, 490 const struct sk_buff *skb,
@@ -492,7 +539,7 @@ out_and_inc:
492 atomic_inc(&bat_priv->bcast_queue_left); 539 atomic_inc(&bat_priv->bcast_queue_left);
493out: 540out:
494 if (primary_if) 541 if (primary_if)
495 batadv_hardif_free_ref(primary_if); 542 batadv_hardif_put(primary_if);
496 return NETDEV_TX_BUSY; 543 return NETDEV_TX_BUSY;
497} 544}
498 545
@@ -533,8 +580,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
533 /* send a copy of the saved skb */ 580 /* send a copy of the saved skb */
534 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); 581 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
535 if (skb1) 582 if (skb1)
536 batadv_send_skb_packet(skb1, hard_iface, 583 batadv_send_broadcast_skb(skb1, hard_iface);
537 batadv_broadcast_addr);
538 } 584 }
539 rcu_read_unlock(); 585 rcu_read_unlock();
540 586
@@ -629,6 +675,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
629 675
630 if (pending) { 676 if (pending) {
631 hlist_del(&forw_packet->list); 677 hlist_del(&forw_packet->list);
678 if (!forw_packet->own)
679 atomic_inc(&bat_priv->bcast_queue_left);
680
632 batadv_forw_packet_free(forw_packet); 681 batadv_forw_packet_free(forw_packet);
633 } 682 }
634 } 683 }
@@ -656,6 +705,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
656 705
657 if (pending) { 706 if (pending) {
658 hlist_del(&forw_packet->list); 707 hlist_del(&forw_packet->list);
708 if (!forw_packet->own)
709 atomic_inc(&bat_priv->batman_queue_left);
710
659 batadv_forw_packet_free(forw_packet); 711 batadv_forw_packet_free(forw_packet);
660 } 712 }
661 } 713 }
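
As the reworked kernel-doc above spells out, batadv_add_bcast_packet_to_list() copies the skb instead of consuming it, so the caller keeps ownership of its buffer whatever the return code. A hedged sketch of a caller honouring that contract (the helper and the one-jiffy delay are made up for illustration):

static void example_queue_bcast(struct batadv_priv *bat_priv,
                                struct sk_buff *skb)
{
        /* NETDEV_TX_BUSY means the broadcast queue was full and nothing
         * was taken over; either way the skb stays ours
         */
        if (batadv_add_bcast_packet_to_list(bat_priv, skb, 1) != NETDEV_TX_OK)
                batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);

        consume_skb(skb);
}
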
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 82059f259e46..6fd7270d8ce6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -28,12 +28,16 @@
28struct sk_buff; 28struct sk_buff;
29struct work_struct; 29struct work_struct;
30 30
31int batadv_send_skb_packet(struct sk_buff *skb,
32 struct batadv_hard_iface *hard_iface,
33 const u8 *dst_addr);
34int batadv_send_skb_to_orig(struct sk_buff *skb, 31int batadv_send_skb_to_orig(struct sk_buff *skb,
35 struct batadv_orig_node *orig_node, 32 struct batadv_orig_node *orig_node,
36 struct batadv_hard_iface *recv_if); 33 struct batadv_hard_iface *recv_if);
34int batadv_send_skb_packet(struct sk_buff *skb,
35 struct batadv_hard_iface *hard_iface,
36 const u8 *dst_addr);
37int batadv_send_broadcast_skb(struct sk_buff *skb,
38 struct batadv_hard_iface *hard_iface);
39int batadv_send_unicast_skb(struct sk_buff *skb,
40 struct batadv_neigh_node *neigh_node);
37void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); 41void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface);
38int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, 42int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
39 const struct sk_buff *skb, 43 const struct sk_buff *skb,
@@ -69,7 +73,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
69 * header via the translation table. Wrap the given skb into a batman-adv 73 * header via the translation table. Wrap the given skb into a batman-adv
70 * unicast header. Then send this frame to the according destination node. 74 * unicast header. Then send this frame to the according destination node.
71 * 75 *
72 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 76 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
73 */ 77 */
74static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, 78static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
75 struct sk_buff *skb, u8 *dst_hint, 79 struct sk_buff *skb, u8 *dst_hint,
@@ -92,7 +96,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
92 * unicast-4addr header. Then send this frame to the according destination 96 * unicast-4addr header. Then send this frame to the according destination
93 * node. 97 * node.
94 * 98 *
95 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 99 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
96 */ 100 */
97static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv, 101static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv,
98 struct sk_buff *skb, 102 struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index ac4d08de5df4..8a136b6a1ff0 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -30,6 +30,7 @@
30#include <linux/if_vlan.h> 30#include <linux/if_vlan.h>
31#include <linux/jiffies.h> 31#include <linux/jiffies.h>
32#include <linux/kernel.h> 32#include <linux/kernel.h>
33#include <linux/kref.h>
33#include <linux/list.h> 34#include <linux/list.h>
34#include <linux/lockdep.h> 35#include <linux/lockdep.h>
35#include <linux/netdevice.h> 36#include <linux/netdevice.h>
@@ -376,7 +377,7 @@ dropped_freed:
376 batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); 377 batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
377end: 378end:
378 if (primary_if) 379 if (primary_if)
379 batadv_hardif_free_ref(primary_if); 380 batadv_hardif_put(primary_if);
380 return NETDEV_TX_OK; 381 return NETDEV_TX_OK;
381} 382}
382 383
@@ -407,11 +408,17 @@ void batadv_interface_rx(struct net_device *soft_iface,
407 */ 408 */
408 nf_reset(skb); 409 nf_reset(skb);
409 410
411 if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
412 goto dropped;
413
410 vid = batadv_get_vid(skb, 0); 414 vid = batadv_get_vid(skb, 0);
411 ethhdr = eth_hdr(skb); 415 ethhdr = eth_hdr(skb);
412 416
413 switch (ntohs(ethhdr->h_proto)) { 417 switch (ntohs(ethhdr->h_proto)) {
414 case ETH_P_8021Q: 418 case ETH_P_8021Q:
419 if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
420 goto dropped;
421
415 vhdr = (struct vlan_ethhdr *)skb->data; 422 vhdr = (struct vlan_ethhdr *)skb->data;
416 423
417 if (vhdr->h_vlan_encapsulated_proto != ethertype) 424 if (vhdr->h_vlan_encapsulated_proto != ethertype)
@@ -423,8 +430,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
423 } 430 }
424 431
425 /* skb->dev & skb->pkt_type are set here */ 432 /* skb->dev & skb->pkt_type are set here */
426 if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
427 goto dropped;
428 skb->protocol = eth_type_trans(skb, soft_iface); 433 skb->protocol = eth_type_trans(skb, soft_iface);
429 434
430 /* should not be necessary anymore as we use skb_pull_rcsum() 435 /* should not be necessary anymore as we use skb_pull_rcsum()
@@ -478,22 +483,34 @@ out:
478} 483}
479 484
480/** 485/**
481 * batadv_softif_vlan_free_ref - decrease the vlan object refcounter and 486 * batadv_softif_vlan_release - release vlan from lists and queue for free after
482 * possibly free it 487 * rcu grace period
483 * @softif_vlan: the vlan object to release 488 * @ref: kref pointer of the vlan object
489 */
490static void batadv_softif_vlan_release(struct kref *ref)
491{
492 struct batadv_softif_vlan *vlan;
493
494 vlan = container_of(ref, struct batadv_softif_vlan, refcount);
495
496 spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
497 hlist_del_rcu(&vlan->list);
498 spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
499
500 kfree_rcu(vlan, rcu);
501}
502
503/**
504 * batadv_softif_vlan_put - decrease the vlan object refcounter and
505 * possibly release it
506 * @vlan: the vlan object to release
484 */ 507 */
485void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) 508void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
486{ 509{
487 if (!vlan) 510 if (!vlan)
488 return; 511 return;
489 512
490 if (atomic_dec_and_test(&vlan->refcount)) { 513 kref_put(&vlan->refcount, batadv_softif_vlan_release);
491 spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
492 hlist_del_rcu(&vlan->list);
493 spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
494
495 kfree_rcu(vlan, rcu);
496 }
497} 514}
498 515
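
batadv_softif_vlan_put() above shows the shape every converted reference counter takes in this series: kref_put() with a release callback that unlinks the object under its list lock and defers the actual free until after an RCU grace period. A compact sketch with hypothetical names:

static DEFINE_SPINLOCK(item_list_lock);

struct item {
        struct hlist_node list;
        struct kref refcount;
        struct rcu_head rcu;
};

/* runs exactly once, when the last reference is dropped */
static void item_release(struct kref *ref)
{
        struct item *it = container_of(ref, struct item, refcount);

        spin_lock_bh(&item_list_lock);
        hlist_del_rcu(&it->list);
        spin_unlock_bh(&item_list_lock);

        /* readers inside rcu_read_lock() may still hold a pointer */
        kfree_rcu(it, rcu);
}

static void item_put(struct item *it)
{
        kref_put(&it->refcount, item_release);
}
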
499/** 516/**
@@ -501,7 +518,7 @@ void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan)
501 * @bat_priv: the bat priv with all the soft interface information 518 * @bat_priv: the bat priv with all the soft interface information
502 * @vid: the identifier of the vlan object to retrieve 519 * @vid: the identifier of the vlan object to retrieve
503 * 520 *
504 * Returns the private data of the vlan matching the vid passed as argument or 521 * Return: the private data of the vlan matching the vid passed as argument or
505 * NULL otherwise. The refcounter of the returned object is incremented by 1. 522 * NULL otherwise. The refcounter of the returned object is incremented by 1.
506 */ 523 */
507struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, 524struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
@@ -514,7 +531,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
514 if (vlan_tmp->vid != vid) 531 if (vlan_tmp->vid != vid)
515 continue; 532 continue;
516 533
517 if (!atomic_inc_not_zero(&vlan_tmp->refcount)) 534 if (!kref_get_unless_zero(&vlan_tmp->refcount))
518 continue; 535 continue;
519 536
520 vlan = vlan_tmp; 537 vlan = vlan_tmp;
@@ -530,7 +547,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
530 * @bat_priv: the bat priv with all the soft interface information 547 * @bat_priv: the bat priv with all the soft interface information
531 * @vid: the VLAN identifier 548 * @vid: the VLAN identifier
532 * 549 *
533 * Returns 0 on success, a negative error otherwise. 550 * Return: 0 on success, a negative error otherwise.
534 */ 551 */
535int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) 552int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
536{ 553{
@@ -539,7 +556,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
539 556
540 vlan = batadv_softif_vlan_get(bat_priv, vid); 557 vlan = batadv_softif_vlan_get(bat_priv, vid);
541 if (vlan) { 558 if (vlan) {
542 batadv_softif_vlan_free_ref(vlan); 559 batadv_softif_vlan_put(vlan);
543 return -EEXIST; 560 return -EEXIST;
544 } 561 }
545 562
@@ -549,7 +566,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
549 566
550 vlan->bat_priv = bat_priv; 567 vlan->bat_priv = bat_priv;
551 vlan->vid = vid; 568 vlan->vid = vid;
552 atomic_set(&vlan->refcount, 1); 569 kref_init(&vlan->refcount);
553 570
554 atomic_set(&vlan->ap_isolation, 0); 571 atomic_set(&vlan->ap_isolation, 0);
555 572
@@ -588,18 +605,19 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
588 vlan->vid, "vlan interface destroyed", false); 605 vlan->vid, "vlan interface destroyed", false);
589 606
590 batadv_sysfs_del_vlan(bat_priv, vlan); 607 batadv_sysfs_del_vlan(bat_priv, vlan);
591 batadv_softif_vlan_free_ref(vlan); 608 batadv_softif_vlan_put(vlan);
592} 609}
593 610
594/** 611/**
595 * batadv_interface_add_vid - ndo_add_vid API implementation 612 * batadv_interface_add_vid - ndo_add_vid API implementation
596 * @dev: the netdev of the mesh interface 613 * @dev: the netdev of the mesh interface
614 * @proto: protocol of the vlan id
597 * @vid: identifier of the new vlan 615 * @vid: identifier of the new vlan
598 * 616 *
599 * Set up all the internal structures for handling the new vlan on top of the 617 * Set up all the internal structures for handling the new vlan on top of the
600 * mesh interface 618 * mesh interface
601 * 619 *
602 * Returns 0 on success or a negative error code in case of failure. 620 * Return: 0 on success or a negative error code in case of failure.
603 */ 621 */
604static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, 622static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
605 unsigned short vid) 623 unsigned short vid)
@@ -632,7 +650,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
632 if (!vlan->kobj) { 650 if (!vlan->kobj) {
633 ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); 651 ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
634 if (ret) { 652 if (ret) {
635 batadv_softif_vlan_free_ref(vlan); 653 batadv_softif_vlan_put(vlan);
636 return ret; 654 return ret;
637 } 655 }
638 } 656 }
@@ -651,12 +669,13 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
651/** 669/**
652 * batadv_interface_kill_vid - ndo_kill_vid API implementation 670 * batadv_interface_kill_vid - ndo_kill_vid API implementation
653 * @dev: the netdev of the mesh interface 671 * @dev: the netdev of the mesh interface
672 * @proto: protocol of the vlan id
654 * @vid: identifier of the deleted vlan 673 * @vid: identifier of the deleted vlan
655 * 674 *
656 * Destroy all the internal structures used to handle the vlan identified by vid 675 * Destroy all the internal structures used to handle the vlan identified by vid
657 * on top of the mesh interface 676 * on top of the mesh interface
658 * 677 *
659 * Returns 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q 678 * Return: 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q
660 * or -ENOENT if the specified vlan id wasn't registered. 679 * or -ENOENT if the specified vlan id wasn't registered.
661 */ 680 */
662static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, 681static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
@@ -678,7 +697,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
678 batadv_softif_destroy_vlan(bat_priv, vlan); 697 batadv_softif_destroy_vlan(bat_priv, vlan);
679 698
680 /* finally free the vlan object */ 699 /* finally free the vlan object */
681 batadv_softif_vlan_free_ref(vlan); 700 batadv_softif_vlan_put(vlan);
682 701
683 return 0; 702 return 0;
684} 703}
@@ -734,7 +753,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
734 vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); 753 vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
735 if (vlan) { 754 if (vlan) {
736 batadv_softif_destroy_vlan(bat_priv, vlan); 755 batadv_softif_destroy_vlan(bat_priv, vlan);
737 batadv_softif_vlan_free_ref(vlan); 756 batadv_softif_vlan_put(vlan);
738 } 757 }
739 758
740 batadv_sysfs_del_meshif(soft_iface); 759 batadv_sysfs_del_meshif(soft_iface);
@@ -745,7 +764,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
745 * batadv_softif_init_late - late stage initialization of soft interface 764 * batadv_softif_init_late - late stage initialization of soft interface
746 * @dev: registered network device to modify 765 * @dev: registered network device to modify
747 * 766 *
748 * Returns error code on failures 767 * Return: error code on failures
749 */ 768 */
750static int batadv_softif_init_late(struct net_device *dev) 769static int batadv_softif_init_late(struct net_device *dev)
751{ 770{
@@ -847,7 +866,7 @@ free_bat_counters:
847 * @dev: batadv_soft_interface used as master interface 866 * @dev: batadv_soft_interface used as master interface
848 * @slave_dev: net_device which should become the slave interface 867 * @slave_dev: net_device which should become the slave interface
849 * 868 *
850 * Return 0 if successful or error otherwise. 869 * Return: 0 if successful or error otherwise.
851 */ 870 */
852static int batadv_softif_slave_add(struct net_device *dev, 871static int batadv_softif_slave_add(struct net_device *dev,
853 struct net_device *slave_dev) 872 struct net_device *slave_dev)
@@ -863,7 +882,7 @@ static int batadv_softif_slave_add(struct net_device *dev,
863 882
864out: 883out:
865 if (hard_iface) 884 if (hard_iface)
866 batadv_hardif_free_ref(hard_iface); 885 batadv_hardif_put(hard_iface);
867 return ret; 886 return ret;
868} 887}
869 888
@@ -872,7 +891,7 @@ out:
872 * @dev: batadv_soft_interface used as master interface 891 * @dev: batadv_soft_interface used as master interface
873 * @slave_dev: net_device which should be removed from the master interface 892 * @slave_dev: net_device which should be removed from the master interface
874 * 893 *
875 * Return 0 if successful or error otherwise. 894 * Return: 0 if successful or error otherwise.
876 */ 895 */
877static int batadv_softif_slave_del(struct net_device *dev, 896static int batadv_softif_slave_del(struct net_device *dev,
878 struct net_device *slave_dev) 897 struct net_device *slave_dev)
@@ -890,7 +909,7 @@ static int batadv_softif_slave_del(struct net_device *dev,
890 909
891out: 910out:
892 if (hard_iface) 911 if (hard_iface)
893 batadv_hardif_free_ref(hard_iface); 912 batadv_hardif_put(hard_iface);
894 return ret; 913 return ret;
895} 914}
896 915
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 8e82176f40b1..9ae265703d23 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -34,7 +34,7 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
34int batadv_softif_is_valid(const struct net_device *net_dev); 34int batadv_softif_is_valid(const struct net_device *net_dev);
35extern struct rtnl_link_ops batadv_link_ops; 35extern struct rtnl_link_ops batadv_link_ops;
36int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); 36int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
37void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan); 37void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
38struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, 38struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
39 unsigned short vid); 39 unsigned short vid);
40 40
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index fe87777fda8a..e7cf51333a36 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/if.h> 26#include <linux/if.h>
27#include <linux/if_vlan.h> 27#include <linux/if_vlan.h>
28#include <linux/kref.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
29#include <linux/netdevice.h> 30#include <linux/netdevice.h>
30#include <linux/printk.h> 31#include <linux/printk.h>
@@ -64,7 +65,7 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj)
64 * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv 65 * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv
65 * @obj: kobject to convert 66 * @obj: kobject to convert
66 * 67 *
67 * Returns the associated batadv_priv struct. 68 * Return: the associated batadv_priv struct.
68 */ 69 */
69static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj) 70static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
70{ 71{
@@ -82,9 +83,10 @@ static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
82 83
83/** 84/**
84 * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct 85 * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct
86 * @bat_priv: the bat priv with all the soft interface information
85 * @obj: kobject to covert 87 * @obj: kobject to covert
86 * 88 *
87 * Returns the associated softif_vlan struct if found, NULL otherwise. 89 * Return: the associated softif_vlan struct if found, NULL otherwise.
88 */ 90 */
89static struct batadv_softif_vlan * 91static struct batadv_softif_vlan *
90batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj) 92batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
@@ -96,7 +98,7 @@ batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
96 if (vlan_tmp->kobj != obj) 98 if (vlan_tmp->kobj != obj)
97 continue; 99 continue;
98 100
99 if (!atomic_inc_not_zero(&vlan_tmp->refcount)) 101 if (!kref_get_unless_zero(&vlan_tmp->refcount))
100 continue; 102 continue;
101 103
102 vlan = vlan_tmp; 104 vlan = vlan_tmp;
@@ -214,7 +216,7 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \
214 attr, &vlan->_name, \ 216 attr, &vlan->_name, \
215 bat_priv->soft_iface); \ 217 bat_priv->soft_iface); \
216 \ 218 \
217 batadv_softif_vlan_free_ref(vlan); \ 219 batadv_softif_vlan_put(vlan); \
218 return res; \ 220 return res; \
219} 221}
220 222
@@ -229,7 +231,7 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \
229 atomic_read(&vlan->_name) == 0 ? \ 231 atomic_read(&vlan->_name) == 0 ? \
230 "disabled" : "enabled"); \ 232 "disabled" : "enabled"); \
231 \ 233 \
232 batadv_softif_vlan_free_ref(vlan); \ 234 batadv_softif_vlan_put(vlan); \
233 return res; \ 235 return res; \
234} 236}
235 237
@@ -240,6 +242,55 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \
240 static BATADV_ATTR_VLAN(_name, _mode, batadv_show_vlan_##_name, \ 242 static BATADV_ATTR_VLAN(_name, _mode, batadv_show_vlan_##_name, \
241 batadv_store_vlan_##_name) 243 batadv_store_vlan_##_name)
242 244
245#define BATADV_ATTR_HIF_STORE_UINT(_name, _var, _min, _max, _post_func) \
246ssize_t batadv_store_##_name(struct kobject *kobj, \
247 struct attribute *attr, char *buff, \
248 size_t count) \
249{ \
250 struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
251 struct batadv_hard_iface *hard_iface; \
252 ssize_t length; \
253 \
254 hard_iface = batadv_hardif_get_by_netdev(net_dev); \
255 if (!hard_iface) \
256 return 0; \
257 \
258 length = __batadv_store_uint_attr(buff, count, _min, _max, \
259 _post_func, attr, \
260 &hard_iface->_var, net_dev); \
261 \
262 batadv_hardif_put(hard_iface); \
263 return length; \
264}
265
266#define BATADV_ATTR_HIF_SHOW_UINT(_name, _var) \
267ssize_t batadv_show_##_name(struct kobject *kobj, \
268 struct attribute *attr, char *buff) \
269{ \
270 struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
271 struct batadv_hard_iface *hard_iface; \
272 ssize_t length; \
273 \
274 hard_iface = batadv_hardif_get_by_netdev(net_dev); \
275 if (!hard_iface) \
276 return 0; \
277 \
278 length = sprintf(buff, "%i\n", atomic_read(&hard_iface->_var)); \
279 \
280 batadv_hardif_put(hard_iface); \
281 return length; \
282}
283
284/* Use this, if you are going to set [name] in hard_iface to an
285 * unsigned integer value
286 */
287#define BATADV_ATTR_HIF_UINT(_name, _var, _mode, _min, _max, _post_func)\
288 static BATADV_ATTR_HIF_STORE_UINT(_name, _var, _min, \
289 _max, _post_func) \
290 static BATADV_ATTR_HIF_SHOW_UINT(_name, _var) \
291 static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
292 batadv_store_##_name)
293
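
BATADV_ATTR_HIF_UINT above mirrors the existing per-mesh helpers but resolves the hard interface from the kobject and stores into an atomic_t embedded in struct batadv_hard_iface, range-checked by __batadv_store_uint_attr(). Its single user in this patch appears further down and looks like this:

/* expands to batadv_store_elp_interval(), batadv_show_elp_interval()
 * and the batadv_attr_elp_interval attribute, backed by the atomic_t
 * at hard_iface->bat_v.elp_interval and clamped to the given range
 */
BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, S_IRUGO | S_IWUSR,
                     2 * BATADV_JITTER, INT_MAX, NULL);
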
243static int batadv_store_bool_attr(char *buff, size_t count, 294static int batadv_store_bool_attr(char *buff, size_t count,
244 struct net_device *net_dev, 295 struct net_device *net_dev,
245 const char *attr_name, atomic_t *attr, 296 const char *attr_name, atomic_t *attr,
@@ -491,7 +542,7 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
491 * @attr: the batman-adv attribute the user is interacting with 542 * @attr: the batman-adv attribute the user is interacting with
492 * @buff: the buffer that will contain the data to send back to the user 543 * @buff: the buffer that will contain the data to send back to the user
493 * 544 *
494 * Returns the number of bytes written into 'buff' on success or a negative 545 * Return: the number of bytes written into 'buff' on success or a negative
495 * error code in case of failure 546 * error code in case of failure
496 */ 547 */
497static ssize_t batadv_show_isolation_mark(struct kobject *kobj, 548static ssize_t batadv_show_isolation_mark(struct kobject *kobj,
@@ -511,7 +562,7 @@ static ssize_t batadv_show_isolation_mark(struct kobject *kobj,
511 * @buff: the buffer containing the user data 562 * @buff: the buffer containing the user data
512 * @count: number of bytes in the buffer 563 * @count: number of bytes in the buffer
513 * 564 *
514 * Returns 'count' on success or a negative error code in case of failure 565 * Return: 'count' on success or a negative error code in case of failure
515 */ 566 */
516static ssize_t batadv_store_isolation_mark(struct kobject *kobj, 567static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
517 struct attribute *attr, char *buff, 568 struct attribute *attr, char *buff,
@@ -620,9 +671,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
620 671
621BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); 672BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
622 673
623/** 674/* array of vlan specific sysfs attributes */
624 * batadv_vlan_attrs - array of vlan specific sysfs attributes
625 */
626static struct batadv_attribute *batadv_vlan_attrs[] = { 675static struct batadv_attribute *batadv_vlan_attrs[] = {
627 &batadv_attr_vlan_ap_isolation, 676 &batadv_attr_vlan_ap_isolation,
628 NULL, 677 NULL,
@@ -683,7 +732,7 @@ void batadv_sysfs_del_meshif(struct net_device *dev)
683 * @dev: netdev of the mesh interface 732 * @dev: netdev of the mesh interface
684 * @vlan: private data of the newly added VLAN interface 733 * @vlan: private data of the newly added VLAN interface
685 * 734 *
686 * Returns 0 on success and -ENOMEM if any of the structure allocations fails. 735 * Return: 0 on success and -ENOMEM if any of the structure allocations fails.
687 */ 736 */
688int batadv_sysfs_add_vlan(struct net_device *dev, 737int batadv_sysfs_add_vlan(struct net_device *dev,
689 struct batadv_softif_vlan *vlan) 738 struct batadv_softif_vlan *vlan)
@@ -771,7 +820,7 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj,
771 820
772 length = sprintf(buff, "%s\n", ifname); 821 length = sprintf(buff, "%s\n", ifname);
773 822
774 batadv_hardif_free_ref(hard_iface); 823 batadv_hardif_put(hard_iface);
775 824
776 return length; 825 return length;
777} 826}
@@ -795,7 +844,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
795 if (strlen(buff) >= IFNAMSIZ) { 844 if (strlen(buff) >= IFNAMSIZ) {
796 pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", 845 pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
797 buff); 846 buff);
798 batadv_hardif_free_ref(hard_iface); 847 batadv_hardif_put(hard_iface);
799 return -EINVAL; 848 return -EINVAL;
800 } 849 }
801 850
@@ -829,7 +878,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
829unlock: 878unlock:
830 rtnl_unlock(); 879 rtnl_unlock();
831out: 880out:
832 batadv_hardif_free_ref(hard_iface); 881 batadv_hardif_put(hard_iface);
833 return ret; 882 return ret;
834} 883}
835 884
@@ -863,18 +912,99 @@ static ssize_t batadv_show_iface_status(struct kobject *kobj,
863 break; 912 break;
864 } 913 }
865 914
866 batadv_hardif_free_ref(hard_iface); 915 batadv_hardif_put(hard_iface);
867 916
868 return length; 917 return length;
869} 918}
870 919
920#ifdef CONFIG_BATMAN_ADV_BATMAN_V
921
922/**
923 * batadv_store_throughput_override - parse and store throughput override
924 * entered by the user
925 * @kobj: kobject representing the private mesh sysfs directory
926 * @attr: the batman-adv attribute the user is interacting with
927 * @buff: the buffer containing the user data
928 * @count: number of bytes in the buffer
929 *
930 * Return: 'count' on success or a negative error code in case of failure
931 */
932static ssize_t batadv_store_throughput_override(struct kobject *kobj,
933 struct attribute *attr,
934 char *buff, size_t count)
935{
936 struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
937 struct batadv_hard_iface *hard_iface;
938 u32 tp_override;
939 u32 old_tp_override;
940 bool ret;
941
942 hard_iface = batadv_hardif_get_by_netdev(net_dev);
943 if (!hard_iface)
944 return -EINVAL;
945
946 if (buff[count - 1] == '\n')
947 buff[count - 1] = '\0';
948
949 ret = batadv_parse_throughput(net_dev, buff, "throughput_override",
950 &tp_override);
951 if (!ret)
952 return count;
953
954 old_tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
955 if (old_tp_override == tp_override)
956 goto out;
957
958 batadv_info(net_dev, "%s: Changing from: %u.%u MBit to: %u.%u MBit\n",
959 "throughput_override",
960 old_tp_override / 10, old_tp_override % 10,
961 tp_override / 10, tp_override % 10);
962
963 atomic_set(&hard_iface->bat_v.throughput_override, tp_override);
964
965out:
966 batadv_hardif_put(hard_iface);
967 return count;
968}
969
970static ssize_t batadv_show_throughput_override(struct kobject *kobj,
971 struct attribute *attr,
972 char *buff)
973{
974 struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
975 struct batadv_hard_iface *hard_iface;
976 u32 tp_override;
977
978 hard_iface = batadv_hardif_get_by_netdev(net_dev);
979 if (!hard_iface)
980 return -EINVAL;
981
982 tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
983
984 return sprintf(buff, "%u.%u MBit\n", tp_override / 10,
985 tp_override % 10);
986}
987
988#endif
989
871static BATADV_ATTR(mesh_iface, S_IRUGO | S_IWUSR, batadv_show_mesh_iface, 990static BATADV_ATTR(mesh_iface, S_IRUGO | S_IWUSR, batadv_show_mesh_iface,
872 batadv_store_mesh_iface); 991 batadv_store_mesh_iface);
873static BATADV_ATTR(iface_status, S_IRUGO, batadv_show_iface_status, NULL); 992static BATADV_ATTR(iface_status, S_IRUGO, batadv_show_iface_status, NULL);
993#ifdef CONFIG_BATMAN_ADV_BATMAN_V
994BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, S_IRUGO | S_IWUSR,
995 2 * BATADV_JITTER, INT_MAX, NULL);
996static BATADV_ATTR(throughput_override, S_IRUGO | S_IWUSR,
997 batadv_show_throughput_override,
998 batadv_store_throughput_override);
999#endif
874 1000
875static struct batadv_attribute *batadv_batman_attrs[] = { 1001static struct batadv_attribute *batadv_batman_attrs[] = {
876 &batadv_attr_mesh_iface, 1002 &batadv_attr_mesh_iface,
877 &batadv_attr_iface_status, 1003 &batadv_attr_iface_status,
1004#ifdef CONFIG_BATMAN_ADV_BATMAN_V
1005 &batadv_attr_elp_interval,
1006 &batadv_attr_throughput_override,
1007#endif
878 NULL, 1008 NULL,
879}; 1009};
880 1010
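
The show/store pair above keeps throughput_override in units of 0.1 Mbit/s, which is why the value is printed as tp / 10 and tp % 10. A small sketch of that fixed-point convention (the helper name is made up):

/* a stored value of 155 is rendered as "15.5 MBit", matching the
 * sprintf() in batadv_show_throughput_override() above
 */
static int example_format_throughput(char *buf, size_t len, u32 tp)
{
        return scnprintf(buf, len, "%u.%u MBit\n", tp / 10, tp % 10);
}

On a running node the attribute would typically be reached through the hard interface's batman_adv sysfs directory, e.g. /sys/class/net/eth0/batman_adv/throughput_override; the exact path is given for orientation only and is not part of this hunk.
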
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index 61974428a7af..c76021b4e198 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0e80fd1461ab..9b4551a86535 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich, Antonio Quartulli 3 * Marek Lindner, Simon Wunderlich, Antonio Quartulli
4 * 4 *
@@ -31,6 +31,7 @@
31#include <linux/jhash.h> 31#include <linux/jhash.h>
32#include <linux/jiffies.h> 32#include <linux/jiffies.h>
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/kref.h>
34#include <linux/list.h> 35#include <linux/list.h>
35#include <linux/lockdep.h> 36#include <linux/lockdep.h>
36#include <linux/netdevice.h> 37#include <linux/netdevice.h>
@@ -68,7 +69,15 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
68 unsigned short vid, const char *message, 69 unsigned short vid, const char *message,
69 bool roaming); 70 bool roaming);
70 71
71/* returns 1 if they are the same mac addr and vid */ 72/**
73 * batadv_compare_tt - check if two TT entries are the same
74 * @node: the list element pointer of the first TT entry
75 * @data2: pointer to the tt_common_entry of the second TT entry
76 *
77 * Compare the MAC address and the VLAN ID of the two TT entries and check if
78 * they are the same TT client.
79 * Return: 1 if the two TT clients are the same, 0 otherwise
80 */
72static int batadv_compare_tt(const struct hlist_node *node, const void *data2) 81static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
73{ 82{
74 const void *data1 = container_of(node, struct batadv_tt_common_entry, 83 const void *data1 = container_of(node, struct batadv_tt_common_entry,
@@ -84,7 +93,7 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
84 * @data: pointer to the tt_common_entry object to map 93 * @data: pointer to the tt_common_entry object to map
85 * @size: the size of the hash table 94 * @size: the size of the hash table
86 * 95 *
87 * Returns the hash index where the object represented by 'data' should be 96 * Return: the hash index where the object represented by 'data' should be
88 * stored at. 97 * stored at.
89 */ 98 */
90static inline u32 batadv_choose_tt(const void *data, u32 size) 99static inline u32 batadv_choose_tt(const void *data, u32 size)
@@ -105,7 +114,7 @@ static inline u32 batadv_choose_tt(const void *data, u32 size)
105 * @addr: the mac address of the client to look for 114 * @addr: the mac address of the client to look for
106 * @vid: VLAN identifier 115 * @vid: VLAN identifier
107 * 116 *
108 * Returns a pointer to the tt_common struct belonging to the searched client if 117 * Return: a pointer to the tt_common struct belonging to the searched client if
109 * found, NULL otherwise. 118 * found, NULL otherwise.
110 */ 119 */
111static struct batadv_tt_common_entry * 120static struct batadv_tt_common_entry *
@@ -133,7 +142,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
133 if (tt->vid != vid) 142 if (tt->vid != vid)
134 continue; 143 continue;
135 144
136 if (!atomic_inc_not_zero(&tt->refcount)) 145 if (!kref_get_unless_zero(&tt->refcount))
137 continue; 146 continue;
138 147
139 tt_tmp = tt; 148 tt_tmp = tt;
@@ -150,7 +159,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
150 * @addr: the mac address of the client to look for 159 * @addr: the mac address of the client to look for
151 * @vid: VLAN identifier 160 * @vid: VLAN identifier
152 * 161 *
153 * Returns a pointer to the corresponding tt_local_entry struct if the client is 162 * Return: a pointer to the corresponding tt_local_entry struct if the client is
154 * found, NULL otherwise. 163 * found, NULL otherwise.
155 */ 164 */
156static struct batadv_tt_local_entry * 165static struct batadv_tt_local_entry *
@@ -175,7 +184,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
175 * @addr: the mac address of the client to look for 184 * @addr: the mac address of the client to look for
176 * @vid: VLAN identifier 185 * @vid: VLAN identifier
177 * 186 *
178 * Returns a pointer to the corresponding tt_global_entry struct if the client 187 * Return: a pointer to the corresponding tt_global_entry struct if the client
179 * is found, NULL otherwise. 188 * is found, NULL otherwise.
180 */ 189 */
181static struct batadv_tt_global_entry * 190static struct batadv_tt_global_entry *
@@ -194,34 +203,70 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
194 return tt_global_entry; 203 return tt_global_entry;
195} 204}
196 205
206/**
207 * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
208 * for free after rcu grace period
209 * @ref: kref pointer of the tt_local_entry
210 */
211static void batadv_tt_local_entry_release(struct kref *ref)
212{
213 struct batadv_tt_local_entry *tt_local_entry;
214
215 tt_local_entry = container_of(ref, struct batadv_tt_local_entry,
216 common.refcount);
217
218 batadv_softif_vlan_put(tt_local_entry->vlan);
219
220 kfree_rcu(tt_local_entry, common.rcu);
221}
222
223/**
224 * batadv_tt_local_entry_put - decrement the tt_local_entry refcounter and
225 * possibly release it
226 * @tt_local_entry: tt_local_entry to be free'd
227 */
197static void 228static void
198batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry) 229batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
230{
231 kref_put(&tt_local_entry->common.refcount,
232 batadv_tt_local_entry_release);
233}
234
235/**
236 * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
237 * for free after rcu grace period
238 * @ref: kref pointer of the tt_global_entry
239 */
240static void batadv_tt_global_entry_release(struct kref *ref)
199{ 241{
200 if (atomic_dec_and_test(&tt_local_entry->common.refcount)) 242 struct batadv_tt_global_entry *tt_global_entry;
201 kfree_rcu(tt_local_entry, common.rcu); 243
244 tt_global_entry = container_of(ref, struct batadv_tt_global_entry,
245 common.refcount);
246
247 batadv_tt_global_del_orig_list(tt_global_entry);
248 kfree_rcu(tt_global_entry, common.rcu);
202} 249}
203 250
204/** 251/**
205 * batadv_tt_global_entry_free_ref - decrement the refcounter for a 252 * batadv_tt_global_entry_put - decrement the tt_global_entry refcounter and
206 * tt_global_entry and possibly free it 253 * possibly release it
207 * @tt_global_entry: the object to free 254 * @tt_global_entry: tt_global_entry to be free'd
208 */ 255 */
209static void 256static void
210batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry) 257batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
211{ 258{
212 if (atomic_dec_and_test(&tt_global_entry->common.refcount)) { 259 kref_put(&tt_global_entry->common.refcount,
213 batadv_tt_global_del_orig_list(tt_global_entry); 260 batadv_tt_global_entry_release);
214 kfree_rcu(tt_global_entry, common.rcu);
215 }
216} 261}
217 262
218/** 263/**
219 * batadv_tt_global_hash_count - count the number of orig entries 264 * batadv_tt_global_hash_count - count the number of orig entries
220 * @hash: hash table containing the tt entries 265 * @bat_priv: the bat priv with all the soft interface information
221 * @addr: the mac address of the client to count entries for 266 * @addr: the mac address of the client to count entries for
222 * @vid: VLAN identifier 267 * @vid: VLAN identifier
223 * 268 *
224 * Return the number of originators advertising the given address/data 269 * Return: the number of originators advertising the given address/data
225 * (excluding ourself). 270 * (excluding ourself).
226 */ 271 */
227int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, 272int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
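The hunks above convert the translation-table entries from an open-coded atomic_t refcount to the kernel's struct kref: the release callback receives the kref pointer, recovers the enclosing object with container_of() and defers the actual free past the RCU grace period, while the matching _put() helper is the only place the counter is decremented. A minimal, hedged sketch of that pattern, using a hypothetical struct foo_entry that is not part of the patch:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* hypothetical refcounted object, used only to illustrate the pattern */
struct foo_entry {
        struct kref refcount;
        struct rcu_head rcu;
        /* ... payload ... */
};

/* invoked by kref_put() once the last reference has been dropped */
static void foo_entry_release(struct kref *ref)
{
        struct foo_entry *entry;

        /* recover the enclosing object from its embedded kref */
        entry = container_of(ref, struct foo_entry, refcount);

        /* free only after the RCU grace period: readers may still hold it */
        kfree_rcu(entry, rcu);
}

/* drop one reference; the release callback runs when the count hits zero */
static void foo_entry_put(struct foo_entry *entry)
{
        kref_put(&entry->refcount, foo_entry_release);
}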
@@ -235,7 +280,7 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
235 return 0; 280 return 0;
236 281
237 count = atomic_read(&tt_global_entry->orig_list_count); 282 count = atomic_read(&tt_global_entry->orig_list_count);
238 batadv_tt_global_entry_free_ref(tt_global_entry); 283 batadv_tt_global_entry_put(tt_global_entry);
239 284
240 return count; 285 return count;
241} 286}
@@ -258,7 +303,7 @@ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv,
258 303
259 atomic_add(v, &vlan->tt.num_entries); 304 atomic_add(v, &vlan->tt.num_entries);
260 305
261 batadv_softif_vlan_free_ref(vlan); 306 batadv_softif_vlan_put(vlan);
262} 307}
263 308
264/** 309/**
@@ -286,9 +331,9 @@ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv,
286} 331}
287 332
288/** 333/**
289 * batadv_tt_global_size_mod - change the size by v of the local table 334 * batadv_tt_global_size_mod - change the size by v of the global table
290 * identified by vid 335 * for orig_node identified by vid
291 * @bat_priv: the bat priv with all the soft interface information 336 * @orig_node: the originator for which the table has to be modified
292 * @vid: the VLAN identifier 337 * @vid: the VLAN identifier
293 * @v: the amount to sum to the global table size 338 * @v: the amount to sum to the global table size
294 */ 339 */
@@ -305,12 +350,12 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
305 spin_lock_bh(&orig_node->vlan_list_lock); 350 spin_lock_bh(&orig_node->vlan_list_lock);
306 if (!hlist_unhashed(&vlan->list)) { 351 if (!hlist_unhashed(&vlan->list)) {
307 hlist_del_init_rcu(&vlan->list); 352 hlist_del_init_rcu(&vlan->list);
308 batadv_orig_node_vlan_free_ref(vlan); 353 batadv_orig_node_vlan_put(vlan);
309 } 354 }
310 spin_unlock_bh(&orig_node->vlan_list_lock); 355 spin_unlock_bh(&orig_node->vlan_list_lock);
311 } 356 }
312 357
313 batadv_orig_node_vlan_free_ref(vlan); 358 batadv_orig_node_vlan_put(vlan);
314} 359}
315 360
316/** 361/**
@@ -340,22 +385,28 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
340/** 385/**
341 * batadv_tt_orig_list_entry_release - release tt orig entry from lists and 386 * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
342 * queue for free after rcu grace period 387 * queue for free after rcu grace period
343 * @orig_entry: tt orig entry to be free'd 388 * @ref: kref pointer of the tt orig entry
344 */ 389 */
345static void 390static void batadv_tt_orig_list_entry_release(struct kref *ref)
346batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry)
347{ 391{
348 batadv_orig_node_free_ref(orig_entry->orig_node); 392 struct batadv_tt_orig_list_entry *orig_entry;
393
394 orig_entry = container_of(ref, struct batadv_tt_orig_list_entry,
395 refcount);
396
397 batadv_orig_node_put(orig_entry->orig_node);
349 kfree_rcu(orig_entry, rcu); 398 kfree_rcu(orig_entry, rcu);
350} 399}
351 400
401/**
402 * batadv_tt_orig_list_entry_put - decrement the tt orig entry refcounter and
403 * possibly release it
404 * @orig_entry: tt orig entry to be free'd
405 */
352static void 406static void
353batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) 407batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
354{ 408{
355 if (!atomic_dec_and_test(&orig_entry->refcount)) 409 kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release);
356 return;
357
358 batadv_tt_orig_list_entry_release(orig_entry);
359} 410}
360 411
361/** 412/**
@@ -437,7 +488,7 @@ unlock:
437 * batadv_tt_len - compute length in bytes of given number of tt changes 488 * batadv_tt_len - compute length in bytes of given number of tt changes
438 * @changes_num: number of tt changes 489 * @changes_num: number of tt changes
439 * 490 *
440 * Returns computed length in bytes. 491 * Return: computed length in bytes.
441 */ 492 */
442static int batadv_tt_len(int changes_num) 493static int batadv_tt_len(int changes_num)
443{ 494{
@@ -448,7 +499,7 @@ static int batadv_tt_len(int changes_num)
448 * batadv_tt_entries - compute the number of entries fitting in tt_len bytes 499 * batadv_tt_entries - compute the number of entries fitting in tt_len bytes
449 * @tt_len: available space 500 * @tt_len: available space
450 * 501 *
451 * Returns the number of entries. 502 * Return: the number of entries.
452 */ 503 */
453static u16 batadv_tt_entries(u16 tt_len) 504static u16 batadv_tt_entries(u16 tt_len)
454{ 505{
@@ -460,7 +511,7 @@ static u16 batadv_tt_entries(u16 tt_len)
460 * size when transmitted over the air 511 * size when transmitted over the air
461 * @bat_priv: the bat priv with all the soft interface information 512 * @bat_priv: the bat priv with all the soft interface information
462 * 513 *
463 * Returns local translation table size in bytes. 514 * Return: local translation table size in bytes.
464 */ 515 */
465static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) 516static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv)
466{ 517{
@@ -512,7 +563,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
512 563
513 batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, 564 batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
514 batadv_choose_tt, &tt_global->common); 565 batadv_choose_tt, &tt_global->common);
515 batadv_tt_global_entry_free_ref(tt_global); 566 batadv_tt_global_entry_put(tt_global);
516} 567}
517 568
518/** 569/**
@@ -526,7 +577,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
526 * @mark: the value contained in the skb->mark field of the received packet (if 577 * @mark: the value contained in the skb->mark field of the received packet (if
527 * any) 578 * any)
528 * 579 *
529 * Returns true if the client was successfully added, false otherwise. 580 * Return: true if the client was successfully added, false otherwise.
530 */ 581 */
531bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, 582bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
532 unsigned short vid, int ifindex, u32 mark) 583 unsigned short vid, int ifindex, u32 mark)
@@ -620,9 +671,11 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
620 tt_local->common.vid = vid; 671 tt_local->common.vid = vid;
621 if (batadv_is_wifi_netdev(in_dev)) 672 if (batadv_is_wifi_netdev(in_dev))
622 tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; 673 tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
623 atomic_set(&tt_local->common.refcount, 2); 674 kref_init(&tt_local->common.refcount);
675 kref_get(&tt_local->common.refcount);
624 tt_local->last_seen = jiffies; 676 tt_local->last_seen = jiffies;
625 tt_local->common.added_at = tt_local->last_seen; 677 tt_local->common.added_at = tt_local->last_seen;
678 tt_local->vlan = vlan;
626 679
627 /* the batman interface mac and multicast addresses should never be 680 /* the batman interface mac and multicast addresses should never be
628 * purged 681 * purged
@@ -637,8 +690,8 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
637 690
638 if (unlikely(hash_added != 0)) { 691 if (unlikely(hash_added != 0)) {
639 /* remove the reference for the hash */ 692 /* remove the reference for the hash */
640 batadv_tt_local_entry_free_ref(tt_local); 693 batadv_tt_local_entry_put(tt_local);
641 batadv_softif_vlan_free_ref(vlan); 694 batadv_softif_vlan_put(vlan);
642 goto out; 695 goto out;
643 } 696 }
644 697
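The kref_init()/kref_get() pair that replaces atomic_set(&refcount, 2) above keeps the old semantics: kref_init() accounts for the reference returned to the caller, and the extra kref_get() accounts for the reference owned by the hash table the entry is inserted into. A short sketch of that allocation path, reusing the hypothetical foo_entry from the earlier example:

/* allocate a foo_entry that starts out with two references:
 * one for the caller and one for the hash table it will be added to
 */
static struct foo_entry *foo_entry_new(gfp_t flags)
{
        struct foo_entry *entry;

        entry = kzalloc(sizeof(*entry), flags);
        if (!entry)
                return NULL;

        /* kref_init() sets the counter to 1: the caller's reference */
        kref_init(&entry->refcount);
        /* second reference for the hash table / list the entry will join */
        kref_get(&entry->refcount);

        return entry;
}

If the later hash insertion fails, the hash reference is dropped again with the _put() helper, which is exactly what the failure path above does with batadv_tt_local_entry_put().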
@@ -704,9 +757,9 @@ out:
704 if (in_dev) 757 if (in_dev)
705 dev_put(in_dev); 758 dev_put(in_dev);
706 if (tt_local) 759 if (tt_local)
707 batadv_tt_local_entry_free_ref(tt_local); 760 batadv_tt_local_entry_put(tt_local);
708 if (tt_global) 761 if (tt_global)
709 batadv_tt_global_entry_free_ref(tt_global); 762 batadv_tt_global_entry_put(tt_global);
710 return ret; 763 return ret;
711} 764}
712 765
@@ -721,12 +774,11 @@ out:
721 * function reserves the amount of space needed to send the entire global TT 774 * function reserves the amount of space needed to send the entire global TT
722 * table. In case of success the value is updated with the real amount of 775 * table. In case of success the value is updated with the real amount of
723 * reserved bytes 776 * reserved bytes
724
725 * Allocate the needed amount of memory for the entire TT TVLV and write its 777 * Allocate the needed amount of memory for the entire TT TVLV and write its
726 * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data 778 * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
727 * objects, one per active VLAN served by the originator node. 779 * objects, one per active VLAN served by the originator node.
728 * 780 *
729 * Return the size of the allocated buffer or 0 in case of failure. 781 * Return: the size of the allocated buffer or 0 in case of failure.
730 */ 782 */
731static u16 783static u16
732batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, 784batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
@@ -800,7 +852,7 @@ out:
800 * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data 852 * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
801 * objects, one per active VLAN. 853 * objects, one per active VLAN.
802 * 854 *
803 * Return the size of the allocated buffer or 0 in case of failure. 855 * Return: the size of the allocated buffer or 0 in case of failure.
804 */ 856 */
805static u16 857static u16
806batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, 858batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
@@ -942,7 +994,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
942 struct batadv_tt_common_entry *tt_common_entry; 994 struct batadv_tt_common_entry *tt_common_entry;
943 struct batadv_tt_local_entry *tt_local; 995 struct batadv_tt_local_entry *tt_local;
944 struct batadv_hard_iface *primary_if; 996 struct batadv_hard_iface *primary_if;
945 struct batadv_softif_vlan *vlan;
946 struct hlist_head *head; 997 struct hlist_head *head;
947 unsigned short vid; 998 unsigned short vid;
948 u32 i; 999 u32 i;
@@ -978,14 +1029,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
978 last_seen_msecs = last_seen_msecs % 1000; 1029 last_seen_msecs = last_seen_msecs % 1000;
979 1030
980 no_purge = tt_common_entry->flags & np_flag; 1031 no_purge = tt_common_entry->flags & np_flag;
981
982 vlan = batadv_softif_vlan_get(bat_priv, vid);
983 if (!vlan) {
984 seq_printf(seq, "Cannot retrieve VLAN %d\n",
985 BATADV_PRINT_VID(vid));
986 continue;
987 }
988
989 seq_printf(seq, 1032 seq_printf(seq,
990 " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", 1033 " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
991 tt_common_entry->addr, 1034 tt_common_entry->addr,
@@ -1003,15 +1046,13 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
1003 BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), 1046 BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
1004 no_purge ? 0 : last_seen_secs, 1047 no_purge ? 0 : last_seen_secs,
1005 no_purge ? 0 : last_seen_msecs, 1048 no_purge ? 0 : last_seen_msecs,
1006 vlan->tt.crc); 1049 tt_local->vlan->tt.crc);
1007
1008 batadv_softif_vlan_free_ref(vlan);
1009 } 1050 }
1010 rcu_read_unlock(); 1051 rcu_read_unlock();
1011 } 1052 }
1012out: 1053out:
1013 if (primary_if) 1054 if (primary_if)
1014 batadv_hardif_free_ref(primary_if); 1055 batadv_hardif_put(primary_if);
1015 return 0; 1056 return 0;
1016} 1057}
1017 1058
@@ -1042,7 +1083,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
1042 * @message: message to append to the log on deletion 1083 * @message: message to append to the log on deletion
1043 * @roaming: true if the deletion is due to a roaming event 1084 * @roaming: true if the deletion is due to a roaming event
1044 * 1085 *
1045 * Returns the flags assigned to the local entry before being deleted 1086 * Return: the flags assigned to the local entry before being deleted
1046 */ 1087 */
1047u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, 1088u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
1048 unsigned short vid, const char *message, 1089 unsigned short vid, const char *message,
@@ -1050,7 +1091,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
1050{ 1091{
1051 struct batadv_tt_local_entry *tt_local_entry; 1092 struct batadv_tt_local_entry *tt_local_entry;
1052 u16 flags, curr_flags = BATADV_NO_FLAGS; 1093 u16 flags, curr_flags = BATADV_NO_FLAGS;
1053 struct batadv_softif_vlan *vlan;
1054 void *tt_entry_exists; 1094 void *tt_entry_exists;
1055 1095
1056 tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); 1096 tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
@@ -1088,19 +1128,11 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
1088 goto out; 1128 goto out;
1089 1129
1090 /* extra call to free the local tt entry */ 1130 /* extra call to free the local tt entry */
1091 batadv_tt_local_entry_free_ref(tt_local_entry); 1131 batadv_tt_local_entry_put(tt_local_entry);
1092
1093 /* decrease the reference held for this vlan */
1094 vlan = batadv_softif_vlan_get(bat_priv, vid);
1095 if (!vlan)
1096 goto out;
1097
1098 batadv_softif_vlan_free_ref(vlan);
1099 batadv_softif_vlan_free_ref(vlan);
1100 1132
1101out: 1133out:
1102 if (tt_local_entry) 1134 if (tt_local_entry)
1103 batadv_tt_local_entry_free_ref(tt_local_entry); 1135 batadv_tt_local_entry_put(tt_local_entry);
1104 1136
1105 return curr_flags; 1137 return curr_flags;
1106} 1138}
@@ -1170,7 +1202,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
1170 spinlock_t *list_lock; /* protects write access to the hash lists */ 1202 spinlock_t *list_lock; /* protects write access to the hash lists */
1171 struct batadv_tt_common_entry *tt_common_entry; 1203 struct batadv_tt_common_entry *tt_common_entry;
1172 struct batadv_tt_local_entry *tt_local; 1204 struct batadv_tt_local_entry *tt_local;
1173 struct batadv_softif_vlan *vlan;
1174 struct hlist_node *node_tmp; 1205 struct hlist_node *node_tmp;
1175 struct hlist_head *head; 1206 struct hlist_head *head;
1176 u32 i; 1207 u32 i;
@@ -1192,15 +1223,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
1192 struct batadv_tt_local_entry, 1223 struct batadv_tt_local_entry,
1193 common); 1224 common);
1194 1225
1195 /* decrease the reference held for this vlan */ 1226 batadv_tt_local_entry_put(tt_local);
1196 vlan = batadv_softif_vlan_get(bat_priv,
1197 tt_common_entry->vid);
1198 if (vlan) {
1199 batadv_softif_vlan_free_ref(vlan);
1200 batadv_softif_vlan_free_ref(vlan);
1201 }
1202
1203 batadv_tt_local_entry_free_ref(tt_local);
1204 } 1227 }
1205 spin_unlock_bh(list_lock); 1228 spin_unlock_bh(list_lock);
1206 } 1229 }
@@ -1242,10 +1265,16 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv)
1242 spin_unlock_bh(&bat_priv->tt.changes_list_lock); 1265 spin_unlock_bh(&bat_priv->tt.changes_list_lock);
1243} 1266}
1244 1267
1245/* retrieves the orig_tt_list_entry belonging to orig_node from the 1268/**
1269 * batadv_tt_global_orig_entry_find - find a TT orig_list_entry
1270 * @entry: the TT global entry where the orig_list_entry has to be
1271 * extracted from
1272 * @orig_node: the originator for which the orig_list_entry has to be found
1273 *
1274 * retrieve the orig_tt_list_entry belonging to orig_node from the
1246 * batadv_tt_global_entry list 1275 * batadv_tt_global_entry list
1247 * 1276 *
1248 * returns it with an increased refcounter, NULL if not found 1277 * Return: it with an increased refcounter, NULL if not found
1249 */ 1278 */
1250static struct batadv_tt_orig_list_entry * 1279static struct batadv_tt_orig_list_entry *
1251batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, 1280batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
@@ -1259,7 +1288,7 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
1259 hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { 1288 hlist_for_each_entry_rcu(tmp_orig_entry, head, list) {
1260 if (tmp_orig_entry->orig_node != orig_node) 1289 if (tmp_orig_entry->orig_node != orig_node)
1261 continue; 1290 continue;
1262 if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) 1291 if (!kref_get_unless_zero(&tmp_orig_entry->refcount))
1263 continue; 1292 continue;
1264 1293
1265 orig_entry = tmp_orig_entry; 1294 orig_entry = tmp_orig_entry;
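Within RCU-protected list walks, the patch replaces atomic_inc_not_zero() with kref_get_unless_zero(): a reference is taken only if the object has not already dropped to a refcount of zero, i.e. it is not in the middle of being released. A hedged sketch of that lookup pattern, assuming the hypothetical foo_entry also carries an hlist_node named list and an integer key:

#include <linux/rculist.h>

static struct foo_entry *foo_entry_find(struct hlist_head *head, int key)
{
        struct foo_entry *tmp;
        struct foo_entry *found = NULL;

        rcu_read_lock();
        hlist_for_each_entry_rcu(tmp, head, list) {
                if (tmp->key != key)
                        continue;

                /* skip entries whose last reference is already gone */
                if (!kref_get_unless_zero(&tmp->refcount))
                        continue;

                found = tmp;
                break;
        }
        rcu_read_unlock();

        /* the caller must release the reference with foo_entry_put() */
        return found;
}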
@@ -1270,8 +1299,15 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
1270 return orig_entry; 1299 return orig_entry;
1271} 1300}
1272 1301
1273/* find out if an orig_node is already in the list of a tt_global_entry. 1302/**
1274 * returns true if found, false otherwise 1303 * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled
1304 * by a given originator
1305 * @entry: the TT global entry to check
1306 * @orig_node: the originator to search in the list
1307 *
1308 * find out if an orig_node is already in the list of a tt_global_entry.
1309 *
1310 * Return: true if found, false otherwise
1275 */ 1311 */
1276static bool 1312static bool
1277batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, 1313batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
@@ -1283,7 +1319,7 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
1283 orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); 1319 orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node);
1284 if (orig_entry) { 1320 if (orig_entry) {
1285 found = true; 1321 found = true;
1286 batadv_tt_orig_list_entry_free_ref(orig_entry); 1322 batadv_tt_orig_list_entry_put(orig_entry);
1287 } 1323 }
1288 1324
1289 return found; 1325 return found;
@@ -1309,11 +1345,12 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
1309 goto out; 1345 goto out;
1310 1346
1311 INIT_HLIST_NODE(&orig_entry->list); 1347 INIT_HLIST_NODE(&orig_entry->list);
1312 atomic_inc(&orig_node->refcount); 1348 kref_get(&orig_node->refcount);
1313 batadv_tt_global_size_inc(orig_node, tt_global->common.vid); 1349 batadv_tt_global_size_inc(orig_node, tt_global->common.vid);
1314 orig_entry->orig_node = orig_node; 1350 orig_entry->orig_node = orig_node;
1315 orig_entry->ttvn = ttvn; 1351 orig_entry->ttvn = ttvn;
1316 atomic_set(&orig_entry->refcount, 2); 1352 kref_init(&orig_entry->refcount);
1353 kref_get(&orig_entry->refcount);
1317 1354
1318 spin_lock_bh(&tt_global->list_lock); 1355 spin_lock_bh(&tt_global->list_lock);
1319 hlist_add_head_rcu(&orig_entry->list, 1356 hlist_add_head_rcu(&orig_entry->list,
@@ -1323,7 +1360,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
1323 1360
1324out: 1361out:
1325 if (orig_entry) 1362 if (orig_entry)
1326 batadv_tt_orig_list_entry_free_ref(orig_entry); 1363 batadv_tt_orig_list_entry_put(orig_entry);
1327} 1364}
1328 1365
1329/** 1366/**
@@ -1343,7 +1380,7 @@ out:
1343 * 1380 *
1344 * The caller must hold orig_node refcount. 1381 * The caller must hold orig_node refcount.
1345 * 1382 *
1346 * Return true if the new entry has been added, false otherwise 1383 * Return: true if the new entry has been added, false otherwise
1347 */ 1384 */
1348static bool batadv_tt_global_add(struct batadv_priv *bat_priv, 1385static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
1349 struct batadv_orig_node *orig_node, 1386 struct batadv_orig_node *orig_node,
@@ -1389,7 +1426,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
1389 */ 1426 */
1390 if (flags & BATADV_TT_CLIENT_ROAM) 1427 if (flags & BATADV_TT_CLIENT_ROAM)
1391 tt_global_entry->roam_at = jiffies; 1428 tt_global_entry->roam_at = jiffies;
1392 atomic_set(&common->refcount, 2); 1429 kref_init(&common->refcount);
1430 kref_get(&common->refcount);
1393 common->added_at = jiffies; 1431 common->added_at = jiffies;
1394 1432
1395 INIT_HLIST_HEAD(&tt_global_entry->orig_list); 1433 INIT_HLIST_HEAD(&tt_global_entry->orig_list);
@@ -1403,7 +1441,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
1403 1441
1404 if (unlikely(hash_added != 0)) { 1442 if (unlikely(hash_added != 0)) {
1405 /* remove the reference for the hash */ 1443 /* remove the reference for the hash */
1406 batadv_tt_global_entry_free_ref(tt_global_entry); 1444 batadv_tt_global_entry_put(tt_global_entry);
1407 goto out_remove; 1445 goto out_remove;
1408 } 1446 }
1409 } else { 1447 } else {
@@ -1489,9 +1527,9 @@ out_remove:
1489 1527
1490out: 1528out:
1491 if (tt_global_entry) 1529 if (tt_global_entry)
1492 batadv_tt_global_entry_free_ref(tt_global_entry); 1530 batadv_tt_global_entry_put(tt_global_entry);
1493 if (tt_local_entry) 1531 if (tt_local_entry)
1494 batadv_tt_local_entry_free_ref(tt_local_entry); 1532 batadv_tt_local_entry_put(tt_local_entry);
1495 return ret; 1533 return ret;
1496} 1534}
1497 1535
@@ -1501,7 +1539,7 @@ out:
1501 * @tt_global_entry: global translation table entry to be analyzed 1539 * @tt_global_entry: global translation table entry to be analyzed
1502 * 1540 *
 1503 * This function assumes the caller holds rcu_read_lock(). 1541 * This function assumes the caller holds rcu_read_lock().
1504 * Returns best originator list entry or NULL on errors. 1542 * Return: best originator list entry or NULL on errors.
1505 */ 1543 */
1506static struct batadv_tt_orig_list_entry * 1544static struct batadv_tt_orig_list_entry *
1507batadv_transtable_best_orig(struct batadv_priv *bat_priv, 1545batadv_transtable_best_orig(struct batadv_priv *bat_priv,
@@ -1522,20 +1560,20 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
1522 if (best_router && 1560 if (best_router &&
1523 bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT, 1561 bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT,
1524 best_router, BATADV_IF_DEFAULT) <= 0) { 1562 best_router, BATADV_IF_DEFAULT) <= 0) {
1525 batadv_neigh_node_free_ref(router); 1563 batadv_neigh_node_put(router);
1526 continue; 1564 continue;
1527 } 1565 }
1528 1566
1529 /* release the refcount for the "old" best */ 1567 /* release the refcount for the "old" best */
1530 if (best_router) 1568 if (best_router)
1531 batadv_neigh_node_free_ref(best_router); 1569 batadv_neigh_node_put(best_router);
1532 1570
1533 best_entry = orig_entry; 1571 best_entry = orig_entry;
1534 best_router = router; 1572 best_router = router;
1535 } 1573 }
1536 1574
1537 if (best_router) 1575 if (best_router)
1538 batadv_neigh_node_free_ref(best_router); 1576 batadv_neigh_node_put(best_router);
1539 1577
1540 return best_entry; 1578 return best_entry;
1541} 1579}
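The best-originator selection above also shows the usual reference-swapping discipline: when a better candidate is found, the reference held on the previous best is released before the new one is kept, and the reference still held at the end belongs to the caller. A hedged sketch of the same idea with the hypothetical foo_entry helpers from the earlier examples (foo_better() is a made-up comparison, not from the patch):

/* hypothetical metric: higher key wins (illustrative only) */
static bool foo_better(const struct foo_entry *a, const struct foo_entry *b)
{
        return a->key > b->key;
}

static struct foo_entry *foo_select_best(struct hlist_head *head)
{
        struct foo_entry *tmp;
        struct foo_entry *best = NULL;

        rcu_read_lock();
        hlist_for_each_entry_rcu(tmp, head, list) {
                if (!kref_get_unless_zero(&tmp->refcount))
                        continue;

                if (best && !foo_better(tmp, best)) {
                        /* not better: drop the reference taken above */
                        foo_entry_put(tmp);
                        continue;
                }

                /* release the reference held on the previous best */
                if (best)
                        foo_entry_put(best);

                best = tmp;
        }
        rcu_read_unlock();

        /* caller owns one reference on the returned entry (if any) */
        return best;
}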
@@ -1588,7 +1626,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
1588 ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), 1626 ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
1589 ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.')); 1627 ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
1590 1628
1591 batadv_orig_node_vlan_free_ref(vlan); 1629 batadv_orig_node_vlan_put(vlan);
1592 } 1630 }
1593 1631
1594print_list: 1632print_list:
@@ -1620,7 +1658,7 @@ print_list:
1620 ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), 1658 ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
1621 ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.')); 1659 ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
1622 1660
1623 batadv_orig_node_vlan_free_ref(vlan); 1661 batadv_orig_node_vlan_put(vlan);
1624 } 1662 }
1625} 1663}
1626 1664
@@ -1661,7 +1699,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
1661 } 1699 }
1662out: 1700out:
1663 if (primary_if) 1701 if (primary_if)
1664 batadv_hardif_free_ref(primary_if); 1702 batadv_hardif_put(primary_if);
1665 return 0; 1703 return 0;
1666} 1704}
1667 1705
@@ -1689,7 +1727,7 @@ _batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry,
1689 * being part of a list 1727 * being part of a list
1690 */ 1728 */
1691 hlist_del_rcu(&orig_entry->list); 1729 hlist_del_rcu(&orig_entry->list);
1692 batadv_tt_orig_list_entry_free_ref(orig_entry); 1730 batadv_tt_orig_list_entry_put(orig_entry);
1693} 1731}
1694 1732
1695/* deletes the orig list of a tt_global_entry */ 1733/* deletes the orig list of a tt_global_entry */
@@ -1845,9 +1883,9 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
1845 1883
1846out: 1884out:
1847 if (tt_global_entry) 1885 if (tt_global_entry)
1848 batadv_tt_global_entry_free_ref(tt_global_entry); 1886 batadv_tt_global_entry_put(tt_global_entry);
1849 if (local_entry) 1887 if (local_entry)
1850 batadv_tt_local_entry_free_ref(local_entry); 1888 batadv_tt_local_entry_put(local_entry);
1851} 1889}
1852 1890
1853/** 1891/**
@@ -1901,7 +1939,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
1901 tt_global->common.addr, 1939 tt_global->common.addr,
1902 BATADV_PRINT_VID(vid), message); 1940 BATADV_PRINT_VID(vid), message);
1903 hlist_del_rcu(&tt_common_entry->hash_entry); 1941 hlist_del_rcu(&tt_common_entry->hash_entry);
1904 batadv_tt_global_entry_free_ref(tt_global); 1942 batadv_tt_global_entry_put(tt_global);
1905 } 1943 }
1906 } 1944 }
1907 spin_unlock_bh(list_lock); 1945 spin_unlock_bh(list_lock);
@@ -1964,7 +2002,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
1964 2002
1965 hlist_del_rcu(&tt_common->hash_entry); 2003 hlist_del_rcu(&tt_common->hash_entry);
1966 2004
1967 batadv_tt_global_entry_free_ref(tt_global); 2005 batadv_tt_global_entry_put(tt_global);
1968 } 2006 }
1969 spin_unlock_bh(list_lock); 2007 spin_unlock_bh(list_lock);
1970 } 2008 }
@@ -1996,7 +2034,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
1996 tt_global = container_of(tt_common_entry, 2034 tt_global = container_of(tt_common_entry,
1997 struct batadv_tt_global_entry, 2035 struct batadv_tt_global_entry,
1998 common); 2036 common);
1999 batadv_tt_global_entry_free_ref(tt_global); 2037 batadv_tt_global_entry_put(tt_global);
2000 } 2038 }
2001 spin_unlock_bh(list_lock); 2039 spin_unlock_bh(list_lock);
2002 } 2040 }
@@ -2031,7 +2069,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
2031 * @addr: mac address of the destination client 2069 * @addr: mac address of the destination client
2032 * @vid: VLAN identifier 2070 * @vid: VLAN identifier
2033 * 2071 *
2034 * Returns a pointer to the originator that was selected as destination in the 2072 * Return: a pointer to the originator that was selected as destination in the
2035 * mesh for contacting the client 'addr', NULL otherwise. 2073 * mesh for contacting the client 'addr', NULL otherwise.
2036 * In case of multiple originators serving the same client, the function returns 2074 * In case of multiple originators serving the same client, the function returns
2037 * the best one (best in terms of metric towards the destination node). 2075 * the best one (best in terms of metric towards the destination node).
@@ -2071,15 +2109,15 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
2071 /* found anything? */ 2109 /* found anything? */
2072 if (best_entry) 2110 if (best_entry)
2073 orig_node = best_entry->orig_node; 2111 orig_node = best_entry->orig_node;
2074 if (orig_node && !atomic_inc_not_zero(&orig_node->refcount)) 2112 if (orig_node && !kref_get_unless_zero(&orig_node->refcount))
2075 orig_node = NULL; 2113 orig_node = NULL;
2076 rcu_read_unlock(); 2114 rcu_read_unlock();
2077 2115
2078out: 2116out:
2079 if (tt_global_entry) 2117 if (tt_global_entry)
2080 batadv_tt_global_entry_free_ref(tt_global_entry); 2118 batadv_tt_global_entry_put(tt_global_entry);
2081 if (tt_local_entry) 2119 if (tt_local_entry)
2082 batadv_tt_local_entry_free_ref(tt_local_entry); 2120 batadv_tt_local_entry_put(tt_local_entry);
2083 2121
2084 return orig_node; 2122 return orig_node;
2085} 2123}
@@ -2106,7 +2144,7 @@ out:
2106 * because the XOR operation can combine them all while trying to reduce the 2144 * because the XOR operation can combine them all while trying to reduce the
2107 * noise as much as possible. 2145 * noise as much as possible.
2108 * 2146 *
2109 * Returns the checksum of the global table of a given originator. 2147 * Return: the checksum of the global table of a given originator.
2110 */ 2148 */
2111static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, 2149static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
2112 struct batadv_orig_node *orig_node, 2150 struct batadv_orig_node *orig_node,
@@ -2183,7 +2221,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
2183 * For details about the computation, please refer to the documentation for 2221 * For details about the computation, please refer to the documentation for
2184 * batadv_tt_global_crc(). 2222 * batadv_tt_global_crc().
2185 * 2223 *
2186 * Returns the checksum of the local table 2224 * Return: the checksum of the local table
2187 */ 2225 */
2188static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, 2226static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
2189 unsigned short vid) 2227 unsigned short vid)
@@ -2289,7 +2327,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
2289 * @bat_priv: the bat priv with all the soft interface information 2327 * @bat_priv: the bat priv with all the soft interface information
2290 * @orig_node: orig node this request is being issued for 2328 * @orig_node: orig node this request is being issued for
2291 * 2329 *
2292 * Returns the pointer to the new tt_req_node struct if no request 2330 * Return: the pointer to the new tt_req_node struct if no request
2293 * has already been issued for this orig_node, NULL otherwise. 2331 * has already been issued for this orig_node, NULL otherwise.
2294 */ 2332 */
2295static struct batadv_tt_req_node * 2333static struct batadv_tt_req_node *
@@ -2324,7 +2362,7 @@ unlock:
2324 * @entry_ptr: to be checked local tt entry 2362 * @entry_ptr: to be checked local tt entry
2325 * @data_ptr: not used but definition required to satisfy the callback prototype 2363 * @data_ptr: not used but definition required to satisfy the callback prototype
2326 * 2364 *
 2327 * Returns 1 if the entry is valid, 0 otherwise. 2365 * Return: 1 if the entry is valid, 0 otherwise.
2328 */ 2366 */
2329static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) 2367static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
2330{ 2368{
@@ -2408,9 +2446,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
2408 * @orig_node: originator for which the CRCs have to be checked 2446 * @orig_node: originator for which the CRCs have to be checked
2409 * @tt_vlan: pointer to the first tvlv VLAN entry 2447 * @tt_vlan: pointer to the first tvlv VLAN entry
2410 * @num_vlan: number of tvlv VLAN entries 2448 * @num_vlan: number of tvlv VLAN entries
2411 * @create: if true, create VLAN objects if not found
2412 * 2449 *
2413 * Return true if all the received CRCs match the locally stored ones, false 2450 * Return: true if all the received CRCs match the locally stored ones, false
2414 * otherwise 2451 * otherwise
2415 */ 2452 */
2416static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, 2453static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
@@ -2440,7 +2477,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
2440 return false; 2477 return false;
2441 2478
2442 crc = vlan->tt.crc; 2479 crc = vlan->tt.crc;
2443 batadv_orig_node_vlan_free_ref(vlan); 2480 batadv_orig_node_vlan_put(vlan);
2444 2481
2445 if (crc != ntohl(tt_vlan_tmp->crc)) 2482 if (crc != ntohl(tt_vlan_tmp->crc))
2446 return false; 2483 return false;
@@ -2513,6 +2550,8 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
2513 * @num_vlan: number of tvlv VLAN entries 2550 * @num_vlan: number of tvlv VLAN entries
2514 * @full_table: ask for the entire translation table if true, while only for the 2551 * @full_table: ask for the entire translation table if true, while only for the
2515 * last TT diff otherwise 2552 * last TT diff otherwise
2553 *
2554 * Return: true if the TT Request was sent, false otherwise
2516 */ 2555 */
2517static int batadv_send_tt_request(struct batadv_priv *bat_priv, 2556static int batadv_send_tt_request(struct batadv_priv *bat_priv,
2518 struct batadv_orig_node *dst_orig_node, 2557 struct batadv_orig_node *dst_orig_node,
@@ -2573,7 +2612,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
2573 2612
2574out: 2613out:
2575 if (primary_if) 2614 if (primary_if)
2576 batadv_hardif_free_ref(primary_if); 2615 batadv_hardif_put(primary_if);
2577 if (ret && tt_req_node) { 2616 if (ret && tt_req_node) {
2578 spin_lock_bh(&bat_priv->tt.req_list_lock); 2617 spin_lock_bh(&bat_priv->tt.req_list_lock);
2579 /* hlist_del_init() verifies tt_req_node still is in the list */ 2618 /* hlist_del_init() verifies tt_req_node still is in the list */
@@ -2593,7 +2632,7 @@ out:
2593 * @req_src: mac address of tt request sender 2632 * @req_src: mac address of tt request sender
2594 * @req_dst: mac address of tt request recipient 2633 * @req_dst: mac address of tt request recipient
2595 * 2634 *
2596 * Returns true if tt request reply was sent, false otherwise. 2635 * Return: true if tt request reply was sent, false otherwise.
2597 */ 2636 */
2598static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, 2637static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
2599 struct batadv_tvlv_tt_data *tt_data, 2638 struct batadv_tvlv_tt_data *tt_data,
@@ -2711,9 +2750,9 @@ unlock:
2711 2750
2712out: 2751out:
2713 if (res_dst_orig_node) 2752 if (res_dst_orig_node)
2714 batadv_orig_node_free_ref(res_dst_orig_node); 2753 batadv_orig_node_put(res_dst_orig_node);
2715 if (req_dst_orig_node) 2754 if (req_dst_orig_node)
2716 batadv_orig_node_free_ref(req_dst_orig_node); 2755 batadv_orig_node_put(req_dst_orig_node);
2717 kfree(tvlv_tt_data); 2756 kfree(tvlv_tt_data);
2718 return ret; 2757 return ret;
2719} 2758}
@@ -2725,7 +2764,7 @@ out:
2725 * @tt_data: tt data containing the tt request information 2764 * @tt_data: tt data containing the tt request information
2726 * @req_src: mac address of tt request sender 2765 * @req_src: mac address of tt request sender
2727 * 2766 *
2728 * Returns true if tt request reply was sent, false otherwise. 2767 * Return: true if tt request reply was sent, false otherwise.
2729 */ 2768 */
2730static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, 2769static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
2731 struct batadv_tvlv_tt_data *tt_data, 2770 struct batadv_tvlv_tt_data *tt_data,
@@ -2828,9 +2867,9 @@ unlock:
2828out: 2867out:
2829 spin_unlock_bh(&bat_priv->tt.commit_lock); 2868 spin_unlock_bh(&bat_priv->tt.commit_lock);
2830 if (orig_node) 2869 if (orig_node)
2831 batadv_orig_node_free_ref(orig_node); 2870 batadv_orig_node_put(orig_node);
2832 if (primary_if) 2871 if (primary_if)
2833 batadv_hardif_free_ref(primary_if); 2872 batadv_hardif_put(primary_if);
2834 kfree(tvlv_tt_data); 2873 kfree(tvlv_tt_data);
2835 /* The packet was for this host, so it doesn't need to be re-routed */ 2874 /* The packet was for this host, so it doesn't need to be re-routed */
2836 return true; 2875 return true;
@@ -2843,7 +2882,7 @@ out:
2843 * @req_src: mac address of tt request sender 2882 * @req_src: mac address of tt request sender
2844 * @req_dst: mac address of tt request recipient 2883 * @req_dst: mac address of tt request recipient
2845 * 2884 *
2846 * Returns true if tt request reply was sent, false otherwise. 2885 * Return: true if tt request reply was sent, false otherwise.
2847 */ 2886 */
2848static bool batadv_send_tt_response(struct batadv_priv *bat_priv, 2887static bool batadv_send_tt_response(struct batadv_priv *bat_priv,
2849 struct batadv_tvlv_tt_data *tt_data, 2888 struct batadv_tvlv_tt_data *tt_data,
@@ -2916,7 +2955,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
2916 2955
2917out: 2956out:
2918 if (orig_node) 2957 if (orig_node)
2919 batadv_orig_node_free_ref(orig_node); 2958 batadv_orig_node_put(orig_node);
2920} 2959}
2921 2960
2922static void batadv_tt_update_changes(struct batadv_priv *bat_priv, 2961static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
@@ -2938,7 +2977,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
2938 * @addr: the mac address of the client to check 2977 * @addr: the mac address of the client to check
2939 * @vid: VLAN identifier 2978 * @vid: VLAN identifier
2940 * 2979 *
2941 * Returns true if the client is served by this node, false otherwise. 2980 * Return: true if the client is served by this node, false otherwise.
2942 */ 2981 */
2943bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr, 2982bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
2944 unsigned short vid) 2983 unsigned short vid)
@@ -2958,7 +2997,7 @@ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
2958 ret = true; 2997 ret = true;
2959out: 2998out:
2960 if (tt_local_entry) 2999 if (tt_local_entry)
2961 batadv_tt_local_entry_free_ref(tt_local_entry); 3000 batadv_tt_local_entry_put(tt_local_entry);
2962 return ret; 3001 return ret;
2963} 3002}
2964 3003
@@ -3022,7 +3061,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
3022 spin_unlock_bh(&bat_priv->tt.req_list_lock); 3061 spin_unlock_bh(&bat_priv->tt.req_list_lock);
3023out: 3062out:
3024 if (orig_node) 3063 if (orig_node)
3025 batadv_orig_node_free_ref(orig_node); 3064 batadv_orig_node_put(orig_node);
3026} 3065}
3027 3066
3028static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) 3067static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
@@ -3055,11 +3094,16 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
3055 spin_unlock_bh(&bat_priv->tt.roam_list_lock); 3094 spin_unlock_bh(&bat_priv->tt.roam_list_lock);
3056} 3095}
3057 3096
3058/* This function checks whether the client already reached the 3097/**
3098 * batadv_tt_check_roam_count - check if a client has roamed too frequently
3099 * @bat_priv: the bat priv with all the soft interface information
3100 * @client: mac address of the roaming client
3101 *
3102 * This function checks whether the client already reached the
3059 * maximum number of possible roaming phases. In this case the ROAMING_ADV 3103 * maximum number of possible roaming phases. In this case the ROAMING_ADV
3060 * will not be sent. 3104 * will not be sent.
3061 * 3105 *
3062 * returns true if the ROAMING_ADV can be sent, false otherwise 3106 * Return: true if the ROAMING_ADV can be sent, false otherwise
3063 */ 3107 */
3064static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) 3108static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client)
3065{ 3109{
@@ -3148,7 +3192,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
3148 3192
3149out: 3193out:
3150 if (primary_if) 3194 if (primary_if)
3151 batadv_hardif_free_ref(primary_if); 3195 batadv_hardif_put(primary_if);
3152} 3196}
3153 3197
3154static void batadv_tt_purge(struct work_struct *work) 3198static void batadv_tt_purge(struct work_struct *work)
@@ -3239,7 +3283,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
3239 struct batadv_hashtable *hash = bat_priv->tt.local_hash; 3283 struct batadv_hashtable *hash = bat_priv->tt.local_hash;
3240 struct batadv_tt_common_entry *tt_common; 3284 struct batadv_tt_common_entry *tt_common;
3241 struct batadv_tt_local_entry *tt_local; 3285 struct batadv_tt_local_entry *tt_local;
3242 struct batadv_softif_vlan *vlan;
3243 struct hlist_node *node_tmp; 3286 struct hlist_node *node_tmp;
3244 struct hlist_head *head; 3287 struct hlist_head *head;
3245 spinlock_t *list_lock; /* protects write access to the hash lists */ 3288 spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -3269,14 +3312,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
3269 struct batadv_tt_local_entry, 3312 struct batadv_tt_local_entry,
3270 common); 3313 common);
3271 3314
3272 /* decrease the reference held for this vlan */ 3315 batadv_tt_local_entry_put(tt_local);
3273 vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
3274 if (vlan) {
3275 batadv_softif_vlan_free_ref(vlan);
3276 batadv_softif_vlan_free_ref(vlan);
3277 }
3278
3279 batadv_tt_local_entry_free_ref(tt_local);
3280 } 3316 }
3281 spin_unlock_bh(list_lock); 3317 spin_unlock_bh(list_lock);
3282 } 3318 }
@@ -3359,11 +3395,11 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
3359 ret = true; 3395 ret = true;
3360 3396
3361out: 3397out:
3362 batadv_softif_vlan_free_ref(vlan); 3398 batadv_softif_vlan_put(vlan);
3363 if (tt_global_entry) 3399 if (tt_global_entry)
3364 batadv_tt_global_entry_free_ref(tt_global_entry); 3400 batadv_tt_global_entry_put(tt_global_entry);
3365 if (tt_local_entry) 3401 if (tt_local_entry)
3366 batadv_tt_local_entry_free_ref(tt_local_entry); 3402 batadv_tt_local_entry_put(tt_local_entry);
3367 return ret; 3403 return ret;
3368} 3404}
3369 3405
@@ -3371,13 +3407,12 @@ out:
3371 * batadv_tt_update_orig - update global translation table with new tt 3407 * batadv_tt_update_orig - update global translation table with new tt
3372 * information received via ogms 3408 * information received via ogms
3373 * @bat_priv: the bat priv with all the soft interface information 3409 * @bat_priv: the bat priv with all the soft interface information
3374 * @orig: the orig_node of the ogm 3410 * @orig_node: the orig_node of the ogm
3375 * @tt_vlan: pointer to the first tvlv VLAN entry 3411 * @tt_buff: pointer to the first tvlv VLAN entry
3376 * @tt_num_vlan: number of tvlv VLAN entries 3412 * @tt_num_vlan: number of tvlv VLAN entries
3377 * @tt_change: pointer to the first entry in the TT buffer 3413 * @tt_change: pointer to the first entry in the TT buffer
3378 * @tt_num_changes: number of tt changes inside the tt buffer 3414 * @tt_num_changes: number of tt changes inside the tt buffer
3379 * @ttvn: translation table version number of this changeset 3415 * @ttvn: translation table version number of this changeset
3380 * @tt_crc: crc32 checksum of orig node's translation table
3381 */ 3416 */
3382static void batadv_tt_update_orig(struct batadv_priv *bat_priv, 3417static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
3383 struct batadv_orig_node *orig_node, 3418 struct batadv_orig_node *orig_node,
@@ -3459,7 +3494,7 @@ request_table:
3459 * @addr: the mac address of the client to check 3494 * @addr: the mac address of the client to check
3460 * @vid: VLAN identifier 3495 * @vid: VLAN identifier
3461 * 3496 *
3462 * Returns true if we know that the client has moved from its old originator 3497 * Return: true if we know that the client has moved from its old originator
3463 * to another one. This entry is still kept for consistency purposes and will be 3498 * to another one. This entry is still kept for consistency purposes and will be
3464 * deleted later by a DEL or because of timeout 3499 * deleted later by a DEL or because of timeout
3465 */ 3500 */
@@ -3474,7 +3509,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
3474 goto out; 3509 goto out;
3475 3510
3476 ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; 3511 ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM;
3477 batadv_tt_global_entry_free_ref(tt_global_entry); 3512 batadv_tt_global_entry_put(tt_global_entry);
3478out: 3513out:
3479 return ret; 3514 return ret;
3480} 3515}
@@ -3485,7 +3520,7 @@ out:
3485 * @addr: the mac address of the local client to query 3520 * @addr: the mac address of the local client to query
3486 * @vid: VLAN identifier 3521 * @vid: VLAN identifier
3487 * 3522 *
3488 * Returns true if the local client is known to be roaming (it is not served by 3523 * Return: true if the local client is known to be roaming (it is not served by
3489 * this node anymore) or not. If yes, the client is still present in the table 3524 * this node anymore) or not. If yes, the client is still present in the table
3490 * to keep the latter consistent with the node TTVN 3525 * to keep the latter consistent with the node TTVN
3491 */ 3526 */
@@ -3500,7 +3535,7 @@ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
3500 goto out; 3535 goto out;
3501 3536
3502 ret = tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM; 3537 ret = tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM;
3503 batadv_tt_local_entry_free_ref(tt_local_entry); 3538 batadv_tt_local_entry_put(tt_local_entry);
3504out: 3539out:
3505 return ret; 3540 return ret;
3506} 3541}
@@ -3614,7 +3649,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
3614 * @tvlv_value: tvlv buffer containing the tt data 3649 * @tvlv_value: tvlv buffer containing the tt data
3615 * @tvlv_value_len: tvlv buffer length 3650 * @tvlv_value_len: tvlv buffer length
3616 * 3651 *
3617 * Returns NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS 3652 * Return: NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS
3618 * otherwise. 3653 * otherwise.
3619 */ 3654 */
3620static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, 3655static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
@@ -3695,7 +3730,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
3695 * @tvlv_value: tvlv buffer containing the tt data 3730 * @tvlv_value: tvlv buffer containing the tt data
3696 * @tvlv_value_len: tvlv buffer length 3731 * @tvlv_value_len: tvlv buffer length
3697 * 3732 *
3698 * Returns NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS 3733 * Return: NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS
3699 * otherwise. 3734 * otherwise.
3700 */ 3735 */
3701static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, 3736static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
@@ -3733,7 +3768,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
3733 3768
3734out: 3769out:
3735 if (orig_node) 3770 if (orig_node)
3736 batadv_orig_node_free_ref(orig_node); 3771 batadv_orig_node_put(orig_node);
3737 return NET_RX_SUCCESS; 3772 return NET_RX_SUCCESS;
3738} 3773}
3739 3774
@@ -3741,7 +3776,7 @@ out:
3741 * batadv_tt_init - initialise the translation table internals 3776 * batadv_tt_init - initialise the translation table internals
3742 * @bat_priv: the bat priv with all the soft interface information 3777 * @bat_priv: the bat priv with all the soft interface information
3743 * 3778 *
3744 * Return 0 on success or negative error number in case of failure. 3779 * Return: 0 on success or negative error number in case of failure.
3745 */ 3780 */
3746int batadv_tt_init(struct batadv_priv *bat_priv) 3781int batadv_tt_init(struct batadv_priv *bat_priv)
3747{ 3782{
@@ -3779,7 +3814,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
3779 * @addr: the mac address of the client 3814 * @addr: the mac address of the client
3780 * @vid: the identifier of the VLAN where this client is connected 3815 * @vid: the identifier of the VLAN where this client is connected
3781 * 3816 *
3782 * Returns true if the client is marked with the TT_CLIENT_ISOLA flag, false 3817 * Return: true if the client is marked with the TT_CLIENT_ISOLA flag, false
3783 * otherwise 3818 * otherwise
3784 */ 3819 */
3785bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, 3820bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
@@ -3794,7 +3829,7 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
3794 3829
3795 ret = tt->common.flags & BATADV_TT_CLIENT_ISOLA; 3830 ret = tt->common.flags & BATADV_TT_CLIENT_ISOLA;
3796 3831
3797 batadv_tt_global_entry_free_ref(tt); 3832 batadv_tt_global_entry_put(tt);
3798 3833
3799 return ret; 3834 return ret;
3800} 3835}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index abd8e116e5fb..7c7e2c006bfe 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich, Antonio Quartulli 3 * Marek Lindner, Simon Wunderlich, Antonio Quartulli
4 * 4 *
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 3437b667a2cd..1e47fbe8bb7b 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -22,9 +22,11 @@
22#error only "main.h" can be included directly 22#error only "main.h" can be included directly
23#endif 23#endif
24 24
25#include <linux/average.h>
25#include <linux/bitops.h> 26#include <linux/bitops.h>
26#include <linux/compiler.h> 27#include <linux/compiler.h>
27#include <linux/if_ether.h> 28#include <linux/if_ether.h>
29#include <linux/kref.h>
28#include <linux/netdevice.h> 30#include <linux/netdevice.h>
29#include <linux/sched.h> /* for linux/wait.h */ 31#include <linux/sched.h> /* for linux/wait.h */
30#include <linux/spinlock.h> 32#include <linux/spinlock.h>
@@ -73,7 +75,7 @@ enum batadv_dhcp_recipient {
73#define BATADV_TT_SYNC_MASK 0x00F0 75#define BATADV_TT_SYNC_MASK 0x00F0
74 76
75/** 77/**
76 * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data 78 * struct batadv_hard_iface_bat_iv - per hard-interface B.A.T.M.A.N. IV data
77 * @ogm_buff: buffer holding the OGM packet 79 * @ogm_buff: buffer holding the OGM packet
78 * @ogm_buff_len: length of the OGM packet buffer 80 * @ogm_buff_len: length of the OGM packet buffer
79 * @ogm_seqno: OGM sequence number - used to identify each OGM 81 * @ogm_seqno: OGM sequence number - used to identify each OGM
@@ -85,6 +87,36 @@ struct batadv_hard_iface_bat_iv {
85}; 87};
86 88
87/** 89/**
90 * enum batadv_v_hard_iface_flags - interface flags useful to B.A.T.M.A.N. V
91 * @BATADV_FULL_DUPLEX: tells if the connection over this link is full-duplex
92 * @BATADV_WARNING_DEFAULT: tells whether we have warned the user that no
93 * throughput data is available for this interface and that default values are
94 * assumed.
95 */
96enum batadv_v_hard_iface_flags {
97 BATADV_FULL_DUPLEX = BIT(0),
98 BATADV_WARNING_DEFAULT = BIT(1),
99};
100
101/**
102 * struct batadv_hard_iface_bat_v - per hard-interface B.A.T.M.A.N. V data
103 * @elp_interval: time interval between two ELP transmissions
104 * @elp_seqno: current ELP sequence number
105 * @elp_skb: base skb containing the ELP message to send
106 * @elp_wq: workqueue used to schedule ELP transmissions
107 * @throughput_override: throughput override to disable link auto-detection
108 * @flags: interface specific flags
109 */
110struct batadv_hard_iface_bat_v {
111 atomic_t elp_interval;
112 atomic_t elp_seqno;
113 struct sk_buff *elp_skb;
114 struct delayed_work elp_wq;
115 atomic_t throughput_override;
116 u8 flags;
117};
118
119/**
88 * struct batadv_hard_iface - network device known to batman-adv 120 * struct batadv_hard_iface - network device known to batman-adv
89 * @list: list node for batadv_hardif_list 121 * @list: list node for batadv_hardif_list
 90 * @if_num: identifier of the interface 122 * @if_num: identifier of the interface
@@ -97,8 +129,9 @@ struct batadv_hard_iface_bat_iv {
97 * batman-adv for this interface 129 * batman-adv for this interface
98 * @soft_iface: the batman-adv interface which uses this network interface 130 * @soft_iface: the batman-adv interface which uses this network interface
99 * @rcu: struct used for freeing in an RCU-safe manner 131 * @rcu: struct used for freeing in an RCU-safe manner
100 * @bat_iv: BATMAN IV specific per hard interface data 132 * @bat_iv: per hard-interface B.A.T.M.A.N. IV data
101 * @cleanup_work: work queue callback item for hard interface deinit 133 * @bat_v: per hard-interface B.A.T.M.A.N. V data
134 * @cleanup_work: work queue callback item for hard-interface deinit
102 * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs 135 * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
103 * @neigh_list: list of unique single hop neighbors via this interface 136 * @neigh_list: list of unique single hop neighbors via this interface
104 * @neigh_list_lock: lock protecting neigh_list 137 * @neigh_list_lock: lock protecting neigh_list
@@ -110,11 +143,14 @@ struct batadv_hard_iface {
110 struct net_device *net_dev; 143 struct net_device *net_dev;
111 u8 num_bcasts; 144 u8 num_bcasts;
112 struct kobject *hardif_obj; 145 struct kobject *hardif_obj;
113 atomic_t refcount; 146 struct kref refcount;
114 struct packet_type batman_adv_ptype; 147 struct packet_type batman_adv_ptype;
115 struct net_device *soft_iface; 148 struct net_device *soft_iface;
116 struct rcu_head rcu; 149 struct rcu_head rcu;
117 struct batadv_hard_iface_bat_iv bat_iv; 150 struct batadv_hard_iface_bat_iv bat_iv;
151#ifdef CONFIG_BATMAN_ADV_BATMAN_V
152 struct batadv_hard_iface_bat_v bat_v;
153#endif
118 struct work_struct cleanup_work; 154 struct work_struct cleanup_work;
119 struct dentry *debug_dir; 155 struct dentry *debug_dir;
120 struct hlist_head neigh_list; 156 struct hlist_head neigh_list;
@@ -125,10 +161,11 @@ struct batadv_hard_iface {
125/** 161/**
126 * struct batadv_orig_ifinfo - originator info per outgoing interface 162 * struct batadv_orig_ifinfo - originator info per outgoing interface
127 * @list: list node for orig_node::ifinfo_list 163 * @list: list node for orig_node::ifinfo_list
128 * @if_outgoing: pointer to outgoing hard interface 164 * @if_outgoing: pointer to outgoing hard-interface
129 * @router: router that should be used to reach this originator 165 * @router: router that should be used to reach this originator
130 * @last_real_seqno: last and best known sequence number 166 * @last_real_seqno: last and best known sequence number
131 * @last_ttl: ttl of last received packet 167 * @last_ttl: ttl of last received packet
168 * @last_seqno_forwarded: seqno of the OGM which was forwarded last
132 * @batman_seqno_reset: time when the batman seqno window was reset 169 * @batman_seqno_reset: time when the batman seqno window was reset
133 * @refcount: number of contexts the object is used 170 * @refcount: number of contexts the object is used
134 * @rcu: struct used for freeing in an RCU-safe manner 171 * @rcu: struct used for freeing in an RCU-safe manner
@@ -139,8 +176,9 @@ struct batadv_orig_ifinfo {
139 struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ 176 struct batadv_neigh_node __rcu *router; /* rcu protected pointer */
140 u32 last_real_seqno; 177 u32 last_real_seqno;
141 u8 last_ttl; 178 u8 last_ttl;
179 u32 last_seqno_forwarded;
142 unsigned long batman_seqno_reset; 180 unsigned long batman_seqno_reset;
143 atomic_t refcount; 181 struct kref refcount;
144 struct rcu_head rcu; 182 struct rcu_head rcu;
145}; 183};
146 184
@@ -196,13 +234,13 @@ struct batadv_orig_node_vlan {
196 unsigned short vid; 234 unsigned short vid;
197 struct batadv_vlan_tt tt; 235 struct batadv_vlan_tt tt;
198 struct hlist_node list; 236 struct hlist_node list;
199 atomic_t refcount; 237 struct kref refcount;
200 struct rcu_head rcu; 238 struct rcu_head rcu;
201}; 239};
202 240
203/** 241/**
204 * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members 242 * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members
205 * @bcast_own: set of bitfields (one per hard interface) where each one counts 243 * @bcast_own: set of bitfields (one per hard-interface) where each one counts
206 * the number of our OGMs this orig_node rebroadcasted "back" to us (relative 244 * the number of our OGMs this orig_node rebroadcasted "back" to us (relative
207 * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long. 245 * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
208 * @bcast_own_sum: sum of bcast_own 246 * @bcast_own_sum: sum of bcast_own
@@ -298,7 +336,7 @@ struct batadv_orig_node {
298 struct batadv_priv *bat_priv; 336 struct batadv_priv *bat_priv;
299 /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ 337 /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */
300 spinlock_t bcast_seqno_lock; 338 spinlock_t bcast_seqno_lock;
301 atomic_t refcount; 339 struct kref refcount;
302 struct rcu_head rcu; 340 struct rcu_head rcu;
303#ifdef CONFIG_BATMAN_ADV_NC 341#ifdef CONFIG_BATMAN_ADV_NC
304 struct list_head in_coding_list; 342 struct list_head in_coding_list;
@@ -341,15 +379,36 @@ struct batadv_gw_node {
341 struct batadv_orig_node *orig_node; 379 struct batadv_orig_node *orig_node;
342 u32 bandwidth_down; 380 u32 bandwidth_down;
343 u32 bandwidth_up; 381 u32 bandwidth_up;
344 atomic_t refcount; 382 struct kref refcount;
345 struct rcu_head rcu; 383 struct rcu_head rcu;
346}; 384};
347 385
386DECLARE_EWMA(throughput, 1024, 8)
387
388/**
389 * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor
390 * information
391 * @throughput: ewma link throughput towards this neighbor
392 * @elp_interval: time interval between two ELP transmissions
393 * @elp_latest_seqno: latest and best known ELP sequence number
394 * @last_unicast_tx: when the last unicast packet has been sent to this neighbor
395 * @metric_work: work queue callback item for metric update
396 */
397struct batadv_hardif_neigh_node_bat_v {
398 struct ewma_throughput throughput;
399 u32 elp_interval;
400 u32 elp_latest_seqno;
401 unsigned long last_unicast_tx;
402 struct work_struct metric_work;
403};
404
348/** 405/**
349 * batadv_hardif_neigh_node - unique neighbor per hard interface 406 * struct batadv_hardif_neigh_node - unique neighbor per hard-interface
350 * @list: list node for batadv_hard_iface::neigh_list 407 * @list: list node for batadv_hard_iface::neigh_list
351 * @addr: the MAC address of the neighboring interface 408 * @addr: the MAC address of the neighboring interface
352 * @if_incoming: pointer to incoming hard interface 409 * @if_incoming: pointer to incoming hard-interface
410 * @last_seen: when last packet via this neighbor was received
411 * @bat_v: B.A.T.M.A.N. V private data
353 * @refcount: number of contexts the object is used 412 * @refcount: number of contexts the object is used
354 * @rcu: struct used for freeing in a RCU-safe manner 413 * @rcu: struct used for freeing in a RCU-safe manner
355 */ 414 */
@@ -358,7 +417,10 @@ struct batadv_hardif_neigh_node {
358 u8 addr[ETH_ALEN]; 417 u8 addr[ETH_ALEN];
359 struct batadv_hard_iface *if_incoming; 418 struct batadv_hard_iface *if_incoming;
360 unsigned long last_seen; 419 unsigned long last_seen;
361 atomic_t refcount; 420#ifdef CONFIG_BATMAN_ADV_BATMAN_V
421 struct batadv_hardif_neigh_node_bat_v bat_v;
422#endif
423 struct kref refcount;
362 struct rcu_head rcu; 424 struct rcu_head rcu;
363}; 425};
364 426
@@ -369,8 +431,9 @@ struct batadv_hardif_neigh_node {
369 * @addr: the MAC address of the neighboring interface 431 * @addr: the MAC address of the neighboring interface
370 * @ifinfo_list: list for routing metrics per outgoing interface 432 * @ifinfo_list: list for routing metrics per outgoing interface
371 * @ifinfo_lock: lock protecting private ifinfo members and list 433 * @ifinfo_lock: lock protecting private ifinfo members and list
372 * @if_incoming: pointer to incoming hard interface 434 * @if_incoming: pointer to incoming hard-interface
373 * @last_seen: when last packet via this neighbor was received 435 * @last_seen: when last packet via this neighbor was received
436 * @hardif_neigh: hardif_neigh of this neighbor
374 * @refcount: number of contexts the object is used 437 * @refcount: number of contexts the object is used
375 * @rcu: struct used for freeing in an RCU-safe manner 438 * @rcu: struct used for freeing in an RCU-safe manner
376 */ 439 */
@@ -382,13 +445,14 @@ struct batadv_neigh_node {
382 spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ 445 spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */
383 struct batadv_hard_iface *if_incoming; 446 struct batadv_hard_iface *if_incoming;
384 unsigned long last_seen; 447 unsigned long last_seen;
385 atomic_t refcount; 448 struct batadv_hardif_neigh_node *hardif_neigh;
449 struct kref refcount;
386 struct rcu_head rcu; 450 struct rcu_head rcu;
387}; 451};
388 452
389/** 453/**
390 * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing 454 * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing
391 * interface for BATMAN IV 455 * interface for B.A.T.M.A.N. IV
392 * @tq_recv: ring buffer of received TQ values from this neigh node 456 * @tq_recv: ring buffer of received TQ values from this neigh node
393 * @tq_index: ring buffer index 457 * @tq_index: ring buffer index
394 * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) 458 * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
@@ -405,10 +469,22 @@ struct batadv_neigh_ifinfo_bat_iv {
405}; 469};
406 470
407/** 471/**
472 * struct batadv_neigh_ifinfo_bat_v - neighbor information per outgoing
473 * interface for B.A.T.M.A.N. V
474 * @throughput: last throughput metric received from originator via this neigh
475 * @last_seqno: last sequence number known for this neighbor
476 */
477struct batadv_neigh_ifinfo_bat_v {
478 u32 throughput;
479 u32 last_seqno;
480};
481
482/**
408 * struct batadv_neigh_ifinfo - neighbor information per outgoing interface 483 * struct batadv_neigh_ifinfo - neighbor information per outgoing interface
409 * @list: list node for batadv_neigh_node::ifinfo_list 484 * @list: list node for batadv_neigh_node::ifinfo_list
410 * @if_outgoing: pointer to outgoing hard interface 485 * @if_outgoing: pointer to outgoing hard-interface
411 * @bat_iv: B.A.T.M.A.N. IV private structure 486 * @bat_iv: B.A.T.M.A.N. IV private structure
487 * @bat_v: B.A.T.M.A.N. V private data
412 * @last_ttl: last received ttl from this neigh node 488 * @last_ttl: last received ttl from this neigh node
413 * @refcount: number of contexts the object is used 489 * @refcount: number of contexts the object is used
414 * @rcu: struct used for freeing in a RCU-safe manner 490 * @rcu: struct used for freeing in a RCU-safe manner
@@ -417,8 +493,11 @@ struct batadv_neigh_ifinfo {
417 struct hlist_node list; 493 struct hlist_node list;
418 struct batadv_hard_iface *if_outgoing; 494 struct batadv_hard_iface *if_outgoing;
419 struct batadv_neigh_ifinfo_bat_iv bat_iv; 495 struct batadv_neigh_ifinfo_bat_iv bat_iv;
496#ifdef CONFIG_BATMAN_ADV_BATMAN_V
497 struct batadv_neigh_ifinfo_bat_v bat_v;
498#endif
420 u8 last_ttl; 499 u8 last_ttl;
421 atomic_t refcount; 500 struct kref refcount;
422 struct rcu_head rcu; 501 struct rcu_head rcu;
423}; 502};
424 503
@@ -744,11 +823,25 @@ struct batadv_softif_vlan {
744 atomic_t ap_isolation; /* boolean */ 823 atomic_t ap_isolation; /* boolean */
745 struct batadv_vlan_tt tt; 824 struct batadv_vlan_tt tt;
746 struct hlist_node list; 825 struct hlist_node list;
747 atomic_t refcount; 826 struct kref refcount;
748 struct rcu_head rcu; 827 struct rcu_head rcu;
749}; 828};
750 829
751/** 830/**
831 * struct batadv_priv_bat_v - B.A.T.M.A.N. V per soft-interface private data
832 * @ogm_buff: buffer holding the OGM packet
833 * @ogm_buff_len: length of the OGM packet buffer
834 * @ogm_seqno: OGM sequence number - used to identify each OGM
835 * @ogm_wq: workqueue used to schedule OGM transmissions
836 */
837struct batadv_priv_bat_v {
838 unsigned char *ogm_buff;
839 int ogm_buff_len;
840 atomic_t ogm_seqno;
841 struct delayed_work ogm_wq;
842};
843
844/**
752 * struct batadv_priv - per mesh interface data 845 * struct batadv_priv - per mesh interface data
753 * @mesh_state: current status of the mesh (inactive/active/deactivating) 846 * @mesh_state: current status of the mesh (inactive/active/deactivating)
754 * @soft_iface: net device which holds this struct as private data 847 * @soft_iface: net device which holds this struct as private data
@@ -771,6 +864,9 @@ struct batadv_softif_vlan {
771 * @orig_interval: OGM broadcast interval in milliseconds 864 * @orig_interval: OGM broadcast interval in milliseconds
772 * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop 865 * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
773 * @log_level: configured log level (see batadv_dbg_level) 866 * @log_level: configured log level (see batadv_dbg_level)
867 * @isolation_mark: the skb->mark value used to match packets for AP isolation
868 * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be used
869 * for the isolation mark
774 * @bcast_seqno: last sent broadcast packet sequence number 870 * @bcast_seqno: last sent broadcast packet sequence number
775 * @bcast_queue_left: number of remaining buffered broadcast packet slots 871 * @bcast_queue_left: number of remaining buffered broadcast packet slots
776 * @batman_queue_left: number of remaining OGM packet slots 872 * @batman_queue_left: number of remaining OGM packet slots
@@ -783,8 +879,8 @@ struct batadv_softif_vlan {
783 * @forw_bat_list_lock: lock protecting forw_bat_list 879 * @forw_bat_list_lock: lock protecting forw_bat_list
784 * @forw_bcast_list_lock: lock protecting forw_bcast_list 880 * @forw_bcast_list_lock: lock protecting forw_bcast_list
785 * @orig_work: work queue callback item for orig node purging 881 * @orig_work: work queue callback item for orig node purging
786 * @cleanup_work: work queue callback item for soft interface deinit 882 * @cleanup_work: work queue callback item for soft-interface deinit
787 * @primary_if: one of the hard interfaces assigned to this mesh interface 883 * @primary_if: one of the hard-interfaces assigned to this mesh interface
788 * becomes the primary interface 884 * becomes the primary interface
789 * @bat_algo_ops: routing algorithm used by this mesh interface 885 * @bat_algo_ops: routing algorithm used by this mesh interface
790 * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top 886 * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
@@ -799,6 +895,7 @@ struct batadv_softif_vlan {
799 * @mcast: multicast data 895 * @mcast: multicast data
800 * @network_coding: bool indicating whether network coding is enabled 896 * @network_coding: bool indicating whether network coding is enabled
801 * @nc: network coding data 897 * @nc: network coding data
898 * @bat_v: B.A.T.M.A.N. V per soft-interface private data
802 */ 899 */
803struct batadv_priv { 900struct batadv_priv {
804 atomic_t mesh_state; 901 atomic_t mesh_state;
@@ -864,6 +961,9 @@ struct batadv_priv {
864 atomic_t network_coding; 961 atomic_t network_coding;
865 struct batadv_priv_nc nc; 962 struct batadv_priv_nc nc;
866#endif /* CONFIG_BATMAN_ADV_NC */ 963#endif /* CONFIG_BATMAN_ADV_NC */
964#ifdef CONFIG_BATMAN_ADV_BATMAN_V
965 struct batadv_priv_bat_v bat_v;
966#endif
867}; 967};
868 968
869/** 969/**
@@ -925,7 +1025,7 @@ struct batadv_bla_backbone_gw {
925 atomic_t request_sent; 1025 atomic_t request_sent;
926 u16 crc; 1026 u16 crc;
927 spinlock_t crc_lock; /* protects crc */ 1027 spinlock_t crc_lock; /* protects crc */
928 atomic_t refcount; 1028 struct kref refcount;
929 struct rcu_head rcu; 1029 struct rcu_head rcu;
930}; 1030};
931 1031
@@ -946,7 +1046,7 @@ struct batadv_bla_claim {
946 unsigned long lasttime; 1046 unsigned long lasttime;
947 struct hlist_node hash_entry; 1047 struct hlist_node hash_entry;
948 struct rcu_head rcu; 1048 struct rcu_head rcu;
949 atomic_t refcount; 1049 struct kref refcount;
950}; 1050};
951#endif 1051#endif
952 1052
@@ -967,7 +1067,7 @@ struct batadv_tt_common_entry {
967 struct hlist_node hash_entry; 1067 struct hlist_node hash_entry;
968 u16 flags; 1068 u16 flags;
969 unsigned long added_at; 1069 unsigned long added_at;
970 atomic_t refcount; 1070 struct kref refcount;
971 struct rcu_head rcu; 1071 struct rcu_head rcu;
972}; 1072};
973 1073
@@ -975,10 +1075,12 @@ struct batadv_tt_common_entry {
975 * struct batadv_tt_local_entry - translation table local entry data 1075 * struct batadv_tt_local_entry - translation table local entry data
976 * @common: general translation table data 1076 * @common: general translation table data
977 * @last_seen: timestamp used for purging stale tt local entries 1077 * @last_seen: timestamp used for purging stale tt local entries
1078 * @vlan: soft-interface vlan of the entry
978 */ 1079 */
979struct batadv_tt_local_entry { 1080struct batadv_tt_local_entry {
980 struct batadv_tt_common_entry common; 1081 struct batadv_tt_common_entry common;
981 unsigned long last_seen; 1082 unsigned long last_seen;
1083 struct batadv_softif_vlan *vlan;
982}; 1084};
983 1085
984/** 1086/**
@@ -1009,7 +1111,7 @@ struct batadv_tt_orig_list_entry {
1009 struct batadv_orig_node *orig_node; 1111 struct batadv_orig_node *orig_node;
1010 u8 ttvn; 1112 u8 ttvn;
1011 struct hlist_node list; 1113 struct hlist_node list;
1012 atomic_t refcount; 1114 struct kref refcount;
1013 struct rcu_head rcu; 1115 struct rcu_head rcu;
1014}; 1116};
1015 1117
@@ -1062,7 +1164,7 @@ struct batadv_tt_roam_node {
1062struct batadv_nc_node { 1164struct batadv_nc_node {
1063 struct list_head list; 1165 struct list_head list;
1064 u8 addr[ETH_ALEN]; 1166 u8 addr[ETH_ALEN];
1065 atomic_t refcount; 1167 struct kref refcount;
1066 struct rcu_head rcu; 1168 struct rcu_head rcu;
1067 struct batadv_orig_node *orig_node; 1169 struct batadv_orig_node *orig_node;
1068 unsigned long last_seen; 1170 unsigned long last_seen;
@@ -1082,7 +1184,7 @@ struct batadv_nc_node {
1082struct batadv_nc_path { 1184struct batadv_nc_path {
1083 struct hlist_node hash_entry; 1185 struct hlist_node hash_entry;
1084 struct rcu_head rcu; 1186 struct rcu_head rcu;
1085 atomic_t refcount; 1187 struct kref refcount;
1086 struct list_head packet_list; 1188 struct list_head packet_list;
1087 spinlock_t packet_list_lock; /* Protects packet_list */ 1189 spinlock_t packet_list_lock; /* Protects packet_list */
1088 u8 next_hop[ETH_ALEN]; 1190 u8 next_hop[ETH_ALEN];
@@ -1152,6 +1254,8 @@ struct batadv_forw_packet {
1152 * struct batadv_algo_ops - mesh algorithm callbacks 1254 * struct batadv_algo_ops - mesh algorithm callbacks
1153 * @list: list node for the batadv_algo_list 1255 * @list: list node for the batadv_algo_list
1154 * @name: name of the algorithm 1256 * @name: name of the algorithm
1257 * @bat_iface_activate: start routing mechanisms when hard-interface is brought
1258 * up
1155 * @bat_iface_enable: init routing info when hard-interface is enabled 1259 * @bat_iface_enable: init routing info when hard-interface is enabled
1156 * @bat_iface_disable: de-init routing info when hard-interface is disabled 1260 * @bat_iface_disable: de-init routing info when hard-interface is disabled
1157 * @bat_iface_update_mac: (re-)init mac addresses of the protocol information 1261 * @bat_iface_update_mac: (re-)init mac addresses of the protocol information
@@ -1179,6 +1283,7 @@ struct batadv_forw_packet {
1179struct batadv_algo_ops { 1283struct batadv_algo_ops {
1180 struct hlist_node list; 1284 struct hlist_node list;
1181 char *name; 1285 char *name;
1286 void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface);
1182 int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); 1287 int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface);
1183 void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); 1288 void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface);
1184 void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); 1289 void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface);
@@ -1225,7 +1330,7 @@ struct batadv_dat_entry {
1225 unsigned short vid; 1330 unsigned short vid;
1226 unsigned long last_update; 1331 unsigned long last_update;
1227 struct hlist_node hash_entry; 1332 struct hlist_node hash_entry;
1228 atomic_t refcount; 1333 struct kref refcount;
1229 struct rcu_head rcu; 1334 struct rcu_head rcu;
1230}; 1335};
1231 1336
@@ -1261,7 +1366,7 @@ struct batadv_dat_candidate {
1261struct batadv_tvlv_container { 1366struct batadv_tvlv_container {
1262 struct hlist_node list; 1367 struct hlist_node list;
1263 struct batadv_tvlv_hdr tvlv_hdr; 1368 struct batadv_tvlv_hdr tvlv_hdr;
1264 atomic_t refcount; 1369 struct kref refcount;
1265}; 1370};
1266 1371
1267/** 1372/**
@@ -1288,7 +1393,7 @@ struct batadv_tvlv_handler {
1288 u8 type; 1393 u8 type;
1289 u8 version; 1394 u8 version;
1290 u8 flags; 1395 u8 flags;
1291 atomic_t refcount; 1396 struct kref refcount;
1292 struct rcu_head rcu; 1397 struct rcu_head rcu;
1293}; 1398};
1294 1399
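The change repeated throughout the types.h hunks above is the conversion of object reference counters from atomic_t to struct kref (hard interfaces, originators, gateway and neighbor nodes, translation-table and network-coding entries, TVLV containers and handlers). A minimal sketch of the kref-plus-RCU lifetime pattern these structures move to is shown below; "foo" and its helpers are invented for illustration and are not batman-adv symbols.

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct kref refcount;
	struct rcu_head rcu;
};

/* called exactly once, when the last reference is dropped */
static void foo_release(struct kref *ref)
{
	struct foo *f = container_of(ref, struct foo, refcount);

	kfree_rcu(f, rcu);	/* readers still under rcu_read_lock() stay safe */
}

static struct foo *foo_alloc(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		kref_init(&f->refcount);	/* count starts at 1 */
	return f;
}

static void foo_get(struct foo *f)
{
	kref_get(&f->refcount);
}

static void foo_put(struct foo *f)
{
	kref_put(&f->refcount, foo_release);	/* frees via foo_release() at zero */
}

Compared with an open-coded atomic_t, kref_put() runs the release callback exactly once when the count drops to zero, and kref_get() warns if it ever sees a zero count, which is why the conversion only needs to touch the field type and the get/put helpers.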
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 95d1a66ba03a..06c31b9a68b0 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -69,6 +69,15 @@ config BT_6LOWPAN
69 help 69 help
70 IPv6 compression over Bluetooth Low Energy. 70 IPv6 compression over Bluetooth Low Energy.
71 71
72config BT_LEDS
73 bool "Enable LED triggers"
74 depends on BT
75 depends on LEDS_CLASS
76 select LEDS_TRIGGERS
77 help
78 This option selects a few LED triggers for different
79 Bluetooth events.
80
72config BT_SELFTEST 81config BT_SELFTEST
73 bool "Bluetooth self testing support" 82 bool "Bluetooth self testing support"
74 depends on BT && DEBUG_KERNEL 83 depends on BT && DEBUG_KERNEL
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 2b15ae8c1def..b3ff12eb9b6d 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -17,6 +17,7 @@ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
17 17
18bluetooth-$(CONFIG_BT_BREDR) += sco.o 18bluetooth-$(CONFIG_BT_BREDR) += sco.o
19bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o 19bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
20bluetooth-$(CONFIG_BT_LEDS) += leds.o
20bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o 21bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
21bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o 22bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
22 23
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 32575b49f4a0..bf9f8a801a2e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -719,6 +719,13 @@ done:
719 hci_dev_unlock(hdev); 719 hci_dev_unlock(hdev);
720} 720}
721 721
722static bool conn_use_rpa(struct hci_conn *conn)
723{
724 struct hci_dev *hdev = conn->hdev;
725
726 return hci_dev_test_flag(hdev, HCI_PRIVACY);
727}
728
722static void hci_req_add_le_create_conn(struct hci_request *req, 729static void hci_req_add_le_create_conn(struct hci_request *req,
723 struct hci_conn *conn) 730 struct hci_conn *conn)
724{ 731{
@@ -726,14 +733,15 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
726 struct hci_dev *hdev = conn->hdev; 733 struct hci_dev *hdev = conn->hdev;
727 u8 own_addr_type; 734 u8 own_addr_type;
728 735
729 memset(&cp, 0, sizeof(cp));
730
731 /* Update random address, but set require_privacy to false so 736 /* Update random address, but set require_privacy to false so
732 * that we never connect with an non-resolvable address. 737 * that we never connect with an non-resolvable address.
733 */ 738 */
734 if (hci_update_random_address(req, false, &own_addr_type)) 739 if (hci_update_random_address(req, false, conn_use_rpa(conn),
740 &own_addr_type))
735 return; 741 return;
736 742
743 memset(&cp, 0, sizeof(cp));
744
737 /* Set window to be the same value as the interval to enable 745 /* Set window to be the same value as the interval to enable
738 * continuous scanning. 746 * continuous scanning.
739 */ 747 */
@@ -774,7 +782,8 @@ static void hci_req_directed_advertising(struct hci_request *req,
774 /* Set require_privacy to false so that the remote device has a 782 /* Set require_privacy to false so that the remote device has a
775 * chance of identifying us. 783 * chance of identifying us.
776 */ 784 */
777 if (hci_update_random_address(req, false, &own_addr_type) < 0) 785 if (hci_update_random_address(req, false, conn_use_rpa(conn),
786 &own_addr_type) < 0)
778 return; 787 return;
779 788
780 memset(&cp, 0, sizeof(cp)); 789 memset(&cp, 0, sizeof(cp));
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 883c821a9e78..2713fc86e85a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -40,6 +40,7 @@
40#include "hci_request.h" 40#include "hci_request.h"
41#include "hci_debugfs.h" 41#include "hci_debugfs.h"
42#include "smp.h" 42#include "smp.h"
43#include "leds.h"
43 44
44static void hci_rx_work(struct work_struct *work); 45static void hci_rx_work(struct work_struct *work);
45static void hci_cmd_work(struct work_struct *work); 46static void hci_cmd_work(struct work_struct *work);
@@ -1395,6 +1396,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
1395 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); 1396 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
1396 set_bit(HCI_UP, &hdev->flags); 1397 set_bit(HCI_UP, &hdev->flags);
1397 hci_sock_dev_event(hdev, HCI_DEV_UP); 1398 hci_sock_dev_event(hdev, HCI_DEV_UP);
1399 hci_leds_update_powered(hdev, true);
1398 if (!hci_dev_test_flag(hdev, HCI_SETUP) && 1400 if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
1399 !hci_dev_test_flag(hdev, HCI_CONFIG) && 1401 !hci_dev_test_flag(hdev, HCI_CONFIG) &&
1400 !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && 1402 !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
@@ -1532,6 +1534,8 @@ int hci_dev_do_close(struct hci_dev *hdev)
1532 return 0; 1534 return 0;
1533 } 1535 }
1534 1536
1537 hci_leds_update_powered(hdev, false);
1538
1535 /* Flush RX and TX works */ 1539 /* Flush RX and TX works */
1536 flush_work(&hdev->tx_work); 1540 flush_work(&hdev->tx_work);
1537 flush_work(&hdev->rx_work); 1541 flush_work(&hdev->rx_work);
@@ -2017,6 +2021,7 @@ static void hci_power_on(struct work_struct *work)
2017 if (test_bit(HCI_UP, &hdev->flags) && 2021 if (test_bit(HCI_UP, &hdev->flags) &&
2018 hci_dev_test_flag(hdev, HCI_MGMT) && 2022 hci_dev_test_flag(hdev, HCI_MGMT) &&
2019 hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { 2023 hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) {
2024 cancel_delayed_work(&hdev->power_off);
2020 hci_req_sync_lock(hdev); 2025 hci_req_sync_lock(hdev);
2021 err = __hci_req_hci_power_on(hdev); 2026 err = __hci_req_hci_power_on(hdev);
2022 hci_req_sync_unlock(hdev); 2027 hci_req_sync_unlock(hdev);
@@ -3067,6 +3072,8 @@ int hci_register_dev(struct hci_dev *hdev)
3067 if (error < 0) 3072 if (error < 0)
3068 goto err_wqueue; 3073 goto err_wqueue;
3069 3074
3075 hci_leds_init(hdev);
3076
3070 hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev, 3077 hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev,
3071 RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops, 3078 RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops,
3072 hdev); 3079 hdev);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index c78ee2dc9323..6e125d76df0d 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -771,6 +771,11 @@ static u8 update_white_list(struct hci_request *req)
771 return 0x01; 771 return 0x01;
772} 772}
773 773
774static bool scan_use_rpa(struct hci_dev *hdev)
775{
776 return hci_dev_test_flag(hdev, HCI_PRIVACY);
777}
778
774void hci_req_add_le_passive_scan(struct hci_request *req) 779void hci_req_add_le_passive_scan(struct hci_request *req)
775{ 780{
776 struct hci_cp_le_set_scan_param param_cp; 781 struct hci_cp_le_set_scan_param param_cp;
@@ -785,7 +790,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
785 * advertising with our address will be correctly reported 790 * advertising with our address will be correctly reported
786 * by the controller. 791 * by the controller.
787 */ 792 */
788 if (hci_update_random_address(req, false, &own_addr_type)) 793 if (hci_update_random_address(req, false, scan_use_rpa(hdev),
794 &own_addr_type))
789 return; 795 return;
790 796
791 /* Adding or removing entries from the white list must 797 /* Adding or removing entries from the white list must
@@ -866,6 +872,11 @@ static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
866 if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) 872 if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
867 flags |= MGMT_ADV_FLAG_CONNECTABLE; 873 flags |= MGMT_ADV_FLAG_CONNECTABLE;
868 874
875 if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
876 flags |= MGMT_ADV_FLAG_LIMITED_DISCOV;
877 else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
878 flags |= MGMT_ADV_FLAG_DISCOV;
879
869 return flags; 880 return flags;
870 } 881 }
871 882
@@ -878,6 +889,29 @@ static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
878 return adv_instance->flags; 889 return adv_instance->flags;
879} 890}
880 891
892static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags)
893{
894 /* If privacy is not enabled don't use RPA */
895 if (!hci_dev_test_flag(hdev, HCI_PRIVACY))
896 return false;
897
898 /* If basic privacy mode is enabled use RPA */
899 if (!hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY))
900 return true;
901
902 /* If limited privacy mode is enabled don't use RPA if we're
903 * both discoverable and bondable.
904 */
905 if ((flags & MGMT_ADV_FLAG_DISCOV) &&
906 hci_dev_test_flag(hdev, HCI_BONDABLE))
907 return false;
908
909 /* We're neither bondable nor discoverable in the limited
910 * privacy mode, therefore use RPA.
911 */
912 return true;
913}
914
881void __hci_req_enable_advertising(struct hci_request *req) 915void __hci_req_enable_advertising(struct hci_request *req)
882{ 916{
883 struct hci_dev *hdev = req->hdev; 917 struct hci_dev *hdev = req->hdev;
@@ -911,7 +945,9 @@ void __hci_req_enable_advertising(struct hci_request *req)
911 * advertising is used. In that case it is fine to use a 945 * advertising is used. In that case it is fine to use a
912 * non-resolvable private address. 946 * non-resolvable private address.
913 */ 947 */
914 if (hci_update_random_address(req, !connectable, &own_addr_type) < 0) 948 if (hci_update_random_address(req, !connectable,
949 adv_use_rpa(hdev, flags),
950 &own_addr_type) < 0)
915 return; 951 return;
916 952
917 memset(&cp, 0, sizeof(cp)); 953 memset(&cp, 0, sizeof(cp));
@@ -1325,7 +1361,7 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
1325} 1361}
1326 1362
1327int hci_update_random_address(struct hci_request *req, bool require_privacy, 1363int hci_update_random_address(struct hci_request *req, bool require_privacy,
1328 u8 *own_addr_type) 1364 bool use_rpa, u8 *own_addr_type)
1329{ 1365{
1330 struct hci_dev *hdev = req->hdev; 1366 struct hci_dev *hdev = req->hdev;
1331 int err; 1367 int err;
@@ -1334,7 +1370,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
1334 * current RPA has expired or there is something else than 1370 * current RPA has expired or there is something else than
1335 * the current RPA in use, then generate a new one. 1371 * the current RPA in use, then generate a new one.
1336 */ 1372 */
1337 if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { 1373 if (use_rpa) {
1338 int to; 1374 int to;
1339 1375
1340 *own_addr_type = ADDR_LE_DEV_RANDOM; 1376 *own_addr_type = ADDR_LE_DEV_RANDOM;
@@ -1596,9 +1632,16 @@ static int discoverable_update(struct hci_request *req, unsigned long opt)
1596 /* Advertising instances don't use the global discoverable setting, so 1632 /* Advertising instances don't use the global discoverable setting, so
1597 * only update AD if advertising was enabled using Set Advertising. 1633 * only update AD if advertising was enabled using Set Advertising.
1598 */ 1634 */
1599 if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) 1635 if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
1600 __hci_req_update_adv_data(req, 0x00); 1636 __hci_req_update_adv_data(req, 0x00);
1601 1637
1638 /* Discoverable mode affects the local advertising
1639 * address in limited privacy mode.
1640 */
1641 if (hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY))
1642 __hci_req_enable_advertising(req);
1643 }
1644
1602 hci_dev_unlock(hdev); 1645 hci_dev_unlock(hdev);
1603 1646
1604 return 0; 1647 return 0;
@@ -1941,7 +1984,8 @@ static int active_scan(struct hci_request *req, unsigned long opt)
1941 * address (when privacy feature has been enabled) or non-resolvable 1984 * address (when privacy feature has been enabled) or non-resolvable
1942 * private address. 1985 * private address.
1943 */ 1986 */
1944 err = hci_update_random_address(req, true, &own_addr_type); 1987 err = hci_update_random_address(req, true, scan_use_rpa(hdev),
1988 &own_addr_type);
1945 if (err < 0) 1989 if (err < 0)
1946 own_addr_type = ADDR_LE_DEV_PUBLIC; 1990 own_addr_type = ADDR_LE_DEV_PUBLIC;
1947 1991
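hci_update_random_address() gains an explicit use_rpa argument, and every caller now computes it up front: passive/active scanning and connection establishment use the plain HCI_PRIVACY flag (scan_use_rpa()/conn_use_rpa()), while advertising goes through adv_use_rpa(), which also accounts for the new limited-privacy mode. A self-contained sketch of that decision table follows, using plain booleans in place of the hdev flags; want_rpa() is an invented name, not a kernel function.

#include <stdbool.h>

static bool want_rpa(bool privacy, bool limited_privacy,
		     bool discoverable, bool bondable)
{
	if (!privacy)
		return false;	/* privacy disabled: advertise the identity address */

	if (!limited_privacy)
		return true;	/* basic privacy: always use a resolvable private address */

	/* limited privacy: skip the RPA only while we are both
	 * discoverable and bondable, i.e. actively pairable
	 */
	if (discoverable && bondable)
		return false;

	return true;
}

Because two of those inputs can change at runtime, the mgmt.c hunks later in this diff re-queue the discoverable/advertising work when bondable or discoverable flips while HCI_LIMITED_PRIVACY is set, so the advertised address tracks the current mode.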
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 64ff8c040d50..b2d044bdc732 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -89,7 +89,7 @@ static inline void hci_req_update_scan(struct hci_dev *hdev)
89void __hci_req_update_scan(struct hci_request *req); 89void __hci_req_update_scan(struct hci_request *req);
90 90
91int hci_update_random_address(struct hci_request *req, bool require_privacy, 91int hci_update_random_address(struct hci_request *req, bool require_privacy,
92 u8 *own_addr_type); 92 bool use_rpa, u8 *own_addr_type);
93 93
94int hci_abort_conn(struct hci_conn *conn, u8 reason); 94int hci_abort_conn(struct hci_conn *conn, u8 reason);
95void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, 95void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
new file mode 100644
index 000000000000..8319c8440c89
--- /dev/null
+++ b/net/bluetooth/leds.c
@@ -0,0 +1,74 @@
1/*
2 * Copyright 2015, Heiner Kallweit <hkallweit1@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <net/bluetooth/bluetooth.h>
10#include <net/bluetooth/hci_core.h>
11
12#include "leds.h"
13
14struct hci_basic_led_trigger {
15 struct led_trigger led_trigger;
16 struct hci_dev *hdev;
17};
18
19#define to_hci_basic_led_trigger(arg) container_of(arg, \
20 struct hci_basic_led_trigger, led_trigger)
21
22void hci_leds_update_powered(struct hci_dev *hdev, bool enabled)
23{
24 if (hdev->power_led)
25 led_trigger_event(hdev->power_led,
26 enabled ? LED_FULL : LED_OFF);
27}
28
29static void power_activate(struct led_classdev *led_cdev)
30{
31 struct hci_basic_led_trigger *htrig;
32 bool powered;
33
34 htrig = to_hci_basic_led_trigger(led_cdev->trigger);
35 powered = test_bit(HCI_UP, &htrig->hdev->flags);
36
37 led_trigger_event(led_cdev->trigger, powered ? LED_FULL : LED_OFF);
38}
39
40static struct led_trigger *led_allocate_basic(struct hci_dev *hdev,
41 void (*activate)(struct led_classdev *led_cdev),
42 const char *name)
43{
44 struct hci_basic_led_trigger *htrig;
45
46 htrig = devm_kzalloc(&hdev->dev, sizeof(*htrig), GFP_KERNEL);
47 if (!htrig)
48 return NULL;
49
50 htrig->hdev = hdev;
51 htrig->led_trigger.activate = activate;
52 htrig->led_trigger.name = devm_kasprintf(&hdev->dev, GFP_KERNEL,
53 "%s-%s", hdev->name,
54 name);
55 if (!htrig->led_trigger.name)
56 goto err_alloc;
57
58 if (devm_led_trigger_register(&hdev->dev, &htrig->led_trigger))
59 goto err_register;
60
61 return &htrig->led_trigger;
62
63err_register:
64 devm_kfree(&hdev->dev, (void *)htrig->led_trigger.name);
65err_alloc:
66 devm_kfree(&hdev->dev, htrig);
67 return NULL;
68}
69
70void hci_leds_init(struct hci_dev *hdev)
71{
72 /* initialize power_led */
73 hdev->power_led = led_allocate_basic(hdev, power_activate, "power");
74}
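The new leds.c embeds the generic struct led_trigger inside an hci_basic_led_trigger so that the activate callback can recover the owning hci_dev with container_of(), and it allocates everything with devm_* against &hdev->dev so the trigger memory goes away with the device. The fragment below only illustrates that embed-and-recover idiom with invented names; it is not part of the Bluetooth code.

#include <linux/kernel.h>	/* container_of() */

struct base_cb {
	void (*activate)(struct base_cb *cb);
};

struct owner {
	struct base_cb cb;	/* embedded by value, never a pointer */
	int private_state;
};

static void owner_activate(struct base_cb *cb)
{
	/* recover the enclosing object from the generic callback argument */
	struct owner *o = container_of(cb, struct owner, cb);

	o->private_state++;
}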
diff --git a/net/bluetooth/leds.h b/net/bluetooth/leds.h
new file mode 100644
index 000000000000..a9c4d6ea01cf
--- /dev/null
+++ b/net/bluetooth/leds.h
@@ -0,0 +1,16 @@
1/*
2 * Copyright 2015, Heiner Kallweit <hkallweit1@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#if IS_ENABLED(CONFIG_BT_LEDS)
10void hci_leds_update_powered(struct hci_dev *hdev, bool enabled);
11void hci_leds_init(struct hci_dev *hdev);
12#else
13static inline void hci_leds_update_powered(struct hci_dev *hdev,
14 bool enabled) {}
15static inline void hci_leds_init(struct hci_dev *hdev) {}
16#endif
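leds.h follows the usual pattern for optional kernel features: real prototypes when the option is enabled, empty static inline stubs otherwise, so hci_core.c can call hci_leds_init() and hci_leds_update_powered() unconditionally and the calls compile away on kernels built without CONFIG_BT_LEDS. The same idiom for a hypothetical CONFIG_FOO option would look like this (foo_notify() is invented):

#if IS_ENABLED(CONFIG_FOO)
void foo_notify(struct hci_dev *hdev, bool up);
#else
static inline void foo_notify(struct hci_dev *hdev, bool up) {}
#endif

IS_ENABLED() evaluates to 1 for both =y and =m, so the header keeps working if the option ever becomes tristate.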
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 5a5089cb6570..9e4b931588cf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
38#include "mgmt_util.h" 38#include "mgmt_util.h"
39 39
40#define MGMT_VERSION 1 40#define MGMT_VERSION 1
41#define MGMT_REVISION 11 41#define MGMT_REVISION 12
42 42
43static const u16 mgmt_commands[] = { 43static const u16 mgmt_commands[] = {
44 MGMT_OP_READ_INDEX_LIST, 44 MGMT_OP_READ_INDEX_LIST,
@@ -1382,8 +1382,19 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data,
1382 if (err < 0) 1382 if (err < 0)
1383 goto unlock; 1383 goto unlock;
1384 1384
1385 if (changed) 1385 if (changed) {
1386 /* In limited privacy mode the change of bondable mode
1387 * may affect the local advertising address.
1388 */
1389 if (hdev_is_powered(hdev) &&
1390 hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
1391 hci_dev_test_flag(hdev, HCI_DISCOVERABLE) &&
1392 hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY))
1393 queue_work(hdev->req_workqueue,
1394 &hdev->discoverable_update);
1395
1386 err = new_settings(hdev, sk); 1396 err = new_settings(hdev, sk);
1397 }
1387 1398
1388unlock: 1399unlock:
1389 hci_dev_unlock(hdev); 1400 hci_dev_unlock(hdev);
@@ -4423,7 +4434,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
4423 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, 4434 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY,
4424 MGMT_STATUS_NOT_SUPPORTED); 4435 MGMT_STATUS_NOT_SUPPORTED);
4425 4436
4426 if (cp->privacy != 0x00 && cp->privacy != 0x01) 4437 if (cp->privacy != 0x00 && cp->privacy != 0x01 && cp->privacy != 0x02)
4427 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, 4438 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY,
4428 MGMT_STATUS_INVALID_PARAMS); 4439 MGMT_STATUS_INVALID_PARAMS);
4429 4440
@@ -4442,10 +4453,15 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
4442 changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY); 4453 changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY);
4443 memcpy(hdev->irk, cp->irk, sizeof(hdev->irk)); 4454 memcpy(hdev->irk, cp->irk, sizeof(hdev->irk));
4444 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); 4455 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
4456 if (cp->privacy == 0x02)
4457 hci_dev_set_flag(hdev, HCI_LIMITED_PRIVACY);
4458 else
4459 hci_dev_clear_flag(hdev, HCI_LIMITED_PRIVACY);
4445 } else { 4460 } else {
4446 changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY); 4461 changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY);
4447 memset(hdev->irk, 0, sizeof(hdev->irk)); 4462 memset(hdev->irk, 0, sizeof(hdev->irk));
4448 hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED); 4463 hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED);
4464 hci_dev_clear_flag(hdev, HCI_LIMITED_PRIVACY);
4449 } 4465 }
4450 4466
4451 err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev); 4467 err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev);
@@ -5979,6 +5995,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
5979 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, 5995 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
5980 MGMT_STATUS_INVALID_PARAMS); 5996 MGMT_STATUS_INVALID_PARAMS);
5981 5997
5998 if (data_len != sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len)
5999 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
6000 MGMT_STATUS_INVALID_PARAMS);
6001
5982 flags = __le32_to_cpu(cp->flags); 6002 flags = __le32_to_cpu(cp->flags);
5983 timeout = __le16_to_cpu(cp->timeout); 6003 timeout = __le16_to_cpu(cp->timeout);
5984 duration = __le16_to_cpu(cp->duration); 6004 duration = __le16_to_cpu(cp->duration);
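add_advertising() now rejects commands whose overall length does not equal the fixed header plus the two variable-length blobs the header claims to carry, before any of the data is interpreted. A self-contained sketch of the same consistency check follows; struct adv_cmd and adv_cmd_len_ok() are invented stand-ins, not the mgmt structures.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct adv_cmd {
	uint8_t  instance;
	uint8_t  adv_data_len;
	uint8_t  scan_rsp_len;
	uint8_t  data[];	/* adv_data_len + scan_rsp_len bytes follow */
};

static bool adv_cmd_len_ok(const struct adv_cmd *cp, size_t total_len)
{
	/* the wire length must match exactly what the header declares */
	return total_len == sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len;
}

Checking for exact equality (rather than >=) also catches trailing garbage, which is the behaviour the hunk above adds.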
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 4b175df35184..50976a6481f3 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -21,9 +21,10 @@
21*/ 21*/
22 22
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/crypto.h>
25#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
26#include <crypto/b128ops.h> 25#include <crypto/b128ops.h>
26#include <crypto/hash.h>
27#include <crypto/skcipher.h>
27 28
28#include <net/bluetooth/bluetooth.h> 29#include <net/bluetooth/bluetooth.h>
29#include <net/bluetooth/hci_core.h> 30#include <net/bluetooth/hci_core.h>
@@ -87,8 +88,8 @@ struct smp_dev {
87 u8 min_key_size; 88 u8 min_key_size;
88 u8 max_key_size; 89 u8 max_key_size;
89 90
90 struct crypto_blkcipher *tfm_aes; 91 struct crypto_skcipher *tfm_aes;
91 struct crypto_hash *tfm_cmac; 92 struct crypto_shash *tfm_cmac;
92}; 93};
93 94
94struct smp_chan { 95struct smp_chan {
@@ -126,8 +127,8 @@ struct smp_chan {
126 u8 dhkey[32]; 127 u8 dhkey[32];
127 u8 mackey[16]; 128 u8 mackey[16];
128 129
129 struct crypto_blkcipher *tfm_aes; 130 struct crypto_skcipher *tfm_aes;
130 struct crypto_hash *tfm_cmac; 131 struct crypto_shash *tfm_cmac;
131}; 132};
132 133
133/* These debug key values are defined in the SMP section of the core 134/* These debug key values are defined in the SMP section of the core
@@ -165,12 +166,11 @@ static inline void swap_buf(const u8 *src, u8 *dst, size_t len)
165 * AES-CMAC, f4, f5, f6, g2 and h6. 166 * AES-CMAC, f4, f5, f6, g2 and h6.
166 */ 167 */
167 168
168static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m, 169static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
169 size_t len, u8 mac[16]) 170 size_t len, u8 mac[16])
170{ 171{
171 uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX]; 172 uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX];
172 struct hash_desc desc; 173 SHASH_DESC_ON_STACK(desc, tfm);
173 struct scatterlist sg;
174 int err; 174 int err;
175 175
176 if (len > CMAC_MSG_MAX) 176 if (len > CMAC_MSG_MAX)
@@ -181,10 +181,8 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
181 return -EINVAL; 181 return -EINVAL;
182 } 182 }
183 183
184 desc.tfm = tfm; 184 desc->tfm = tfm;
185 desc.flags = 0; 185 desc->flags = 0;
186
187 crypto_hash_init(&desc);
188 186
189 /* Swap key and message from LSB to MSB */ 187 /* Swap key and message from LSB to MSB */
190 swap_buf(k, tmp, 16); 188 swap_buf(k, tmp, 16);
@@ -193,23 +191,16 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
193 SMP_DBG("msg (len %zu) %*phN", len, (int) len, m); 191 SMP_DBG("msg (len %zu) %*phN", len, (int) len, m);
194 SMP_DBG("key %16phN", k); 192 SMP_DBG("key %16phN", k);
195 193
196 err = crypto_hash_setkey(tfm, tmp, 16); 194 err = crypto_shash_setkey(tfm, tmp, 16);
197 if (err) { 195 if (err) {
198 BT_ERR("cipher setkey failed: %d", err); 196 BT_ERR("cipher setkey failed: %d", err);
199 return err; 197 return err;
200 } 198 }
201 199
202 sg_init_one(&sg, msg_msb, len); 200 err = crypto_shash_digest(desc, msg_msb, len, mac_msb);
203 201 shash_desc_zero(desc);
204 err = crypto_hash_update(&desc, &sg, len);
205 if (err) { 202 if (err) {
206 BT_ERR("Hash update error %d", err); 203 BT_ERR("Hash computation error %d", err);
207 return err;
208 }
209
210 err = crypto_hash_final(&desc, mac_msb);
211 if (err) {
212 BT_ERR("Hash final error %d", err);
213 return err; 204 return err;
214 } 205 }
215 206
@@ -220,8 +211,8 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
220 return 0; 211 return 0;
221} 212}
222 213
223static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32], 214static int smp_f4(struct crypto_shash *tfm_cmac, const u8 u[32],
224 const u8 x[16], u8 z, u8 res[16]) 215 const u8 v[32], const u8 x[16], u8 z, u8 res[16])
225{ 216{
226 u8 m[65]; 217 u8 m[65];
227 int err; 218 int err;
@@ -243,7 +234,7 @@ static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32],
243 return err; 234 return err;
244} 235}
245 236
246static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32], 237static int smp_f5(struct crypto_shash *tfm_cmac, const u8 w[32],
247 const u8 n1[16], const u8 n2[16], const u8 a1[7], 238 const u8 n1[16], const u8 n2[16], const u8 a1[7],
248 const u8 a2[7], u8 mackey[16], u8 ltk[16]) 239 const u8 a2[7], u8 mackey[16], u8 ltk[16])
249{ 240{
@@ -296,7 +287,7 @@ static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32],
296 return 0; 287 return 0;
297} 288}
298 289
299static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], 290static int smp_f6(struct crypto_shash *tfm_cmac, const u8 w[16],
300 const u8 n1[16], const u8 n2[16], const u8 r[16], 291 const u8 n1[16], const u8 n2[16], const u8 r[16],
301 const u8 io_cap[3], const u8 a1[7], const u8 a2[7], 292 const u8 io_cap[3], const u8 a1[7], const u8 a2[7],
302 u8 res[16]) 293 u8 res[16])
@@ -324,7 +315,7 @@ static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16],
324 return err; 315 return err;
325} 316}
326 317
327static int smp_g2(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32], 318static int smp_g2(struct crypto_shash *tfm_cmac, const u8 u[32], const u8 v[32],
328 const u8 x[16], const u8 y[16], u32 *val) 319 const u8 x[16], const u8 y[16], u32 *val)
329{ 320{
330 u8 m[80], tmp[16]; 321 u8 m[80], tmp[16];
@@ -350,7 +341,7 @@ static int smp_g2(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32],
350 return 0; 341 return 0;
351} 342}
352 343
353static int smp_h6(struct crypto_hash *tfm_cmac, const u8 w[16], 344static int smp_h6(struct crypto_shash *tfm_cmac, const u8 w[16],
354 const u8 key_id[4], u8 res[16]) 345 const u8 key_id[4], u8 res[16])
355{ 346{
356 int err; 347 int err;
@@ -370,9 +361,9 @@ static int smp_h6(struct crypto_hash *tfm_cmac, const u8 w[16],
370 * s1 and ah. 361 * s1 and ah.
371 */ 362 */
372 363
373static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) 364static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
374{ 365{
375 struct blkcipher_desc desc; 366 SKCIPHER_REQUEST_ON_STACK(req, tfm);
376 struct scatterlist sg; 367 struct scatterlist sg;
377 uint8_t tmp[16], data[16]; 368 uint8_t tmp[16], data[16];
378 int err; 369 int err;
@@ -384,13 +375,10 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
384 return -EINVAL; 375 return -EINVAL;
385 } 376 }
386 377
387 desc.tfm = tfm;
388 desc.flags = 0;
389
390 /* The most significant octet of key corresponds to k[0] */ 378 /* The most significant octet of key corresponds to k[0] */
391 swap_buf(k, tmp, 16); 379 swap_buf(k, tmp, 16);
392 380
393 err = crypto_blkcipher_setkey(tfm, tmp, 16); 381 err = crypto_skcipher_setkey(tfm, tmp, 16);
394 if (err) { 382 if (err) {
395 BT_ERR("cipher setkey failed: %d", err); 383 BT_ERR("cipher setkey failed: %d", err);
396 return err; 384 return err;
@@ -401,7 +389,12 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
401 389
402 sg_init_one(&sg, data, 16); 390 sg_init_one(&sg, data, 16);
403 391
404 err = crypto_blkcipher_encrypt(&desc, &sg, &sg, 16); 392 skcipher_request_set_tfm(req, tfm);
393 skcipher_request_set_callback(req, 0, NULL, NULL);
394 skcipher_request_set_crypt(req, &sg, &sg, 16, NULL);
395
396 err = crypto_skcipher_encrypt(req);
397 skcipher_request_zero(req);
405 if (err) 398 if (err)
406 BT_ERR("Encrypt data error %d", err); 399 BT_ERR("Encrypt data error %d", err);
407 400
@@ -413,7 +406,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
413 return err; 406 return err;
414} 407}
415 408
416static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16], 409static int smp_c1(struct crypto_skcipher *tfm_aes, const u8 k[16],
417 const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat, 410 const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat,
418 const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16]) 411 const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16])
419{ 412{
@@ -462,7 +455,7 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
462 return err; 455 return err;
463} 456}
464 457
465static int smp_s1(struct crypto_blkcipher *tfm_aes, const u8 k[16], 458static int smp_s1(struct crypto_skcipher *tfm_aes, const u8 k[16],
466 const u8 r1[16], const u8 r2[16], u8 _r[16]) 459 const u8 r1[16], const u8 r2[16], u8 _r[16])
467{ 460{
468 int err; 461 int err;
@@ -478,7 +471,7 @@ static int smp_s1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
478 return err; 471 return err;
479} 472}
480 473
481static int smp_ah(struct crypto_blkcipher *tfm, const u8 irk[16], 474static int smp_ah(struct crypto_skcipher *tfm, const u8 irk[16],
482 const u8 r[3], u8 res[3]) 475 const u8 r[3], u8 res[3])
483{ 476{
484 u8 _res[16]; 477 u8 _res[16];
@@ -766,8 +759,8 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
766 kzfree(smp->slave_csrk); 759 kzfree(smp->slave_csrk);
767 kzfree(smp->link_key); 760 kzfree(smp->link_key);
768 761
769 crypto_free_blkcipher(smp->tfm_aes); 762 crypto_free_skcipher(smp->tfm_aes);
770 crypto_free_hash(smp->tfm_cmac); 763 crypto_free_shash(smp->tfm_cmac);
771 764
772 /* Ensure that we don't leave any debug key around if debug key 765 /* Ensure that we don't leave any debug key around if debug key
773 * support hasn't been explicitly enabled. 766 * support hasn't been explicitly enabled.
@@ -1366,17 +1359,17 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
1366 if (!smp) 1359 if (!smp)
1367 return NULL; 1360 return NULL;
1368 1361
1369 smp->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); 1362 smp->tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
1370 if (IS_ERR(smp->tfm_aes)) { 1363 if (IS_ERR(smp->tfm_aes)) {
1371 BT_ERR("Unable to create ECB crypto context"); 1364 BT_ERR("Unable to create ECB crypto context");
1372 kzfree(smp); 1365 kzfree(smp);
1373 return NULL; 1366 return NULL;
1374 } 1367 }
1375 1368
1376 smp->tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); 1369 smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
1377 if (IS_ERR(smp->tfm_cmac)) { 1370 if (IS_ERR(smp->tfm_cmac)) {
1378 BT_ERR("Unable to create CMAC crypto context"); 1371 BT_ERR("Unable to create CMAC crypto context");
1379 crypto_free_blkcipher(smp->tfm_aes); 1372 crypto_free_skcipher(smp->tfm_aes);
1380 kzfree(smp); 1373 kzfree(smp);
1381 return NULL; 1374 return NULL;
1382 } 1375 }
@@ -3127,8 +3120,8 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3127{ 3120{
3128 struct l2cap_chan *chan; 3121 struct l2cap_chan *chan;
3129 struct smp_dev *smp; 3122 struct smp_dev *smp;
3130 struct crypto_blkcipher *tfm_aes; 3123 struct crypto_skcipher *tfm_aes;
3131 struct crypto_hash *tfm_cmac; 3124 struct crypto_shash *tfm_cmac;
3132 3125
3133 if (cid == L2CAP_CID_SMP_BREDR) { 3126 if (cid == L2CAP_CID_SMP_BREDR) {
3134 smp = NULL; 3127 smp = NULL;
@@ -3139,17 +3132,17 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3139 if (!smp) 3132 if (!smp)
3140 return ERR_PTR(-ENOMEM); 3133 return ERR_PTR(-ENOMEM);
3141 3134
3142 tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); 3135 tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
3143 if (IS_ERR(tfm_aes)) { 3136 if (IS_ERR(tfm_aes)) {
3144 BT_ERR("Unable to create ECB crypto context"); 3137 BT_ERR("Unable to create ECB crypto context");
3145 kzfree(smp); 3138 kzfree(smp);
3146 return ERR_CAST(tfm_aes); 3139 return ERR_CAST(tfm_aes);
3147 } 3140 }
3148 3141
3149 tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); 3142 tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
3150 if (IS_ERR(tfm_cmac)) { 3143 if (IS_ERR(tfm_cmac)) {
3151 BT_ERR("Unable to create CMAC crypto context"); 3144 BT_ERR("Unable to create CMAC crypto context");
3152 crypto_free_blkcipher(tfm_aes); 3145 crypto_free_skcipher(tfm_aes);
3153 kzfree(smp); 3146 kzfree(smp);
3154 return ERR_CAST(tfm_cmac); 3147 return ERR_CAST(tfm_cmac);
3155 } 3148 }
@@ -3163,8 +3156,8 @@ create_chan:
3163 chan = l2cap_chan_create(); 3156 chan = l2cap_chan_create();
3164 if (!chan) { 3157 if (!chan) {
3165 if (smp) { 3158 if (smp) {
3166 crypto_free_blkcipher(smp->tfm_aes); 3159 crypto_free_skcipher(smp->tfm_aes);
3167 crypto_free_hash(smp->tfm_cmac); 3160 crypto_free_shash(smp->tfm_cmac);
3168 kzfree(smp); 3161 kzfree(smp);
3169 } 3162 }
3170 return ERR_PTR(-ENOMEM); 3163 return ERR_PTR(-ENOMEM);
@@ -3210,10 +3203,8 @@ static void smp_del_chan(struct l2cap_chan *chan)
3210 smp = chan->data; 3203 smp = chan->data;
3211 if (smp) { 3204 if (smp) {
3212 chan->data = NULL; 3205 chan->data = NULL;
3213 if (smp->tfm_aes) 3206 crypto_free_skcipher(smp->tfm_aes);
3214 crypto_free_blkcipher(smp->tfm_aes); 3207 crypto_free_shash(smp->tfm_cmac);
3215 if (smp->tfm_cmac)
3216 crypto_free_hash(smp->tfm_cmac);
3217 kzfree(smp); 3208 kzfree(smp);
3218 } 3209 }
3219 3210
@@ -3449,7 +3440,7 @@ void smp_unregister(struct hci_dev *hdev)
3449 3440
3450#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) 3441#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
3451 3442
3452static int __init test_ah(struct crypto_blkcipher *tfm_aes) 3443static int __init test_ah(struct crypto_skcipher *tfm_aes)
3453{ 3444{
3454 const u8 irk[16] = { 3445 const u8 irk[16] = {
3455 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, 3446 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3469,7 +3460,7 @@ static int __init test_ah(struct crypto_blkcipher *tfm_aes)
3469 return 0; 3460 return 0;
3470} 3461}
3471 3462
3472static int __init test_c1(struct crypto_blkcipher *tfm_aes) 3463static int __init test_c1(struct crypto_skcipher *tfm_aes)
3473{ 3464{
3474 const u8 k[16] = { 3465 const u8 k[16] = {
3475 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3466 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3499,7 +3490,7 @@ static int __init test_c1(struct crypto_blkcipher *tfm_aes)
3499 return 0; 3490 return 0;
3500} 3491}
3501 3492
3502static int __init test_s1(struct crypto_blkcipher *tfm_aes) 3493static int __init test_s1(struct crypto_skcipher *tfm_aes)
3503{ 3494{
3504 const u8 k[16] = { 3495 const u8 k[16] = {
3505 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3496 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3524,7 +3515,7 @@ static int __init test_s1(struct crypto_blkcipher *tfm_aes)
3524 return 0; 3515 return 0;
3525} 3516}
3526 3517
3527static int __init test_f4(struct crypto_hash *tfm_cmac) 3518static int __init test_f4(struct crypto_shash *tfm_cmac)
3528{ 3519{
3529 const u8 u[32] = { 3520 const u8 u[32] = {
3530 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, 3521 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc,
@@ -3556,7 +3547,7 @@ static int __init test_f4(struct crypto_hash *tfm_cmac)
3556 return 0; 3547 return 0;
3557} 3548}
3558 3549
3559static int __init test_f5(struct crypto_hash *tfm_cmac) 3550static int __init test_f5(struct crypto_shash *tfm_cmac)
3560{ 3551{
3561 const u8 w[32] = { 3552 const u8 w[32] = {
3562 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, 3553 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86,
@@ -3593,7 +3584,7 @@ static int __init test_f5(struct crypto_hash *tfm_cmac)
3593 return 0; 3584 return 0;
3594} 3585}
3595 3586
3596static int __init test_f6(struct crypto_hash *tfm_cmac) 3587static int __init test_f6(struct crypto_shash *tfm_cmac)
3597{ 3588{
3598 const u8 w[16] = { 3589 const u8 w[16] = {
3599 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, 3590 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd,
@@ -3626,7 +3617,7 @@ static int __init test_f6(struct crypto_hash *tfm_cmac)
3626 return 0; 3617 return 0;
3627} 3618}
3628 3619
3629static int __init test_g2(struct crypto_hash *tfm_cmac) 3620static int __init test_g2(struct crypto_shash *tfm_cmac)
3630{ 3621{
3631 const u8 u[32] = { 3622 const u8 u[32] = {
3632 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, 3623 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc,
@@ -3658,7 +3649,7 @@ static int __init test_g2(struct crypto_hash *tfm_cmac)
3658 return 0; 3649 return 0;
3659} 3650}
3660 3651
3661static int __init test_h6(struct crypto_hash *tfm_cmac) 3652static int __init test_h6(struct crypto_shash *tfm_cmac)
3662{ 3653{
3663 const u8 w[16] = { 3654 const u8 w[16] = {
3664 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, 3655 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3695,8 +3686,8 @@ static const struct file_operations test_smp_fops = {
3695 .llseek = default_llseek, 3686 .llseek = default_llseek,
3696}; 3687};
3697 3688
3698static int __init run_selftests(struct crypto_blkcipher *tfm_aes, 3689static int __init run_selftests(struct crypto_skcipher *tfm_aes,
3699 struct crypto_hash *tfm_cmac) 3690 struct crypto_shash *tfm_cmac)
3700{ 3691{
3701 ktime_t calltime, delta, rettime; 3692 ktime_t calltime, delta, rettime;
3702 unsigned long long duration; 3693 unsigned long long duration;
@@ -3773,27 +3764,27 @@ done:
3773 3764
3774int __init bt_selftest_smp(void) 3765int __init bt_selftest_smp(void)
3775{ 3766{
3776 struct crypto_blkcipher *tfm_aes; 3767 struct crypto_skcipher *tfm_aes;
3777 struct crypto_hash *tfm_cmac; 3768 struct crypto_shash *tfm_cmac;
3778 int err; 3769 int err;
3779 3770
3780 tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); 3771 tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
3781 if (IS_ERR(tfm_aes)) { 3772 if (IS_ERR(tfm_aes)) {
3782 BT_ERR("Unable to create ECB crypto context"); 3773 BT_ERR("Unable to create ECB crypto context");
3783 return PTR_ERR(tfm_aes); 3774 return PTR_ERR(tfm_aes);
3784 } 3775 }
3785 3776
3786 tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); 3777 tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
3787 if (IS_ERR(tfm_cmac)) { 3778 if (IS_ERR(tfm_cmac)) {
3788 BT_ERR("Unable to create CMAC crypto context"); 3779 BT_ERR("Unable to create CMAC crypto context");
3789 crypto_free_blkcipher(tfm_aes); 3780 crypto_free_skcipher(tfm_aes);
3790 return PTR_ERR(tfm_cmac); 3781 return PTR_ERR(tfm_cmac);
3791 } 3782 }
3792 3783
3793 err = run_selftests(tfm_aes, tfm_cmac); 3784 err = run_selftests(tfm_aes, tfm_cmac);
3794 3785
3795 crypto_free_hash(tfm_cmac); 3786 crypto_free_shash(tfm_cmac);
3796 crypto_free_blkcipher(tfm_aes); 3787 crypto_free_skcipher(tfm_aes);
3797 3788
3798 return err; 3789 return err;
3799} 3790}
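The smp.c conversion moves from the legacy crypto_hash/crypto_blkcipher interfaces to crypto_shash and crypto_skcipher. For AES-CMAC this removes the scatterlist and the separate init/update/final calls: a descriptor on the stack plus a single crypto_shash_digest() does the whole computation. A condensed sketch of that synchronous shash usage is below; it mirrors the new aes_cmac() but allocates and frees the transform locally (and omits the byte-order swapping), so it is an illustration rather than a drop-in replacement.

#include <crypto/hash.h>
#include <linux/err.h>

static int cmac_aes(const u8 key[16], const u8 *msg, size_t len, u8 mac[16])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("cmac(aes)", 0, 0);	/* synchronous CMAC transform */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, 16);
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		err = crypto_shash_digest(desc, msg, len, mac);
		shash_desc_zero(desc);	/* wipe key material off the stack */
	}

	crypto_free_shash(tfm);
	return err;
}

The skcipher side is analogous: SKCIPHER_REQUEST_ON_STACK() replaces the blkcipher_desc, and crypto_skcipher_encrypt() consumes a request carrying the scatterlists, as the new smp_e() above shows.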
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index fcdb86dd5a23..f47759f05b6d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -44,7 +44,6 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
44 44
45 skb_push(skb, ETH_HLEN); 45 skb_push(skb, ETH_HLEN);
46 br_drop_fake_rtable(skb); 46 br_drop_fake_rtable(skb);
47 skb_sender_cpu_clear(skb);
48 47
49 if (skb->ip_summed == CHECKSUM_PARTIAL && 48 if (skb->ip_summed == CHECKSUM_PARTIAL &&
50 (skb->protocol == htons(ETH_P_8021Q) || 49 (skb->protocol == htons(ETH_P_8021Q) ||
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c367b3e1b5ac..8217aecf025b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -36,10 +36,10 @@
36 */ 36 */
37static int port_cost(struct net_device *dev) 37static int port_cost(struct net_device *dev)
38{ 38{
39 struct ethtool_cmd ecmd; 39 struct ethtool_link_ksettings ecmd;
40 40
41 if (!__ethtool_get_settings(dev, &ecmd)) { 41 if (!__ethtool_get_link_ksettings(dev, &ecmd)) {
42 switch (ethtool_cmd_speed(&ecmd)) { 42 switch (ecmd.base.speed) {
43 case SPEED_10000: 43 case SPEED_10000:
44 return 2; 44 return 2;
45 case SPEED_1000: 45 case SPEED_1000:
@@ -223,6 +223,31 @@ static void destroy_nbp_rcu(struct rcu_head *head)
223 destroy_nbp(p); 223 destroy_nbp(p);
224} 224}
225 225
226static unsigned get_max_headroom(struct net_bridge *br)
227{
228 unsigned max_headroom = 0;
229 struct net_bridge_port *p;
230
231 list_for_each_entry(p, &br->port_list, list) {
232 unsigned dev_headroom = netdev_get_fwd_headroom(p->dev);
233
234 if (dev_headroom > max_headroom)
235 max_headroom = dev_headroom;
236 }
237
238 return max_headroom;
239}
240
241static void update_headroom(struct net_bridge *br, int new_hr)
242{
243 struct net_bridge_port *p;
244
245 list_for_each_entry(p, &br->port_list, list)
246 netdev_set_rx_headroom(p->dev, new_hr);
247
248 br->dev->needed_headroom = new_hr;
249}
250
226/* Delete port(interface) from bridge is done in two steps. 251/* Delete port(interface) from bridge is done in two steps.
227 * via RCU. First step, marks device as down. That deletes 252 * via RCU. First step, marks device as down. That deletes
228 * all the timers and stops new packets from flowing through. 253 * all the timers and stops new packets from flowing through.
@@ -248,6 +273,9 @@ static void del_nbp(struct net_bridge_port *p)
248 br_ifinfo_notify(RTM_DELLINK, p); 273 br_ifinfo_notify(RTM_DELLINK, p);
249 274
250 list_del_rcu(&p->list); 275 list_del_rcu(&p->list);
276 if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
277 update_headroom(br, get_max_headroom(br));
278 netdev_reset_rx_headroom(dev);
251 279
252 nbp_vlan_flush(p); 280 nbp_vlan_flush(p);
253 br_fdb_delete_by_port(br, p, 0, 1); 281 br_fdb_delete_by_port(br, p, 0, 1);
@@ -409,6 +437,20 @@ int br_min_mtu(const struct net_bridge *br)
409 return mtu; 437 return mtu;
410} 438}
411 439
440static void br_set_gso_limits(struct net_bridge *br)
441{
442 unsigned int gso_max_size = GSO_MAX_SIZE;
443 u16 gso_max_segs = GSO_MAX_SEGS;
444 const struct net_bridge_port *p;
445
446 list_for_each_entry(p, &br->port_list, list) {
447 gso_max_size = min(gso_max_size, p->dev->gso_max_size);
448 gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
449 }
450 br->dev->gso_max_size = gso_max_size;
451 br->dev->gso_max_segs = gso_max_segs;
452}
453
412/* 454/*
413 * Recomputes features using slave's features 455 * Recomputes features using slave's features
414 */ 456 */
@@ -438,6 +480,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
438{ 480{
439 struct net_bridge_port *p; 481 struct net_bridge_port *p;
440 int err = 0; 482 int err = 0;
483 unsigned br_hr, dev_hr;
441 bool changed_addr; 484 bool changed_addr;
442 485
443 /* Don't allow bridging non-ethernet like devices, or DSA-enabled 486 /* Don't allow bridging non-ethernet like devices, or DSA-enabled
@@ -505,8 +548,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
505 548
506 netdev_update_features(br->dev); 549 netdev_update_features(br->dev);
507 550
508 if (br->dev->needed_headroom < dev->needed_headroom) 551 br_hr = br->dev->needed_headroom;
509 br->dev->needed_headroom = dev->needed_headroom; 552 dev_hr = netdev_get_fwd_headroom(dev);
553 if (br_hr < dev_hr)
554 update_headroom(br, dev_hr);
555 else
556 netdev_set_rx_headroom(dev, br_hr);
510 557
511 if (br_fdb_insert(br, p, dev->dev_addr, 0)) 558 if (br_fdb_insert(br, p, dev->dev_addr, 0))
512 netdev_err(dev, "failed insert local address bridge forwarding table\n"); 559 netdev_err(dev, "failed insert local address bridge forwarding table\n");
@@ -531,6 +578,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
531 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); 578 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
532 579
533 dev_set_mtu(br->dev, br_min_mtu(br)); 580 dev_set_mtu(br->dev, br_min_mtu(br));
581 br_set_gso_limits(br);
534 582
535 kobject_uevent(&p->kobj, KOBJ_ADD); 583 kobject_uevent(&p->kobj, KOBJ_ADD);
536 584
@@ -577,6 +625,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
577 del_nbp(p); 625 del_nbp(p);
578 626
579 dev_set_mtu(br->dev, br_min_mtu(br)); 627 dev_set_mtu(br->dev, br_min_mtu(br));
628 br_set_gso_limits(br);
580 629
581 spin_lock_bh(&br->lock); 630 spin_lock_bh(&br->lock);
582 changed_addr = br_stp_recalculate_bridge_id(br); 631 changed_addr = br_stp_recalculate_bridge_id(br);
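
The headroom changes above make the bridge track the largest forwarding headroom among its ports (via netdev_get_fwd_headroom()) and push the result back down to every port with netdev_set_rx_headroom(). A hedged sketch of the receiving side, i.e. what a port driver might do with that hint; the driver name and default constant are assumptions:

#include <linux/netdevice.h>

#define DEMO_DEFAULT_HEADROOM	16	/* illustrative driver default */

/* Invoked through netdev_set_rx_headroom(); a negative value comes from
 * netdev_reset_rx_headroom() and means "fall back to the driver default". */
static void demo_set_rx_headroom(struct net_device *dev, int new_hr)
{
	dev->needed_headroom = new_hr < 0 ? DEMO_DEFAULT_HEADROOM : new_hr;
}

static const struct net_device_ops demo_netdev_ops = {
	.ndo_set_rx_headroom	= demo_set_rx_headroom,
	/* ... remaining ops ... */
};
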
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f7fba74108a9..160797722228 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -222,7 +222,10 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu
222 /* check if vlan is allowed, to avoid spoofing */ 222 /* check if vlan is allowed, to avoid spoofing */
223 if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) 223 if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
224 br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); 224 br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
225 return 0; /* process further */ 225
226 BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
227 br_pass_frame_up(skb);
228 return 0;
226} 229}
227 230
228/* 231/*
@@ -284,14 +287,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
284 } 287 }
285 288
286 /* Deliver packet to local host only */ 289 /* Deliver packet to local host only */
287 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, 290 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
288 dev_net(skb->dev), NULL, skb, skb->dev, NULL, 291 NULL, skb, skb->dev, NULL, br_handle_local_finish);
289 br_handle_local_finish)) { 292 return RX_HANDLER_CONSUMED;
290 return RX_HANDLER_CONSUMED; /* consumed by filter */
291 } else {
292 *pskb = skb;
293 return RX_HANDLER_PASS; /* continue processing */
294 }
295 } 293 }
296 294
297forward: 295forward:
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 263b4de4de57..60a3dbfca8a1 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -21,18 +21,19 @@
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22#include "br_private.h" 22#include "br_private.h"
23 23
24/* called with RTNL */
25static int get_bridge_ifindices(struct net *net, int *indices, int num) 24static int get_bridge_ifindices(struct net *net, int *indices, int num)
26{ 25{
27 struct net_device *dev; 26 struct net_device *dev;
28 int i = 0; 27 int i = 0;
29 28
30 for_each_netdev(net, dev) { 29 rcu_read_lock();
30 for_each_netdev_rcu(net, dev) {
31 if (i >= num) 31 if (i >= num)
32 break; 32 break;
33 if (dev->priv_flags & IFF_EBRIDGE) 33 if (dev->priv_flags & IFF_EBRIDGE)
34 indices[i++] = dev->ifindex; 34 indices[i++] = dev->ifindex;
35 } 35 }
36 rcu_read_unlock();
36 37
37 return i; 38 return i;
38} 39}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 74c278e00225..7dbc80d01eb0 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -20,7 +20,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
20{ 20{
21 struct net_bridge *br = netdev_priv(dev); 21 struct net_bridge *br = netdev_priv(dev);
22 struct net_bridge_port *p; 22 struct net_bridge_port *p;
23 struct nlattr *nest; 23 struct nlattr *nest, *port_nest;
24 24
25 if (!br->multicast_router || hlist_empty(&br->router_list)) 25 if (!br->multicast_router || hlist_empty(&br->router_list))
26 return 0; 26 return 0;
@@ -30,8 +30,20 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
30 return -EMSGSIZE; 30 return -EMSGSIZE;
31 31
32 hlist_for_each_entry_rcu(p, &br->router_list, rlist) { 32 hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
33 if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) 33 if (!p)
34 continue;
35 port_nest = nla_nest_start(skb, MDBA_ROUTER_PORT);
36 if (!port_nest)
37 goto fail;
38 if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) ||
39 nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
40 br_timer_value(&p->multicast_router_timer)) ||
41 nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
42 p->multicast_router)) {
43 nla_nest_cancel(skb, port_nest);
34 goto fail; 44 goto fail;
45 }
46 nla_nest_end(skb, port_nest);
35 } 47 }
36 48
37 nla_nest_end(skb, nest); 49 nla_nest_end(skb, nest);
@@ -41,6 +53,27 @@ fail:
41 return -EMSGSIZE; 53 return -EMSGSIZE;
42} 54}
43 55
56static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
57{
58 e->state = flags & MDB_PG_FLAGS_PERMANENT;
59 e->flags = 0;
60 if (flags & MDB_PG_FLAGS_OFFLOAD)
61 e->flags |= MDB_FLAGS_OFFLOAD;
62}
63
64static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
65{
66 memset(ip, 0, sizeof(struct br_ip));
67 ip->vid = entry->vid;
68 ip->proto = entry->addr.proto;
69 if (ip->proto == htons(ETH_P_IP))
70 ip->u.ip4 = entry->addr.u.ip4;
71#if IS_ENABLED(CONFIG_IPV6)
72 else
73 ip->u.ip6 = entry->addr.u.ip6;
74#endif
75}
76
44static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, 77static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
45 struct net_device *dev) 78 struct net_device *dev)
46{ 79{
@@ -80,26 +113,41 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
80 for (pp = &mp->ports; 113 for (pp = &mp->ports;
81 (p = rcu_dereference(*pp)) != NULL; 114 (p = rcu_dereference(*pp)) != NULL;
82 pp = &p->next) { 115 pp = &p->next) {
116 struct nlattr *nest_ent;
117 struct br_mdb_entry e;
118
83 port = p->port; 119 port = p->port;
84 if (port) { 120 if (!port)
85 struct br_mdb_entry e; 121 continue;
86 memset(&e, 0, sizeof(e)); 122
87 e.ifindex = port->dev->ifindex; 123 memset(&e, 0, sizeof(e));
88 e.state = p->state; 124 e.ifindex = port->dev->ifindex;
89 e.vid = p->addr.vid; 125 e.vid = p->addr.vid;
90 if (p->addr.proto == htons(ETH_P_IP)) 126 __mdb_entry_fill_flags(&e, p->flags);
91 e.addr.u.ip4 = p->addr.u.ip4; 127 if (p->addr.proto == htons(ETH_P_IP))
128 e.addr.u.ip4 = p->addr.u.ip4;
92#if IS_ENABLED(CONFIG_IPV6) 129#if IS_ENABLED(CONFIG_IPV6)
93 if (p->addr.proto == htons(ETH_P_IPV6)) 130 if (p->addr.proto == htons(ETH_P_IPV6))
94 e.addr.u.ip6 = p->addr.u.ip6; 131 e.addr.u.ip6 = p->addr.u.ip6;
95#endif 132#endif
96 e.addr.proto = p->addr.proto; 133 e.addr.proto = p->addr.proto;
97 if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { 134 nest_ent = nla_nest_start(skb,
98 nla_nest_cancel(skb, nest2); 135 MDBA_MDB_ENTRY_INFO);
99 err = -EMSGSIZE; 136 if (!nest_ent) {
100 goto out; 137 nla_nest_cancel(skb, nest2);
101 } 138 err = -EMSGSIZE;
139 goto out;
102 } 140 }
141 if (nla_put_nohdr(skb, sizeof(e), &e) ||
142 nla_put_u32(skb,
143 MDBA_MDB_EATTR_TIMER,
144 br_timer_value(&p->timer))) {
145 nla_nest_cancel(skb, nest_ent);
146 nla_nest_cancel(skb, nest2);
147 err = -EMSGSIZE;
148 goto out;
149 }
150 nla_nest_end(skb, nest_ent);
103 } 151 }
104 nla_nest_end(skb, nest2); 152 nla_nest_end(skb, nest2);
105 skip: 153 skip:
@@ -208,9 +256,45 @@ static inline size_t rtnl_mdb_nlmsg_size(void)
208 + nla_total_size(sizeof(struct br_mdb_entry)); 256 + nla_total_size(sizeof(struct br_mdb_entry));
209} 257}
210 258
211static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, 259struct br_mdb_complete_info {
212 int type) 260 struct net_bridge_port *port;
261 struct br_ip ip;
262};
263
264static void br_mdb_complete(struct net_device *dev, int err, void *priv)
265{
266 struct br_mdb_complete_info *data = priv;
267 struct net_bridge_port_group __rcu **pp;
268 struct net_bridge_port_group *p;
269 struct net_bridge_mdb_htable *mdb;
270 struct net_bridge_mdb_entry *mp;
271 struct net_bridge_port *port = data->port;
272 struct net_bridge *br = port->br;
273
274 if (err)
275 goto err;
276
277 spin_lock_bh(&br->multicast_lock);
278 mdb = mlock_dereference(br->mdb, br);
279 mp = br_mdb_ip_get(mdb, &data->ip);
280 if (!mp)
281 goto out;
282 for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
283 pp = &p->next) {
284 if (p->port != port)
285 continue;
286 p->flags |= MDB_PG_FLAGS_OFFLOAD;
287 }
288out:
289 spin_unlock_bh(&br->multicast_lock);
290err:
291 kfree(priv);
292}
293
294static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
295 struct br_mdb_entry *entry, int type)
213{ 296{
297 struct br_mdb_complete_info *complete_info;
214 struct switchdev_obj_port_mdb mdb = { 298 struct switchdev_obj_port_mdb mdb = {
215 .obj = { 299 .obj = {
216 .id = SWITCHDEV_OBJ_ID_PORT_MDB, 300 .id = SWITCHDEV_OBJ_ID_PORT_MDB,
@@ -232,10 +316,18 @@ static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
232#endif 316#endif
233 317
234 mdb.obj.orig_dev = port_dev; 318 mdb.obj.orig_dev = port_dev;
235 if (port_dev && type == RTM_NEWMDB) 319 if (port_dev && type == RTM_NEWMDB) {
236 switchdev_port_obj_add(port_dev, &mdb.obj); 320 complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
237 else if (port_dev && type == RTM_DELMDB) 321 if (complete_info) {
322 complete_info->port = p;
323 __mdb_entry_to_br_ip(entry, &complete_info->ip);
324 mdb.obj.complete_priv = complete_info;
325 mdb.obj.complete = br_mdb_complete;
326 switchdev_port_obj_add(port_dev, &mdb.obj);
327 }
328 } else if (port_dev && type == RTM_DELMDB) {
238 switchdev_port_obj_del(port_dev, &mdb.obj); 329 switchdev_port_obj_del(port_dev, &mdb.obj);
330 }
239 331
240 skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); 332 skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
241 if (!skb) 333 if (!skb)
@@ -254,7 +346,7 @@ errout:
254} 346}
255 347
256void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, 348void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
257 struct br_ip *group, int type, u8 state) 349 struct br_ip *group, int type, u8 flags)
258{ 350{
259 struct br_mdb_entry entry; 351 struct br_mdb_entry entry;
260 352
@@ -265,9 +357,9 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
265#if IS_ENABLED(CONFIG_IPV6) 357#if IS_ENABLED(CONFIG_IPV6)
266 entry.addr.u.ip6 = group->u.ip6; 358 entry.addr.u.ip6 = group->u.ip6;
267#endif 359#endif
268 entry.state = state;
269 entry.vid = group->vid; 360 entry.vid = group->vid;
270 __br_mdb_notify(dev, &entry, type); 361 __mdb_entry_fill_flags(&entry, flags);
362 __br_mdb_notify(dev, port, &entry, type);
271} 363}
272 364
273static int nlmsg_populate_rtr_fill(struct sk_buff *skb, 365static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
@@ -468,15 +560,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
468 if (!p || p->br != br || p->state == BR_STATE_DISABLED) 560 if (!p || p->br != br || p->state == BR_STATE_DISABLED)
469 return -EINVAL; 561 return -EINVAL;
470 562
471 memset(&ip, 0, sizeof(ip)); 563 __mdb_entry_to_br_ip(entry, &ip);
472 ip.vid = entry->vid;
473 ip.proto = entry->addr.proto;
474 if (ip.proto == htons(ETH_P_IP))
475 ip.u.ip4 = entry->addr.u.ip4;
476#if IS_ENABLED(CONFIG_IPV6)
477 else
478 ip.u.ip6 = entry->addr.u.ip6;
479#endif
480 564
481 spin_lock_bh(&br->multicast_lock); 565 spin_lock_bh(&br->multicast_lock);
482 ret = br_mdb_add_group(br, p, &ip, entry->state); 566 ret = br_mdb_add_group(br, p, &ip, entry->state);
@@ -519,12 +603,12 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
519 err = __br_mdb_add(net, br, entry); 603 err = __br_mdb_add(net, br, entry);
520 if (err) 604 if (err)
521 break; 605 break;
522 __br_mdb_notify(dev, entry, RTM_NEWMDB); 606 __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
523 } 607 }
524 } else { 608 } else {
525 err = __br_mdb_add(net, br, entry); 609 err = __br_mdb_add(net, br, entry);
526 if (!err) 610 if (!err)
527 __br_mdb_notify(dev, entry, RTM_NEWMDB); 611 __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
528 } 612 }
529 613
530 return err; 614 return err;
@@ -542,15 +626,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
542 if (!netif_running(br->dev) || br->multicast_disabled) 626 if (!netif_running(br->dev) || br->multicast_disabled)
543 return -EINVAL; 627 return -EINVAL;
544 628
545 memset(&ip, 0, sizeof(ip)); 629 __mdb_entry_to_br_ip(entry, &ip);
546 ip.vid = entry->vid;
547 ip.proto = entry->addr.proto;
548 if (ip.proto == htons(ETH_P_IP))
549 ip.u.ip4 = entry->addr.u.ip4;
550#if IS_ENABLED(CONFIG_IPV6)
551 else
552 ip.u.ip6 = entry->addr.u.ip6;
553#endif
554 630
555 spin_lock_bh(&br->multicast_lock); 631 spin_lock_bh(&br->multicast_lock);
556 mdb = mlock_dereference(br->mdb, br); 632 mdb = mlock_dereference(br->mdb, br);
@@ -568,7 +644,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
568 if (p->port->state == BR_STATE_DISABLED) 644 if (p->port->state == BR_STATE_DISABLED)
569 goto unlock; 645 goto unlock;
570 646
571 entry->state = p->state; 647 __mdb_entry_fill_flags(entry, p->flags);
572 rcu_assign_pointer(*pp, p->next); 648 rcu_assign_pointer(*pp, p->next);
573 hlist_del_init(&p->mglist); 649 hlist_del_init(&p->mglist);
574 del_timer(&p->timer); 650 del_timer(&p->timer);
@@ -620,12 +696,12 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
620 entry->vid = v->vid; 696 entry->vid = v->vid;
621 err = __br_mdb_del(br, entry); 697 err = __br_mdb_del(br, entry);
622 if (!err) 698 if (!err)
623 __br_mdb_notify(dev, entry, RTM_DELMDB); 699 __br_mdb_notify(dev, p, entry, RTM_DELMDB);
624 } 700 }
625 } else { 701 } else {
626 err = __br_mdb_del(br, entry); 702 err = __br_mdb_del(br, entry);
627 if (!err) 703 if (!err)
628 __br_mdb_notify(dev, entry, RTM_DELMDB); 704 __br_mdb_notify(dev, p, entry, RTM_DELMDB);
629 } 705 }
630 706
631 return err; 707 return err;
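
br_mdb.c now wraps each dumped group entry in a nested attribute so a per-entry timer (and, for router ports, timer and type) can ride along. A compact sketch of the nesting pattern used above; the helper itself is illustrative, the attribute names are the ones from the hunks:

#include <net/netlink.h>
#include <linux/if_bridge.h>

/* Emit one MDB entry as a nested attribute: the legacy struct first (headerless,
 * so old listeners still find it at the same offset), then the new timer attr. */
static int demo_fill_mdb_entry(struct sk_buff *skb, const struct br_mdb_entry *e,
			       unsigned long timer)
{
	struct nlattr *nest = nla_nest_start(skb, MDBA_MDB_ENTRY_INFO);

	if (!nest)
		return -EMSGSIZE;
	if (nla_put_nohdr(skb, sizeof(*e), e) ||
	    nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, timer)) {
		nla_nest_cancel(skb, nest);	/* roll back the partial nest */
		return -EMSGSIZE;
	}
	nla_nest_end(skb, nest);
	return 0;
}

Emitting the struct with nla_put_nohdr() before the new attributes is presumably what keeps the payload backward compatible with userspace that expected a flat MDBA_MDB_ENTRY_INFO.
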
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 03661d97463c..6852f3c7009c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -284,7 +284,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
284 hlist_del_init(&p->mglist); 284 hlist_del_init(&p->mglist);
285 del_timer(&p->timer); 285 del_timer(&p->timer);
286 br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, 286 br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
287 p->state); 287 p->flags);
288 call_rcu_bh(&p->rcu, br_multicast_free_pg); 288 call_rcu_bh(&p->rcu, br_multicast_free_pg);
289 289
290 if (!mp->ports && !mp->mglist && 290 if (!mp->ports && !mp->mglist &&
@@ -304,7 +304,7 @@ static void br_multicast_port_group_expired(unsigned long data)
304 304
305 spin_lock(&br->multicast_lock); 305 spin_lock(&br->multicast_lock);
306 if (!netif_running(br->dev) || timer_pending(&pg->timer) || 306 if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
307 hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) 307 hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT)
308 goto out; 308 goto out;
309 309
310 br_multicast_del_pg(br, pg); 310 br_multicast_del_pg(br, pg);
@@ -649,7 +649,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
649 struct net_bridge_port *port, 649 struct net_bridge_port *port,
650 struct br_ip *group, 650 struct br_ip *group,
651 struct net_bridge_port_group __rcu *next, 651 struct net_bridge_port_group __rcu *next,
652 unsigned char state) 652 unsigned char flags)
653{ 653{
654 struct net_bridge_port_group *p; 654 struct net_bridge_port_group *p;
655 655
@@ -659,7 +659,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
659 659
660 p->addr = *group; 660 p->addr = *group;
661 p->port = port; 661 p->port = port;
662 p->state = state; 662 p->flags = flags;
663 rcu_assign_pointer(p->next, next); 663 rcu_assign_pointer(p->next, next);
664 hlist_add_head(&p->mglist, &port->mglist); 664 hlist_add_head(&p->mglist, &port->mglist);
665 setup_timer(&p->timer, br_multicast_port_group_expired, 665 setup_timer(&p->timer, br_multicast_port_group_expired,
@@ -702,11 +702,11 @@ static int br_multicast_add_group(struct net_bridge *br,
702 break; 702 break;
703 } 703 }
704 704
705 p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); 705 p = br_multicast_new_port_group(port, group, *pp, 0);
706 if (unlikely(!p)) 706 if (unlikely(!p))
707 goto err; 707 goto err;
708 rcu_assign_pointer(*pp, p); 708 rcu_assign_pointer(*pp, p);
709 br_mdb_notify(br->dev, port, group, RTM_NEWMDB, MDB_TEMPORARY); 709 br_mdb_notify(br->dev, port, group, RTM_NEWMDB, 0);
710 710
711found: 711found:
712 mod_timer(&p->timer, now + br->multicast_membership_interval); 712 mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -760,13 +760,17 @@ static void br_multicast_router_expired(unsigned long data)
760 struct net_bridge *br = port->br; 760 struct net_bridge *br = port->br;
761 761
762 spin_lock(&br->multicast_lock); 762 spin_lock(&br->multicast_lock);
763 if (port->multicast_router != 1 || 763 if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
764 port->multicast_router == MDB_RTR_TYPE_PERM ||
764 timer_pending(&port->multicast_router_timer) || 765 timer_pending(&port->multicast_router_timer) ||
765 hlist_unhashed(&port->rlist)) 766 hlist_unhashed(&port->rlist))
766 goto out; 767 goto out;
767 768
768 hlist_del_init_rcu(&port->rlist); 769 hlist_del_init_rcu(&port->rlist);
769 br_rtr_notify(br->dev, port, RTM_DELMDB); 770 br_rtr_notify(br->dev, port, RTM_DELMDB);
771 /* Don't allow timer refresh if the router expired */
772 if (port->multicast_router == MDB_RTR_TYPE_TEMP)
773 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
770 774
771out: 775out:
772 spin_unlock(&br->multicast_lock); 776 spin_unlock(&br->multicast_lock);
@@ -913,7 +917,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data)
913 917
914void br_multicast_add_port(struct net_bridge_port *port) 918void br_multicast_add_port(struct net_bridge_port *port)
915{ 919{
916 port->multicast_router = 1; 920 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
917 921
918 setup_timer(&port->multicast_router_timer, br_multicast_router_expired, 922 setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
919 (unsigned long)port); 923 (unsigned long)port);
@@ -960,7 +964,8 @@ void br_multicast_enable_port(struct net_bridge_port *port)
960#if IS_ENABLED(CONFIG_IPV6) 964#if IS_ENABLED(CONFIG_IPV6)
961 br_multicast_enable(&port->ip6_own_query); 965 br_multicast_enable(&port->ip6_own_query);
962#endif 966#endif
963 if (port->multicast_router == 2 && hlist_unhashed(&port->rlist)) 967 if (port->multicast_router == MDB_RTR_TYPE_PERM &&
968 hlist_unhashed(&port->rlist))
964 br_multicast_add_router(br, port); 969 br_multicast_add_router(br, port);
965 970
966out: 971out:
@@ -975,12 +980,15 @@ void br_multicast_disable_port(struct net_bridge_port *port)
975 980
976 spin_lock(&br->multicast_lock); 981 spin_lock(&br->multicast_lock);
977 hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) 982 hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
978 if (pg->state == MDB_TEMPORARY) 983 if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
979 br_multicast_del_pg(br, pg); 984 br_multicast_del_pg(br, pg);
980 985
981 if (!hlist_unhashed(&port->rlist)) { 986 if (!hlist_unhashed(&port->rlist)) {
982 hlist_del_init_rcu(&port->rlist); 987 hlist_del_init_rcu(&port->rlist);
983 br_rtr_notify(br->dev, port, RTM_DELMDB); 988 br_rtr_notify(br->dev, port, RTM_DELMDB);
989 /* Don't allow timer refresh if disabling */
990 if (port->multicast_router == MDB_RTR_TYPE_TEMP)
991 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
984 } 992 }
985 del_timer(&port->multicast_router_timer); 993 del_timer(&port->multicast_router_timer);
986 del_timer(&port->ip4_own_query.timer); 994 del_timer(&port->ip4_own_query.timer);
@@ -1228,13 +1236,14 @@ static void br_multicast_mark_router(struct net_bridge *br,
1228 unsigned long now = jiffies; 1236 unsigned long now = jiffies;
1229 1237
1230 if (!port) { 1238 if (!port) {
1231 if (br->multicast_router == 1) 1239 if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
1232 mod_timer(&br->multicast_router_timer, 1240 mod_timer(&br->multicast_router_timer,
1233 now + br->multicast_querier_interval); 1241 now + br->multicast_querier_interval);
1234 return; 1242 return;
1235 } 1243 }
1236 1244
1237 if (port->multicast_router != 1) 1245 if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
1246 port->multicast_router == MDB_RTR_TYPE_PERM)
1238 return; 1247 return;
1239 1248
1240 br_multicast_add_router(br, port); 1249 br_multicast_add_router(br, port);
@@ -1270,6 +1279,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1270 struct br_ip saddr; 1279 struct br_ip saddr;
1271 unsigned long max_delay; 1280 unsigned long max_delay;
1272 unsigned long now = jiffies; 1281 unsigned long now = jiffies;
1282 unsigned int offset = skb_transport_offset(skb);
1273 __be32 group; 1283 __be32 group;
1274 int err = 0; 1284 int err = 0;
1275 1285
@@ -1280,14 +1290,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1280 1290
1281 group = ih->group; 1291 group = ih->group;
1282 1292
1283 if (skb->len == sizeof(*ih)) { 1293 if (skb->len == offset + sizeof(*ih)) {
1284 max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); 1294 max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
1285 1295
1286 if (!max_delay) { 1296 if (!max_delay) {
1287 max_delay = 10 * HZ; 1297 max_delay = 10 * HZ;
1288 group = 0; 1298 group = 0;
1289 } 1299 }
1290 } else if (skb->len >= sizeof(*ih3)) { 1300 } else if (skb->len >= offset + sizeof(*ih3)) {
1291 ih3 = igmpv3_query_hdr(skb); 1301 ih3 = igmpv3_query_hdr(skb);
1292 if (ih3->nsrcs) 1302 if (ih3->nsrcs)
1293 goto out; 1303 goto out;
@@ -1348,6 +1358,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1348 struct br_ip saddr; 1358 struct br_ip saddr;
1349 unsigned long max_delay; 1359 unsigned long max_delay;
1350 unsigned long now = jiffies; 1360 unsigned long now = jiffies;
1361 unsigned int offset = skb_transport_offset(skb);
1351 const struct in6_addr *group = NULL; 1362 const struct in6_addr *group = NULL;
1352 bool is_general_query; 1363 bool is_general_query;
1353 int err = 0; 1364 int err = 0;
@@ -1357,8 +1368,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1357 (port && port->state == BR_STATE_DISABLED)) 1368 (port && port->state == BR_STATE_DISABLED))
1358 goto out; 1369 goto out;
1359 1370
1360 if (skb->len == sizeof(*mld)) { 1371 if (skb->len == offset + sizeof(*mld)) {
1361 if (!pskb_may_pull(skb, sizeof(*mld))) { 1372 if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
1362 err = -EINVAL; 1373 err = -EINVAL;
1363 goto out; 1374 goto out;
1364 } 1375 }
@@ -1367,7 +1378,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1367 if (max_delay) 1378 if (max_delay)
1368 group = &mld->mld_mca; 1379 group = &mld->mld_mca;
1369 } else { 1380 } else {
1370 if (!pskb_may_pull(skb, sizeof(*mld2q))) { 1381 if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) {
1371 err = -EINVAL; 1382 err = -EINVAL;
1372 goto out; 1383 goto out;
1373 } 1384 }
@@ -1454,7 +1465,7 @@ br_multicast_leave_group(struct net_bridge *br,
1454 del_timer(&p->timer); 1465 del_timer(&p->timer);
1455 call_rcu_bh(&p->rcu, br_multicast_free_pg); 1466 call_rcu_bh(&p->rcu, br_multicast_free_pg);
1456 br_mdb_notify(br->dev, port, group, RTM_DELMDB, 1467 br_mdb_notify(br->dev, port, group, RTM_DELMDB,
1457 p->state); 1468 p->flags);
1458 1469
1459 if (!mp->ports && !mp->mglist && 1470 if (!mp->ports && !mp->mglist &&
1460 netif_running(br->dev)) 1471 netif_running(br->dev))
@@ -1715,7 +1726,7 @@ void br_multicast_init(struct net_bridge *br)
1715 br->hash_elasticity = 4; 1726 br->hash_elasticity = 4;
1716 br->hash_max = 512; 1727 br->hash_max = 512;
1717 1728
1718 br->multicast_router = 1; 1729 br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
1719 br->multicast_querier = 0; 1730 br->multicast_querier = 0;
1720 br->multicast_query_use_ifaddr = 0; 1731 br->multicast_query_use_ifaddr = 0;
1721 br->multicast_last_member_count = 2; 1732 br->multicast_last_member_count = 2;
@@ -1825,11 +1836,11 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
1825 spin_lock_bh(&br->multicast_lock); 1836 spin_lock_bh(&br->multicast_lock);
1826 1837
1827 switch (val) { 1838 switch (val) {
1828 case 0: 1839 case MDB_RTR_TYPE_DISABLED:
1829 case 2: 1840 case MDB_RTR_TYPE_PERM:
1830 del_timer(&br->multicast_router_timer); 1841 del_timer(&br->multicast_router_timer);
1831 /* fall through */ 1842 /* fall through */
1832 case 1: 1843 case MDB_RTR_TYPE_TEMP_QUERY:
1833 br->multicast_router = val; 1844 br->multicast_router = val;
1834 err = 0; 1845 err = 0;
1835 break; 1846 break;
@@ -1840,37 +1851,53 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
1840 return err; 1851 return err;
1841} 1852}
1842 1853
1854static void __del_port_router(struct net_bridge_port *p)
1855{
1856 if (hlist_unhashed(&p->rlist))
1857 return;
1858 hlist_del_init_rcu(&p->rlist);
1859 br_rtr_notify(p->br->dev, p, RTM_DELMDB);
1860}
1861
1843int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) 1862int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
1844{ 1863{
1845 struct net_bridge *br = p->br; 1864 struct net_bridge *br = p->br;
1865 unsigned long now = jiffies;
1846 int err = -EINVAL; 1866 int err = -EINVAL;
1847 1867
1848 spin_lock(&br->multicast_lock); 1868 spin_lock(&br->multicast_lock);
1849 1869 if (p->multicast_router == val) {
1850 switch (val) { 1870 /* Refresh the temp router port timer */
1851 case 0: 1871 if (p->multicast_router == MDB_RTR_TYPE_TEMP)
1852 case 1: 1872 mod_timer(&p->multicast_router_timer,
1853 case 2: 1873 now + br->multicast_querier_interval);
1854 p->multicast_router = val;
1855 err = 0; 1874 err = 0;
1856 1875 goto unlock;
1857 if (val < 2 && !hlist_unhashed(&p->rlist)) { 1876 }
1858 hlist_del_init_rcu(&p->rlist); 1877 switch (val) {
1859 br_rtr_notify(br->dev, p, RTM_DELMDB); 1878 case MDB_RTR_TYPE_DISABLED:
1860 } 1879 p->multicast_router = MDB_RTR_TYPE_DISABLED;
1861 1880 __del_port_router(p);
1862 if (val == 1) 1881 del_timer(&p->multicast_router_timer);
1863 break; 1882 break;
1864 1883 case MDB_RTR_TYPE_TEMP_QUERY:
1884 p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
1885 __del_port_router(p);
1886 break;
1887 case MDB_RTR_TYPE_PERM:
1888 p->multicast_router = MDB_RTR_TYPE_PERM;
1865 del_timer(&p->multicast_router_timer); 1889 del_timer(&p->multicast_router_timer);
1866
1867 if (val == 0)
1868 break;
1869
1870 br_multicast_add_router(br, p); 1890 br_multicast_add_router(br, p);
1871 break; 1891 break;
1892 case MDB_RTR_TYPE_TEMP:
1893 p->multicast_router = MDB_RTR_TYPE_TEMP;
1894 br_multicast_mark_router(br, p);
1895 break;
1896 default:
1897 goto unlock;
1872 } 1898 }
1873 1899 err = 0;
1900unlock:
1874 spin_unlock(&br->multicast_lock); 1901 spin_unlock(&br->multicast_lock);
1875 1902
1876 return err; 1903 return err;
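
The multicast hunks replace the bare 0/1/2 router values with named constants and add a fourth, temporary, state. For reference, the values they map to; the enum itself is introduced in the UAPI header elsewhere in this series, so treat this as a sketch:

/* Multicast router port types switched on above; the numeric values keep the
 * old 0/1/2 sysfs semantics, with TEMP added as the new learned state. */
enum {
	MDB_RTR_TYPE_DISABLED,		/* 0: never a router port */
	MDB_RTR_TYPE_TEMP_QUERY,	/* 1: learn from queries (default) */
	MDB_RTR_TYPE_PERM,		/* 2: always a router port */
	MDB_RTR_TYPE_TEMP,		/* 3: temporary, refreshed by the router timer */
};
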
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 7ddbe7ec81d6..44114a94c576 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -37,6 +37,7 @@
37#include <net/addrconf.h> 37#include <net/addrconf.h>
38#include <net/route.h> 38#include <net/route.h>
39#include <net/netfilter/br_netfilter.h> 39#include <net/netfilter/br_netfilter.h>
40#include <net/netns/generic.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include "br_private.h" 43#include "br_private.h"
@@ -44,6 +45,12 @@
44#include <linux/sysctl.h> 45#include <linux/sysctl.h>
45#endif 46#endif
46 47
48static int brnf_net_id __read_mostly;
49
50struct brnf_net {
51 bool enabled;
52};
53
47#ifdef CONFIG_SYSCTL 54#ifdef CONFIG_SYSCTL
48static struct ctl_table_header *brnf_sysctl_header; 55static struct ctl_table_header *brnf_sysctl_header;
49static int brnf_call_iptables __read_mostly = 1; 56static int brnf_call_iptables __read_mostly = 1;
@@ -938,6 +945,53 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
938 }, 945 },
939}; 946};
940 947
948static int brnf_device_event(struct notifier_block *unused, unsigned long event,
949 void *ptr)
950{
951 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
952 struct brnf_net *brnet;
953 struct net *net;
954 int ret;
955
956 if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE))
957 return NOTIFY_DONE;
958
959 ASSERT_RTNL();
960
961 net = dev_net(dev);
962 brnet = net_generic(net, brnf_net_id);
963 if (brnet->enabled)
964 return NOTIFY_OK;
965
966 ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
967 if (ret)
968 return NOTIFY_BAD;
969
970 brnet->enabled = true;
971 return NOTIFY_OK;
972}
973
974static void __net_exit brnf_exit_net(struct net *net)
975{
976 struct brnf_net *brnet = net_generic(net, brnf_net_id);
977
978 if (!brnet->enabled)
979 return;
980
981 nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
982 brnet->enabled = false;
983}
984
985static struct pernet_operations brnf_net_ops __read_mostly = {
986 .exit = brnf_exit_net,
987 .id = &brnf_net_id,
988 .size = sizeof(struct brnf_net),
989};
990
991static struct notifier_block brnf_notifier __read_mostly = {
992 .notifier_call = brnf_device_event,
993};
994
941#ifdef CONFIG_SYSCTL 995#ifdef CONFIG_SYSCTL
942static 996static
943int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, 997int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
@@ -1003,16 +1057,23 @@ static int __init br_netfilter_init(void)
1003{ 1057{
1004 int ret; 1058 int ret;
1005 1059
1006 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1060 ret = register_pernet_subsys(&brnf_net_ops);
1007 if (ret < 0) 1061 if (ret < 0)
1008 return ret; 1062 return ret;
1009 1063
1064 ret = register_netdevice_notifier(&brnf_notifier);
1065 if (ret < 0) {
1066 unregister_pernet_subsys(&brnf_net_ops);
1067 return ret;
1068 }
1069
1010#ifdef CONFIG_SYSCTL 1070#ifdef CONFIG_SYSCTL
1011 brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); 1071 brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
1012 if (brnf_sysctl_header == NULL) { 1072 if (brnf_sysctl_header == NULL) {
1013 printk(KERN_WARNING 1073 printk(KERN_WARNING
1014 "br_netfilter: can't register to sysctl.\n"); 1074 "br_netfilter: can't register to sysctl.\n");
1015 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1075 unregister_netdevice_notifier(&brnf_notifier);
1076 unregister_pernet_subsys(&brnf_net_ops);
1016 return -ENOMEM; 1077 return -ENOMEM;
1017 } 1078 }
1018#endif 1079#endif
@@ -1024,7 +1085,8 @@ static int __init br_netfilter_init(void)
1024static void __exit br_netfilter_fini(void) 1085static void __exit br_netfilter_fini(void)
1025{ 1086{
1026 RCU_INIT_POINTER(nf_br_ops, NULL); 1087 RCU_INIT_POINTER(nf_br_ops, NULL);
1027 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1088 unregister_netdevice_notifier(&brnf_notifier);
1089 unregister_pernet_subsys(&brnf_net_ops);
1028#ifdef CONFIG_SYSCTL 1090#ifdef CONFIG_SYSCTL
1029 unregister_net_sysctl_table(brnf_sysctl_header); 1091 unregister_net_sysctl_table(brnf_sysctl_header);
1030#endif 1092#endif
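
br_netfilter now registers its hooks per network namespace, and only lazily, when the first bridge appears in that namespace (the NETDEV_REGISTER notifier above). State is kept in a small per-netns blob allocated automatically through the .id/.size fields of the pernet_operations. A sketch of the lookup pattern; the demo helper is not part of the patch:

#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* Fetch this namespace's brnf state; the allocation already happened when the
 * netns was created because brnf_net_ops supplies .id and .size. */
static bool demo_brnf_enabled(struct net *net)
{
	struct brnf_net *brnet = net_generic(net, brnf_net_id);

	return brnet->enabled;
}
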
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 40197ff8918a..e9c635eae24d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -598,7 +598,6 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
598 return -ENETDOWN; 598 return -ENETDOWN;
599 599
600 br_set_state(p, state); 600 br_set_state(p, state);
601 br_log_state(p);
602 br_port_state_selection(p->br); 601 br_port_state_selection(p->br);
603 return 0; 602 return 0;
604} 603}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 216018c76018..d9da857182ef 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -150,6 +150,9 @@ struct net_bridge_fdb_entry
150 struct rcu_head rcu; 150 struct rcu_head rcu;
151}; 151};
152 152
153#define MDB_PG_FLAGS_PERMANENT BIT(0)
154#define MDB_PG_FLAGS_OFFLOAD BIT(1)
155
153struct net_bridge_port_group { 156struct net_bridge_port_group {
154 struct net_bridge_port *port; 157 struct net_bridge_port *port;
155 struct net_bridge_port_group __rcu *next; 158 struct net_bridge_port_group __rcu *next;
@@ -157,7 +160,7 @@ struct net_bridge_port_group {
157 struct rcu_head rcu; 160 struct rcu_head rcu;
158 struct timer_list timer; 161 struct timer_list timer;
159 struct br_ip addr; 162 struct br_ip addr;
160 unsigned char state; 163 unsigned char flags;
161}; 164};
162 165
163struct net_bridge_mdb_entry 166struct net_bridge_mdb_entry
@@ -554,11 +557,11 @@ void br_multicast_free_pg(struct rcu_head *head);
554struct net_bridge_port_group * 557struct net_bridge_port_group *
555br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, 558br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
556 struct net_bridge_port_group __rcu *next, 559 struct net_bridge_port_group __rcu *next,
557 unsigned char state); 560 unsigned char flags);
558void br_mdb_init(void); 561void br_mdb_init(void);
559void br_mdb_uninit(void); 562void br_mdb_uninit(void);
560void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, 563void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
561 struct br_ip *group, int type, u8 state); 564 struct br_ip *group, int type, u8 flags);
562void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, 565void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
563 int type); 566 int type);
564 567
@@ -897,7 +900,6 @@ static inline void br_nf_core_fini(void) {}
897#endif 900#endif
898 901
899/* br_stp.c */ 902/* br_stp.c */
900void br_log_state(const struct net_bridge_port *p);
901void br_set_state(struct net_bridge_port *p, unsigned int state); 903void br_set_state(struct net_bridge_port *p, unsigned int state);
902struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); 904struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no);
903void br_init_port(struct net_bridge_port *p); 905void br_init_port(struct net_bridge_port *p);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b3cca126b103..9cb7044d0801 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -30,13 +30,6 @@ static const char *const br_port_state_names[] = {
30 [BR_STATE_BLOCKING] = "blocking", 30 [BR_STATE_BLOCKING] = "blocking",
31}; 31};
32 32
33void br_log_state(const struct net_bridge_port *p)
34{
35 br_info(p->br, "port %u(%s) entered %s state\n",
36 (unsigned int) p->port_no, p->dev->name,
37 br_port_state_names[p->state]);
38}
39
40void br_set_state(struct net_bridge_port *p, unsigned int state) 33void br_set_state(struct net_bridge_port *p, unsigned int state)
41{ 34{
42 struct switchdev_attr attr = { 35 struct switchdev_attr attr = {
@@ -52,6 +45,10 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
52 if (err && err != -EOPNOTSUPP) 45 if (err && err != -EOPNOTSUPP)
53 br_warn(p->br, "error setting offload STP state on port %u(%s)\n", 46 br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
54 (unsigned int) p->port_no, p->dev->name); 47 (unsigned int) p->port_no, p->dev->name);
48 else
49 br_info(p->br, "port %u(%s) entered %s state\n",
50 (unsigned int) p->port_no, p->dev->name,
51 br_port_state_names[p->state]);
55} 52}
56 53
57/* called under bridge lock */ 54/* called under bridge lock */
@@ -126,7 +123,6 @@ static void br_root_port_block(const struct net_bridge *br,
126 (unsigned int) p->port_no, p->dev->name); 123 (unsigned int) p->port_no, p->dev->name);
127 124
128 br_set_state(p, BR_STATE_LISTENING); 125 br_set_state(p, BR_STATE_LISTENING);
129 br_log_state(p);
130 br_ifinfo_notify(RTM_NEWLINK, p); 126 br_ifinfo_notify(RTM_NEWLINK, p);
131 127
132 if (br->forward_delay > 0) 128 if (br->forward_delay > 0)
@@ -407,7 +403,6 @@ static void br_make_blocking(struct net_bridge_port *p)
407 br_topology_change_detection(p->br); 403 br_topology_change_detection(p->br);
408 404
409 br_set_state(p, BR_STATE_BLOCKING); 405 br_set_state(p, BR_STATE_BLOCKING);
410 br_log_state(p);
411 br_ifinfo_notify(RTM_NEWLINK, p); 406 br_ifinfo_notify(RTM_NEWLINK, p);
412 407
413 del_timer(&p->forward_delay_timer); 408 del_timer(&p->forward_delay_timer);
@@ -431,7 +426,6 @@ static void br_make_forwarding(struct net_bridge_port *p)
431 else 426 else
432 br_set_state(p, BR_STATE_LEARNING); 427 br_set_state(p, BR_STATE_LEARNING);
433 428
434 br_log_state(p);
435 br_ifinfo_notify(RTM_NEWLINK, p); 429 br_ifinfo_notify(RTM_NEWLINK, p);
436 430
437 if (br->forward_delay != 0) 431 if (br->forward_delay != 0)
@@ -568,6 +562,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
568 562
569} 563}
570 564
565/* Set time interval that dynamic forwarding entries live
566 * For pure software bridge, allow values outside the 802.1
567 * standard specification for special cases:
 568 * 0 - entry never ages (all permanent)
 569 * 1 - entry disappears (no persistence)
 570 *
 571 * Offloaded switch entries may be more restrictive
572 */
571int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) 573int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
572{ 574{
573 struct switchdev_attr attr = { 575 struct switchdev_attr attr = {
@@ -579,11 +581,8 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
579 unsigned long t = clock_t_to_jiffies(ageing_time); 581 unsigned long t = clock_t_to_jiffies(ageing_time);
580 int err; 582 int err;
581 583
582 if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
583 return -ERANGE;
584
585 err = switchdev_port_attr_set(br->dev, &attr); 584 err = switchdev_port_attr_set(br->dev, &attr);
586 if (err) 585 if (err && err != -EOPNOTSUPP)
587 return err; 586 return err;
588 587
589 br->ageing_time = t; 588 br->ageing_time = t;
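
br_set_ageing_time() now defers range policing to the offload driver: the value is offered to the hardware first, and any error other than -EOPNOTSUPP is treated as a veto. A hedged sketch of that call pattern; attribute field names beyond what the hunk shows are assumptions:

	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
		.u.ageing_time = ageing_time,	/* clock_t value from userspace */
	};
	int err;

	err = switchdev_port_attr_set(br->dev, &attr);
	if (err && err != -EOPNOTSUPP)
		return err;			/* offloaded switch rejected the value */
	br->ageing_time = clock_t_to_jiffies(ageing_time);
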
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a31ac6ad76a2..984d46263007 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -102,7 +102,6 @@ void br_stp_enable_port(struct net_bridge_port *p)
102{ 102{
103 br_init_port(p); 103 br_init_port(p);
104 br_port_state_selection(p->br); 104 br_port_state_selection(p->br);
105 br_log_state(p);
106 br_ifinfo_notify(RTM_NEWLINK, p); 105 br_ifinfo_notify(RTM_NEWLINK, p);
107} 106}
108 107
@@ -118,7 +117,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
118 p->topology_change_ack = 0; 117 p->topology_change_ack = 0;
119 p->config_pending = 0; 118 p->config_pending = 0;
120 119
121 br_log_state(p);
122 br_ifinfo_notify(RTM_NEWLINK, p); 120 br_ifinfo_notify(RTM_NEWLINK, p);
123 121
124 del_timer(&p->message_age_timer); 122 del_timer(&p->message_age_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 5f0f5af0ec35..da058b85aa22 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -98,7 +98,6 @@ static void br_forward_delay_timer_expired(unsigned long arg)
98 br_topology_change_detection(br); 98 br_topology_change_detection(br);
99 netif_carrier_on(br->dev); 99 netif_carrier_on(br->dev);
100 } 100 }
101 br_log_state(p);
102 rcu_read_lock(); 101 rcu_read_lock();
103 br_ifinfo_notify(RTM_NEWLINK, p); 102 br_ifinfo_notify(RTM_NEWLINK, p);
104 rcu_read_unlock(); 103 rcu_read_unlock();
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 85e43af4af7a..9309bb4f2a5b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -955,6 +955,13 @@ err_rhtbl:
955 */ 955 */
956int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) 956int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
957{ 957{
958 struct switchdev_obj_port_vlan v = {
959 .obj.orig_dev = port->dev,
960 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
961 .flags = flags,
962 .vid_begin = vid,
963 .vid_end = vid,
964 };
958 struct net_bridge_vlan *vlan; 965 struct net_bridge_vlan *vlan;
959 int ret; 966 int ret;
960 967
@@ -962,6 +969,10 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
962 969
963 vlan = br_vlan_find(nbp_vlan_group(port), vid); 970 vlan = br_vlan_find(nbp_vlan_group(port), vid);
964 if (vlan) { 971 if (vlan) {
972 /* Pass the flags to the hardware bridge */
973 ret = switchdev_port_obj_add(port->dev, &v.obj);
974 if (ret && ret != -EOPNOTSUPP)
975 return ret;
965 __vlan_add_flags(vlan, flags); 976 __vlan_add_flags(vlan, flags);
966 return 0; 977 return 0;
967 } 978 }
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 67b2e27999aa..5a61f35412a0 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -370,7 +370,11 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
370 left - sizeof(struct ebt_entry_match) < m->match_size) 370 left - sizeof(struct ebt_entry_match) < m->match_size)
371 return -EINVAL; 371 return -EINVAL;
372 372
373 match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0); 373 match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
374 if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) {
375 request_module("ebt_%s", m->u.name);
376 match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
377 }
374 if (IS_ERR(match)) 378 if (IS_ERR(match))
375 return PTR_ERR(match); 379 return PTR_ERR(match);
376 m->u.match = match; 380 m->u.match = match;
@@ -1521,6 +1525,8 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1521 if (copy_from_user(&tmp, user, sizeof(tmp))) 1525 if (copy_from_user(&tmp, user, sizeof(tmp)))
1522 return -EFAULT; 1526 return -EFAULT;
1523 1527
1528 tmp.name[sizeof(tmp.name) - 1] = '\0';
1529
1524 t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); 1530 t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
1525 if (!t) 1531 if (!t)
1526 return ret; 1532 return ret;
@@ -2332,6 +2338,8 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
2332 if (copy_from_user(&tmp, user, sizeof(tmp))) 2338 if (copy_from_user(&tmp, user, sizeof(tmp)))
2333 return -EFAULT; 2339 return -EFAULT;
2334 2340
2341 tmp.name[sizeof(tmp.name) - 1] = '\0';
2342
2335 t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); 2343 t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
2336 if (!t) 2344 if (!t)
2337 return ret; 2345 return ret;
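
The two ebtables hunks adding tmp.name[sizeof(tmp.name) - 1] = '\0' close a classic hole: a table name copied from userspace is not guaranteed to be NUL-terminated before it is used as a lookup key or handed to request_module(). The generic pattern, sketched with an illustrative struct:

#include <linux/uaccess.h>

struct demo_req {
	char name[32];		/* fixed-size, userspace-supplied */
};

static int demo_get_req(void __user *user, struct demo_req *req)
{
	if (copy_from_user(req, user, sizeof(*req)))
		return -EFAULT;
	/* Force termination before any strcmp()/printf-style use of the name. */
	req->name[sizeof(req->name) - 1] = '\0';
	return 0;
}
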
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index fdba3d9fbff3..77f7e7a9ebe1 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -40,7 +40,8 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
40/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) 40/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
41 * or the bridge port (NF_BRIDGE PREROUTING). 41 * or the bridge port (NF_BRIDGE PREROUTING).
42 */ 42 */
43static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, 43static void nft_reject_br_send_v4_tcp_reset(struct net *net,
44 struct sk_buff *oldskb,
44 const struct net_device *dev, 45 const struct net_device *dev,
45 int hook) 46 int hook)
46{ 47{
@@ -63,9 +64,9 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
63 64
64 skb_reserve(nskb, LL_MAX_HEADER); 65 skb_reserve(nskb, LL_MAX_HEADER);
65 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, 66 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
66 sysctl_ip_default_ttl); 67 net->ipv4.sysctl_ip_default_ttl);
67 nf_reject_ip_tcphdr_put(nskb, oldskb, oth); 68 nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
68 niph->ttl = sysctl_ip_default_ttl; 69 niph->ttl = net->ipv4.sysctl_ip_default_ttl;
69 niph->tot_len = htons(nskb->len); 70 niph->tot_len = htons(nskb->len);
70 ip_send_check(niph); 71 ip_send_check(niph);
71 72
@@ -74,7 +75,8 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
74 br_deliver(br_port_get_rcu(dev), nskb); 75 br_deliver(br_port_get_rcu(dev), nskb);
75} 76}
76 77
77static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, 78static void nft_reject_br_send_v4_unreach(struct net *net,
79 struct sk_buff *oldskb,
78 const struct net_device *dev, 80 const struct net_device *dev,
79 int hook, u8 code) 81 int hook, u8 code)
80{ 82{
@@ -119,7 +121,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
119 121
120 skb_reserve(nskb, LL_MAX_HEADER); 122 skb_reserve(nskb, LL_MAX_HEADER);
121 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, 123 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
122 sysctl_ip_default_ttl); 124 net->ipv4.sysctl_ip_default_ttl);
123 125
124 skb_reset_transport_header(nskb); 126 skb_reset_transport_header(nskb);
125 icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); 127 icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
@@ -271,17 +273,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
271 case htons(ETH_P_IP): 273 case htons(ETH_P_IP):
272 switch (priv->type) { 274 switch (priv->type) {
273 case NFT_REJECT_ICMP_UNREACH: 275 case NFT_REJECT_ICMP_UNREACH:
274 nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, 276 nft_reject_br_send_v4_unreach(pkt->net, pkt->skb,
275 pkt->hook, 277 pkt->in, pkt->hook,
276 priv->icmp_code); 278 priv->icmp_code);
277 break; 279 break;
278 case NFT_REJECT_TCP_RST: 280 case NFT_REJECT_TCP_RST:
279 nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, 281 nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb,
280 pkt->hook); 282 pkt->in, pkt->hook);
281 break; 283 break;
282 case NFT_REJECT_ICMPX_UNREACH: 284 case NFT_REJECT_ICMPX_UNREACH:
283 nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, 285 nft_reject_br_send_v4_unreach(pkt->net, pkt->skb,
284 pkt->hook, 286 pkt->in, pkt->hook,
285 nft_reject_icmp_code(priv->icmp_code)); 287 nft_reject_icmp_code(priv->icmp_code));
286 break; 288 break;
287 } 289 }
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index f6c3b2137eea..59ce1fcc220c 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -286,7 +286,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
286 else 286 else
287 skb_trim(skb, len); 287 skb_trim(skb, len);
288 288
289 return cfpkt_getlen(pkt); 289 return cfpkt_getlen(pkt);
290 } 290 }
291 291
292 /* Need to expand SKB */ 292 /* Need to expand SKB */
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 6b923bcaa2a4..2bc5965fdd1e 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -293,13 +293,9 @@ int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
293} 293}
294EXPORT_SYMBOL(ceph_auth_create_authorizer); 294EXPORT_SYMBOL(ceph_auth_create_authorizer);
295 295
296void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, 296void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
297 struct ceph_authorizer *a)
298{ 297{
299 mutex_lock(&ac->mutex); 298 a->destroy(a);
300 if (ac->ops && ac->ops->destroy_authorizer)
301 ac->ops->destroy_authorizer(ac, a);
302 mutex_unlock(&ac->mutex);
303} 299}
304EXPORT_SYMBOL(ceph_auth_destroy_authorizer); 300EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
305 301
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 8c93fa8d81bc..5f836f02ae36 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -16,7 +16,6 @@ static void reset(struct ceph_auth_client *ac)
16 struct ceph_auth_none_info *xi = ac->private; 16 struct ceph_auth_none_info *xi = ac->private;
17 17
18 xi->starting = true; 18 xi->starting = true;
19 xi->built_authorizer = false;
20} 19}
21 20
22static void destroy(struct ceph_auth_client *ac) 21static void destroy(struct ceph_auth_client *ac)
@@ -39,6 +38,27 @@ static int should_authenticate(struct ceph_auth_client *ac)
39 return xi->starting; 38 return xi->starting;
40} 39}
41 40
41static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac,
42 struct ceph_none_authorizer *au)
43{
44 void *p = au->buf;
45 void *const end = p + sizeof(au->buf);
46 int ret;
47
48 ceph_encode_8_safe(&p, end, 1, e_range);
49 ret = ceph_entity_name_encode(ac->name, &p, end);
50 if (ret < 0)
51 return ret;
52
53 ceph_encode_64_safe(&p, end, ac->global_id, e_range);
54 au->buf_len = p - (void *)au->buf;
55 dout("%s built authorizer len %d\n", __func__, au->buf_len);
56 return 0;
57
58e_range:
59 return -ERANGE;
60}
61
42static int build_request(struct ceph_auth_client *ac, void *buf, void *end) 62static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
43{ 63{
44 return 0; 64 return 0;
@@ -57,32 +77,32 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
57 return result; 77 return result;
58} 78}
59 79
80static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a)
81{
82 kfree(a);
83}
84
60/* 85/*
61 * build an 'authorizer' with our entity_name and global_id. we can 86 * build an 'authorizer' with our entity_name and global_id. it is
62 * reuse a single static copy since it is identical for all services 87 * identical for all services we connect to.
63 * we connect to.
64 */ 88 */
65static int ceph_auth_none_create_authorizer( 89static int ceph_auth_none_create_authorizer(
66 struct ceph_auth_client *ac, int peer_type, 90 struct ceph_auth_client *ac, int peer_type,
67 struct ceph_auth_handshake *auth) 91 struct ceph_auth_handshake *auth)
68{ 92{
69 struct ceph_auth_none_info *ai = ac->private; 93 struct ceph_none_authorizer *au;
70 struct ceph_none_authorizer *au = &ai->au;
71 void *p, *end;
72 int ret; 94 int ret;
73 95
74 if (!ai->built_authorizer) { 96 au = kmalloc(sizeof(*au), GFP_NOFS);
75 p = au->buf; 97 if (!au)
76 end = p + sizeof(au->buf); 98 return -ENOMEM;
77 ceph_encode_8(&p, 1); 99
78 ret = ceph_entity_name_encode(ac->name, &p, end - 8); 100 au->base.destroy = ceph_auth_none_destroy_authorizer;
79 if (ret < 0) 101
80 goto bad; 102 ret = ceph_auth_none_build_authorizer(ac, au);
81 ceph_decode_need(&p, end, sizeof(u64), bad2); 103 if (ret) {
82 ceph_encode_64(&p, ac->global_id); 104 kfree(au);
83 au->buf_len = p - (void *)au->buf; 105 return ret;
84 ai->built_authorizer = true;
85 dout("built authorizer len %d\n", au->buf_len);
86 } 106 }
87 107
88 auth->authorizer = (struct ceph_authorizer *) au; 108 auth->authorizer = (struct ceph_authorizer *) au;
@@ -92,17 +112,6 @@ static int ceph_auth_none_create_authorizer(
92 auth->authorizer_reply_buf_len = sizeof (au->reply_buf); 112 auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
93 113
94 return 0; 114 return 0;
95
96bad2:
97 ret = -ERANGE;
98bad:
99 return ret;
100}
101
102static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
103 struct ceph_authorizer *a)
104{
105 /* nothing to do */
106} 115}
107 116
108static const struct ceph_auth_client_ops ceph_auth_none_ops = { 117static const struct ceph_auth_client_ops ceph_auth_none_ops = {
@@ -114,7 +123,6 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
114 .build_request = build_request, 123 .build_request = build_request,
115 .handle_reply = handle_reply, 124 .handle_reply = handle_reply,
116 .create_authorizer = ceph_auth_none_create_authorizer, 125 .create_authorizer = ceph_auth_none_create_authorizer,
117 .destroy_authorizer = ceph_auth_none_destroy_authorizer,
118}; 126};
119 127
120int ceph_auth_none_init(struct ceph_auth_client *ac) 128int ceph_auth_none_init(struct ceph_auth_client *ac)
@@ -127,7 +135,6 @@ int ceph_auth_none_init(struct ceph_auth_client *ac)
127 return -ENOMEM; 135 return -ENOMEM;
128 136
129 xi->starting = true; 137 xi->starting = true;
130 xi->built_authorizer = false;
131 138
132 ac->protocol = CEPH_AUTH_NONE; 139 ac->protocol = CEPH_AUTH_NONE;
133 ac->private = xi; 140 ac->private = xi;
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 059a3ce4b53f..62021535ae4a 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -12,6 +12,7 @@
12 */ 12 */
13 13
14struct ceph_none_authorizer { 14struct ceph_none_authorizer {
15 struct ceph_authorizer base;
15 char buf[128]; 16 char buf[128];
16 int buf_len; 17 int buf_len;
17 char reply_buf[0]; 18 char reply_buf[0];
@@ -19,8 +20,6 @@ struct ceph_none_authorizer {
19 20
20struct ceph_auth_none_info { 21struct ceph_auth_none_info {
21 bool starting; 22 bool starting;
22 bool built_authorizer;
23 struct ceph_none_authorizer au; /* we only need one; it's static */
24}; 23};
25 24
26int ceph_auth_none_init(struct ceph_auth_client *ac); 25int ceph_auth_none_init(struct ceph_auth_client *ac);
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 9e43a315e662..a0905f04bd13 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -565,6 +565,14 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
565 return -EAGAIN; 565 return -EAGAIN;
566} 566}
567 567
568static void ceph_x_destroy_authorizer(struct ceph_authorizer *a)
569{
570 struct ceph_x_authorizer *au = (void *)a;
571
572 ceph_x_authorizer_cleanup(au);
573 kfree(au);
574}
575
568static int ceph_x_create_authorizer( 576static int ceph_x_create_authorizer(
569 struct ceph_auth_client *ac, int peer_type, 577 struct ceph_auth_client *ac, int peer_type,
570 struct ceph_auth_handshake *auth) 578 struct ceph_auth_handshake *auth)
@@ -581,6 +589,8 @@ static int ceph_x_create_authorizer(
581 if (!au) 589 if (!au)
582 return -ENOMEM; 590 return -ENOMEM;
583 591
592 au->base.destroy = ceph_x_destroy_authorizer;
593
584 ret = ceph_x_build_authorizer(ac, th, au); 594 ret = ceph_x_build_authorizer(ac, th, au);
585 if (ret) { 595 if (ret) {
586 kfree(au); 596 kfree(au);
@@ -643,16 +653,6 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
643 return ret; 653 return ret;
644} 654}
645 655
646static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
647 struct ceph_authorizer *a)
648{
649 struct ceph_x_authorizer *au = (void *)a;
650
651 ceph_x_authorizer_cleanup(au);
652 kfree(au);
653}
654
655
656static void ceph_x_reset(struct ceph_auth_client *ac) 656static void ceph_x_reset(struct ceph_auth_client *ac)
657{ 657{
658 struct ceph_x_info *xi = ac->private; 658 struct ceph_x_info *xi = ac->private;
@@ -770,7 +770,6 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
770 .create_authorizer = ceph_x_create_authorizer, 770 .create_authorizer = ceph_x_create_authorizer,
771 .update_authorizer = ceph_x_update_authorizer, 771 .update_authorizer = ceph_x_update_authorizer,
772 .verify_authorizer_reply = ceph_x_verify_authorizer_reply, 772 .verify_authorizer_reply = ceph_x_verify_authorizer_reply,
773 .destroy_authorizer = ceph_x_destroy_authorizer,
774 .invalidate_authorizer = ceph_x_invalidate_authorizer, 773 .invalidate_authorizer = ceph_x_invalidate_authorizer,
775 .reset = ceph_x_reset, 774 .reset = ceph_x_reset,
776 .destroy = ceph_x_destroy, 775 .destroy = ceph_x_destroy,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 40b1a3cf7397..21a5af904bae 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -26,6 +26,7 @@ struct ceph_x_ticket_handler {
26 26
27 27
28struct ceph_x_authorizer { 28struct ceph_x_authorizer {
29 struct ceph_authorizer base;
29 struct ceph_crypto_key session_key; 30 struct ceph_crypto_key session_key;
30 struct ceph_buffer *buf; 31 struct ceph_buffer *buf;
31 unsigned int service; 32 unsigned int service;
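
Note: the auth_none and auth_x hunks above are two halves of the same refactor. Instead of a destroy_authorizer callback in struct ceph_auth_client_ops, each authorizer now embeds a struct ceph_authorizer base whose destroy hook points at the protocol-specific teardown, so callers (see the osd_client.c hunks further down) can free an authorizer without holding the auth client. A minimal sketch of the pattern, with simplified struct contents; the real declarations live in the ceph auth headers:

    struct ceph_authorizer {
            void (*destroy)(struct ceph_authorizer *a);
    };

    struct ceph_x_authorizer {
            struct ceph_authorizer base;   /* first member, so (void *)a casts back */
            /* ... protocol-specific state ... */
    };

    static void ceph_x_destroy_authorizer(struct ceph_authorizer *a)
    {
            struct ceph_x_authorizer *au = (void *)a;

            /* release protocol-specific state, then the object itself */
            kfree(au);
    }

    /* set at creation time:  au->base.destroy = ceph_x_destroy_authorizer; */

    /* generic teardown no longer needs the ceph_auth_client */
    static void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
    {
            a->destroy(a);
    }
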
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index bcbec33c6a14..dcc18c6f7cf9 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -361,7 +361,6 @@ ceph_parse_options(char *options, const char *dev_name,
361 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 361 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
362 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; 362 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
363 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; 363 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
364 opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT;
365 364
366 /* get mon ip(s) */ 365 /* get mon ip(s) */
367 /* ip1[:port1][,ip2[:port2]...] */ 366 /* ip1[:port1][,ip2[:port2]...] */
@@ -686,6 +685,9 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
686 return client->auth_err; 685 return client->auth_err;
687 } 686 }
688 687
688 pr_info("client%llu fsid %pU\n", ceph_client_id(client), &client->fsid);
689 ceph_debugfs_client_init(client);
690
689 return 0; 691 return 0;
690} 692}
691EXPORT_SYMBOL(__ceph_open_session); 693EXPORT_SYMBOL(__ceph_open_session);
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 42e8649c6e79..db2847ac5f12 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -4,7 +4,8 @@
4#include <linux/err.h> 4#include <linux/err.h>
5#include <linux/scatterlist.h> 5#include <linux/scatterlist.h>
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <crypto/hash.h> 7#include <crypto/aes.h>
8#include <crypto/skcipher.h>
8#include <linux/key-type.h> 9#include <linux/key-type.h>
9 10
10#include <keys/ceph-type.h> 11#include <keys/ceph-type.h>
@@ -79,9 +80,9 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
79 return 0; 80 return 0;
80} 81}
81 82
82static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) 83static struct crypto_skcipher *ceph_crypto_alloc_cipher(void)
83{ 84{
84 return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); 85 return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
85} 86}
86 87
87static const u8 *aes_iv = (u8 *)CEPH_AES_IV; 88static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
@@ -162,11 +163,10 @@ static int ceph_aes_encrypt(const void *key, int key_len,
162{ 163{
163 struct scatterlist sg_in[2], prealloc_sg; 164 struct scatterlist sg_in[2], prealloc_sg;
164 struct sg_table sg_out; 165 struct sg_table sg_out;
165 struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); 166 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
166 struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; 167 SKCIPHER_REQUEST_ON_STACK(req, tfm);
167 int ret; 168 int ret;
168 void *iv; 169 char iv[AES_BLOCK_SIZE];
169 int ivsize;
170 size_t zero_padding = (0x10 - (src_len & 0x0f)); 170 size_t zero_padding = (0x10 - (src_len & 0x0f));
171 char pad[16]; 171 char pad[16];
172 172
@@ -184,10 +184,13 @@ static int ceph_aes_encrypt(const void *key, int key_len,
184 if (ret) 184 if (ret)
185 goto out_tfm; 185 goto out_tfm;
186 186
187 crypto_blkcipher_setkey((void *)tfm, key, key_len); 187 crypto_skcipher_setkey((void *)tfm, key, key_len);
188 iv = crypto_blkcipher_crt(tfm)->iv; 188 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
189 ivsize = crypto_blkcipher_ivsize(tfm); 189
190 memcpy(iv, aes_iv, ivsize); 190 skcipher_request_set_tfm(req, tfm);
191 skcipher_request_set_callback(req, 0, NULL, NULL);
192 skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
193 src_len + zero_padding, iv);
191 194
192 /* 195 /*
193 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, 196 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -197,8 +200,8 @@ static int ceph_aes_encrypt(const void *key, int key_len,
197 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, 200 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
198 pad, zero_padding, 1); 201 pad, zero_padding, 1);
199 */ 202 */
200 ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, 203 ret = crypto_skcipher_encrypt(req);
201 src_len + zero_padding); 204 skcipher_request_zero(req);
202 if (ret < 0) { 205 if (ret < 0) {
203 pr_err("ceph_aes_crypt failed %d\n", ret); 206 pr_err("ceph_aes_crypt failed %d\n", ret);
204 goto out_sg; 207 goto out_sg;
@@ -211,7 +214,7 @@ static int ceph_aes_encrypt(const void *key, int key_len,
211out_sg: 214out_sg:
212 teardown_sgtable(&sg_out); 215 teardown_sgtable(&sg_out);
213out_tfm: 216out_tfm:
214 crypto_free_blkcipher(tfm); 217 crypto_free_skcipher(tfm);
215 return ret; 218 return ret;
216} 219}
217 220
@@ -222,11 +225,10 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
222{ 225{
223 struct scatterlist sg_in[3], prealloc_sg; 226 struct scatterlist sg_in[3], prealloc_sg;
224 struct sg_table sg_out; 227 struct sg_table sg_out;
225 struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); 228 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
226 struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; 229 SKCIPHER_REQUEST_ON_STACK(req, tfm);
227 int ret; 230 int ret;
228 void *iv; 231 char iv[AES_BLOCK_SIZE];
229 int ivsize;
230 size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f)); 232 size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f));
231 char pad[16]; 233 char pad[16];
232 234
@@ -245,10 +247,13 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
245 if (ret) 247 if (ret)
246 goto out_tfm; 248 goto out_tfm;
247 249
248 crypto_blkcipher_setkey((void *)tfm, key, key_len); 250 crypto_skcipher_setkey((void *)tfm, key, key_len);
249 iv = crypto_blkcipher_crt(tfm)->iv; 251 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
250 ivsize = crypto_blkcipher_ivsize(tfm); 252
251 memcpy(iv, aes_iv, ivsize); 253 skcipher_request_set_tfm(req, tfm);
254 skcipher_request_set_callback(req, 0, NULL, NULL);
255 skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
256 src1_len + src2_len + zero_padding, iv);
252 257
253 /* 258 /*
254 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, 259 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -260,8 +265,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
260 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, 265 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
261 pad, zero_padding, 1); 266 pad, zero_padding, 1);
262 */ 267 */
263 ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, 268 ret = crypto_skcipher_encrypt(req);
264 src1_len + src2_len + zero_padding); 269 skcipher_request_zero(req);
265 if (ret < 0) { 270 if (ret < 0) {
266 pr_err("ceph_aes_crypt2 failed %d\n", ret); 271 pr_err("ceph_aes_crypt2 failed %d\n", ret);
267 goto out_sg; 272 goto out_sg;
@@ -274,7 +279,7 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
274out_sg: 279out_sg:
275 teardown_sgtable(&sg_out); 280 teardown_sgtable(&sg_out);
276out_tfm: 281out_tfm:
277 crypto_free_blkcipher(tfm); 282 crypto_free_skcipher(tfm);
278 return ret; 283 return ret;
279} 284}
280 285
@@ -284,11 +289,10 @@ static int ceph_aes_decrypt(const void *key, int key_len,
284{ 289{
285 struct sg_table sg_in; 290 struct sg_table sg_in;
286 struct scatterlist sg_out[2], prealloc_sg; 291 struct scatterlist sg_out[2], prealloc_sg;
287 struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); 292 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
288 struct blkcipher_desc desc = { .tfm = tfm }; 293 SKCIPHER_REQUEST_ON_STACK(req, tfm);
289 char pad[16]; 294 char pad[16];
290 void *iv; 295 char iv[AES_BLOCK_SIZE];
291 int ivsize;
292 int ret; 296 int ret;
293 int last_byte; 297 int last_byte;
294 298
@@ -302,10 +306,13 @@ static int ceph_aes_decrypt(const void *key, int key_len,
302 if (ret) 306 if (ret)
303 goto out_tfm; 307 goto out_tfm;
304 308
305 crypto_blkcipher_setkey((void *)tfm, key, key_len); 309 crypto_skcipher_setkey((void *)tfm, key, key_len);
306 iv = crypto_blkcipher_crt(tfm)->iv; 310 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
307 ivsize = crypto_blkcipher_ivsize(tfm); 311
308 memcpy(iv, aes_iv, ivsize); 312 skcipher_request_set_tfm(req, tfm);
313 skcipher_request_set_callback(req, 0, NULL, NULL);
314 skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
315 src_len, iv);
309 316
310 /* 317 /*
311 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, 318 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -313,7 +320,8 @@ static int ceph_aes_decrypt(const void *key, int key_len,
313 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, 320 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
314 src, src_len, 1); 321 src, src_len, 1);
315 */ 322 */
316 ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); 323 ret = crypto_skcipher_decrypt(req);
324 skcipher_request_zero(req);
317 if (ret < 0) { 325 if (ret < 0) {
318 pr_err("ceph_aes_decrypt failed %d\n", ret); 326 pr_err("ceph_aes_decrypt failed %d\n", ret);
319 goto out_sg; 327 goto out_sg;
@@ -338,7 +346,7 @@ static int ceph_aes_decrypt(const void *key, int key_len,
338out_sg: 346out_sg:
339 teardown_sgtable(&sg_in); 347 teardown_sgtable(&sg_in);
340out_tfm: 348out_tfm:
341 crypto_free_blkcipher(tfm); 349 crypto_free_skcipher(tfm);
342 return ret; 350 return ret;
343} 351}
344 352
@@ -349,11 +357,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
349{ 357{
350 struct sg_table sg_in; 358 struct sg_table sg_in;
351 struct scatterlist sg_out[3], prealloc_sg; 359 struct scatterlist sg_out[3], prealloc_sg;
352 struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); 360 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
353 struct blkcipher_desc desc = { .tfm = tfm }; 361 SKCIPHER_REQUEST_ON_STACK(req, tfm);
354 char pad[16]; 362 char pad[16];
355 void *iv; 363 char iv[AES_BLOCK_SIZE];
356 int ivsize;
357 int ret; 364 int ret;
358 int last_byte; 365 int last_byte;
359 366
@@ -368,10 +375,13 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
368 if (ret) 375 if (ret)
369 goto out_tfm; 376 goto out_tfm;
370 377
371 crypto_blkcipher_setkey((void *)tfm, key, key_len); 378 crypto_skcipher_setkey((void *)tfm, key, key_len);
372 iv = crypto_blkcipher_crt(tfm)->iv; 379 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
373 ivsize = crypto_blkcipher_ivsize(tfm); 380
374 memcpy(iv, aes_iv, ivsize); 381 skcipher_request_set_tfm(req, tfm);
382 skcipher_request_set_callback(req, 0, NULL, NULL);
383 skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
384 src_len, iv);
375 385
376 /* 386 /*
377 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, 387 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -379,7 +389,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
379 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, 389 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
380 src, src_len, 1); 390 src, src_len, 1);
381 */ 391 */
382 ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); 392 ret = crypto_skcipher_decrypt(req);
393 skcipher_request_zero(req);
383 if (ret < 0) { 394 if (ret < 0) {
384 pr_err("ceph_aes_decrypt failed %d\n", ret); 395 pr_err("ceph_aes_decrypt failed %d\n", ret);
385 goto out_sg; 396 goto out_sg;
@@ -415,7 +426,7 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
415out_sg: 426out_sg:
416 teardown_sgtable(&sg_in); 427 teardown_sgtable(&sg_in);
417out_tfm: 428out_tfm:
418 crypto_free_blkcipher(tfm); 429 crypto_free_skcipher(tfm);
419 return ret; 430 return ret;
420} 431}
421 432
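
Note: the crypto.c hunks convert ceph from the legacy blkcipher interface to the synchronous skcipher API: the IV moves from the transform to an on-stack buffer, and each operation goes through an skcipher_request that is zeroed after use. A rough sketch of the calling convention the new code follows (error handling trimmed; an illustration, not a drop-in helper):

    #include <linux/err.h>
    #include <crypto/aes.h>
    #include <crypto/skcipher.h>

    static int cbc_aes_encrypt_sketch(const void *key, int key_len,
                                      struct scatterlist *src,
                                      struct scatterlist *dst, size_t len)
    {
            struct crypto_skcipher *tfm;
            char iv[AES_BLOCK_SIZE];
            int ret;

            tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            {
                    SKCIPHER_REQUEST_ON_STACK(req, tfm);  /* synchronous request on the stack */

                    crypto_skcipher_setkey(tfm, key, key_len);
                    memset(iv, 0, sizeof(iv));            /* ceph uses the fixed CEPH_AES_IV */

                    skcipher_request_set_tfm(req, tfm);
                    skcipher_request_set_callback(req, 0, NULL, NULL);
                    skcipher_request_set_crypt(req, src, dst, len, iv);

                    ret = crypto_skcipher_encrypt(req);
                    skcipher_request_zero(req);           /* wipe key material off the stack */
            }

            crypto_free_skcipher(tfm);
            return ret;
    }
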
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 593dc2eabcc8..b902fbc7863e 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -112,15 +112,20 @@ static int monc_show(struct seq_file *s, void *p)
112 struct ceph_mon_generic_request *req; 112 struct ceph_mon_generic_request *req;
113 struct ceph_mon_client *monc = &client->monc; 113 struct ceph_mon_client *monc = &client->monc;
114 struct rb_node *rp; 114 struct rb_node *rp;
115 int i;
115 116
116 mutex_lock(&monc->mutex); 117 mutex_lock(&monc->mutex);
117 118
118 if (monc->have_mdsmap) 119 for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
119 seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap); 120 seq_printf(s, "have %s %u", ceph_sub_str[i],
120 if (monc->have_osdmap) 121 monc->subs[i].have);
121 seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap); 122 if (monc->subs[i].want)
122 if (monc->want_next_osdmap) 123 seq_printf(s, " want %llu%s",
123 seq_printf(s, "want next osdmap\n"); 124 le64_to_cpu(monc->subs[i].item.start),
125 (monc->subs[i].item.flags &
126 CEPH_SUBSCRIBE_ONETIME ? "" : "+"));
127 seq_putc(s, '\n');
128 }
124 129
125 for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { 130 for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
126 __u16 op; 131 __u16 op;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9382619a405b..a5502898ea33 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -235,18 +235,12 @@ static struct workqueue_struct *ceph_msgr_wq;
235static int ceph_msgr_slab_init(void) 235static int ceph_msgr_slab_init(void)
236{ 236{
237 BUG_ON(ceph_msg_cache); 237 BUG_ON(ceph_msg_cache);
238 ceph_msg_cache = kmem_cache_create("ceph_msg", 238 ceph_msg_cache = KMEM_CACHE(ceph_msg, 0);
239 sizeof (struct ceph_msg),
240 __alignof__(struct ceph_msg), 0, NULL);
241
242 if (!ceph_msg_cache) 239 if (!ceph_msg_cache)
243 return -ENOMEM; 240 return -ENOMEM;
244 241
245 BUG_ON(ceph_msg_data_cache); 242 BUG_ON(ceph_msg_data_cache);
246 ceph_msg_data_cache = kmem_cache_create("ceph_msg_data", 243 ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
247 sizeof (struct ceph_msg_data),
248 __alignof__(struct ceph_msg_data),
249 0, NULL);
250 if (ceph_msg_data_cache) 244 if (ceph_msg_data_cache)
251 return 0; 245 return 0;
252 246
@@ -275,7 +269,7 @@ static void _ceph_msgr_exit(void)
275 } 269 }
276 270
277 BUG_ON(zero_page == NULL); 271 BUG_ON(zero_page == NULL);
278 page_cache_release(zero_page); 272 put_page(zero_page);
279 zero_page = NULL; 273 zero_page = NULL;
280 274
281 ceph_msgr_slab_exit(); 275 ceph_msgr_slab_exit();
@@ -288,7 +282,7 @@ int ceph_msgr_init(void)
288 282
289 BUG_ON(zero_page != NULL); 283 BUG_ON(zero_page != NULL);
290 zero_page = ZERO_PAGE(0); 284 zero_page = ZERO_PAGE(0);
291 page_cache_get(zero_page); 285 get_page(zero_page);
292 286
293 /* 287 /*
294 * The number of active work items is limited by the number of 288 * The number of active work items is limited by the number of
@@ -1221,25 +1215,19 @@ static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
1221static void prepare_write_message_footer(struct ceph_connection *con) 1215static void prepare_write_message_footer(struct ceph_connection *con)
1222{ 1216{
1223 struct ceph_msg *m = con->out_msg; 1217 struct ceph_msg *m = con->out_msg;
1224 int v = con->out_kvec_left;
1225 1218
1226 m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; 1219 m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
1227 1220
1228 dout("prepare_write_message_footer %p\n", con); 1221 dout("prepare_write_message_footer %p\n", con);
1229 con->out_kvec[v].iov_base = &m->footer; 1222 con_out_kvec_add(con, sizeof_footer(con), &m->footer);
1230 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { 1223 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
1231 if (con->ops->sign_message) 1224 if (con->ops->sign_message)
1232 con->ops->sign_message(m); 1225 con->ops->sign_message(m);
1233 else 1226 else
1234 m->footer.sig = 0; 1227 m->footer.sig = 0;
1235 con->out_kvec[v].iov_len = sizeof(m->footer);
1236 con->out_kvec_bytes += sizeof(m->footer);
1237 } else { 1228 } else {
1238 m->old_footer.flags = m->footer.flags; 1229 m->old_footer.flags = m->footer.flags;
1239 con->out_kvec[v].iov_len = sizeof(m->old_footer);
1240 con->out_kvec_bytes += sizeof(m->old_footer);
1241 } 1230 }
1242 con->out_kvec_left++;
1243 con->out_more = m->more_to_follow; 1231 con->out_more = m->more_to_follow;
1244 con->out_msg_done = true; 1232 con->out_msg_done = true;
1245} 1233}
@@ -1614,7 +1602,7 @@ static int write_partial_skip(struct ceph_connection *con)
1614 1602
1615 dout("%s %p %d left\n", __func__, con, con->out_skip); 1603 dout("%s %p %d left\n", __func__, con, con->out_skip);
1616 while (con->out_skip > 0) { 1604 while (con->out_skip > 0) {
1617 size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); 1605 size_t size = min(con->out_skip, (int) PAGE_SIZE);
1618 1606
1619 ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); 1607 ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
1620 if (ret <= 0) 1608 if (ret <= 0)
@@ -2409,11 +2397,7 @@ static int read_partial_message(struct ceph_connection *con)
2409 } 2397 }
2410 2398
2411 /* footer */ 2399 /* footer */
2412 if (need_sign) 2400 size = sizeof_footer(con);
2413 size = sizeof(m->footer);
2414 else
2415 size = sizeof(m->old_footer);
2416
2417 end += size; 2401 end += size;
2418 ret = read_partial(con, end, size, &m->footer); 2402 ret = read_partial(con, end, size, &m->footer);
2419 if (ret <= 0) 2403 if (ret <= 0)
@@ -3089,10 +3073,7 @@ void ceph_msg_revoke(struct ceph_msg *msg)
3089 con->out_skip += con_out_kvec_skip(con); 3073 con->out_skip += con_out_kvec_skip(con);
3090 } else { 3074 } else {
3091 BUG_ON(!msg->data_length); 3075 BUG_ON(!msg->data_length);
3092 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) 3076 con->out_skip += sizeof_footer(con);
3093 con->out_skip += sizeof(msg->footer);
3094 else
3095 con->out_skip += sizeof(msg->old_footer);
3096 } 3077 }
3097 /* data, middle, front */ 3078 /* data, middle, front */
3098 if (msg->data_length) 3079 if (msg->data_length)
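
Note: two tidy-ups recur in the messenger.c hunks. The open-coded kmem_cache_create() calls collapse into the KMEM_CACHE() helper from <linux/slab.h>, which derives the cache name, size and alignment from the struct itself, and the repeated old-vs-new footer size ternaries collapse into a sizeof_footer() helper keyed off the peer's feature bits. Roughly (the sizeof_footer() body below is an assumed reconstruction matching the call sites, not copied from the patch):

    /* KMEM_CACHE(ceph_msg, 0) expands to roughly:
     *   kmem_cache_create("ceph_msg", sizeof(struct ceph_msg),
     *                     __alignof__(struct ceph_msg), 0, NULL);
     */

    /* plausible sizeof_footer(), selecting the footer layout by feature bit */
    static inline int sizeof_footer(struct ceph_connection *con)
    {
            return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
                   sizeof(struct ceph_msg_footer) :
                   sizeof(struct ceph_msg_footer_old);
    }
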
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index de85dddc3dc0..cf638c009cfa 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -122,51 +122,91 @@ static void __close_session(struct ceph_mon_client *monc)
122 ceph_msg_revoke(monc->m_subscribe); 122 ceph_msg_revoke(monc->m_subscribe);
123 ceph_msg_revoke_incoming(monc->m_subscribe_ack); 123 ceph_msg_revoke_incoming(monc->m_subscribe_ack);
124 ceph_con_close(&monc->con); 124 ceph_con_close(&monc->con);
125 monc->cur_mon = -1; 125
126 monc->pending_auth = 0; 126 monc->pending_auth = 0;
127 ceph_auth_reset(monc->auth); 127 ceph_auth_reset(monc->auth);
128} 128}
129 129
130/* 130/*
131 * Open a session with a (new) monitor. 131 * Pick a new monitor at random and set cur_mon. If we are repicking
132 * (i.e. cur_mon is already set), be sure to pick a different one.
132 */ 133 */
133static int __open_session(struct ceph_mon_client *monc) 134static void pick_new_mon(struct ceph_mon_client *monc)
134{ 135{
135 char r; 136 int old_mon = monc->cur_mon;
136 int ret;
137 137
138 if (monc->cur_mon < 0) { 138 BUG_ON(monc->monmap->num_mon < 1);
139 get_random_bytes(&r, 1); 139
140 monc->cur_mon = r % monc->monmap->num_mon; 140 if (monc->monmap->num_mon == 1) {
141 dout("open_session num=%d r=%d -> mon%d\n", 141 monc->cur_mon = 0;
142 monc->monmap->num_mon, r, monc->cur_mon);
143 monc->sub_sent = 0;
144 monc->sub_renew_after = jiffies; /* i.e., expired */
145 monc->want_next_osdmap = !!monc->want_next_osdmap;
146
147 dout("open_session mon%d opening\n", monc->cur_mon);
148 ceph_con_open(&monc->con,
149 CEPH_ENTITY_TYPE_MON, monc->cur_mon,
150 &monc->monmap->mon_inst[monc->cur_mon].addr);
151
152 /* send an initial keepalive to ensure our timestamp is
153 * valid by the time we are in an OPENED state */
154 ceph_con_keepalive(&monc->con);
155
156 /* initiatiate authentication handshake */
157 ret = ceph_auth_build_hello(monc->auth,
158 monc->m_auth->front.iov_base,
159 monc->m_auth->front_alloc_len);
160 __send_prepared_auth_request(monc, ret);
161 } else { 142 } else {
162 dout("open_session mon%d already open\n", monc->cur_mon); 143 int max = monc->monmap->num_mon;
144 int o = -1;
145 int n;
146
147 if (monc->cur_mon >= 0) {
148 if (monc->cur_mon < monc->monmap->num_mon)
149 o = monc->cur_mon;
150 if (o >= 0)
151 max--;
152 }
153
154 n = prandom_u32() % max;
155 if (o >= 0 && n >= o)
156 n++;
157
158 monc->cur_mon = n;
163 } 159 }
164 return 0; 160
161 dout("%s mon%d -> mon%d out of %d mons\n", __func__, old_mon,
162 monc->cur_mon, monc->monmap->num_mon);
163}
164
165/*
166 * Open a session with a new monitor.
167 */
168static void __open_session(struct ceph_mon_client *monc)
169{
170 int ret;
171
172 pick_new_mon(monc);
173
174 monc->hunting = true;
175 if (monc->had_a_connection) {
176 monc->hunt_mult *= CEPH_MONC_HUNT_BACKOFF;
177 if (monc->hunt_mult > CEPH_MONC_HUNT_MAX_MULT)
178 monc->hunt_mult = CEPH_MONC_HUNT_MAX_MULT;
179 }
180
181 monc->sub_renew_after = jiffies; /* i.e., expired */
182 monc->sub_renew_sent = 0;
183
184 dout("%s opening mon%d\n", __func__, monc->cur_mon);
185 ceph_con_open(&monc->con, CEPH_ENTITY_TYPE_MON, monc->cur_mon,
186 &monc->monmap->mon_inst[monc->cur_mon].addr);
187
188 /*
189 * send an initial keepalive to ensure our timestamp is valid
190 * by the time we are in an OPENED state
191 */
192 ceph_con_keepalive(&monc->con);
193
194 /* initiate authentication handshake */
195 ret = ceph_auth_build_hello(monc->auth,
196 monc->m_auth->front.iov_base,
197 monc->m_auth->front_alloc_len);
198 BUG_ON(ret <= 0);
199 __send_prepared_auth_request(monc, ret);
165} 200}
166 201
167static bool __sub_expired(struct ceph_mon_client *monc) 202static void reopen_session(struct ceph_mon_client *monc)
168{ 203{
169 return time_after_eq(jiffies, monc->sub_renew_after); 204 if (!monc->hunting)
205 pr_info("mon%d %s session lost, hunting for new mon\n",
206 monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr.in_addr));
207
208 __close_session(monc);
209 __open_session(monc);
170} 210}
171 211
172/* 212/*
@@ -174,74 +214,70 @@ static bool __sub_expired(struct ceph_mon_client *monc)
174 */ 214 */
175static void __schedule_delayed(struct ceph_mon_client *monc) 215static void __schedule_delayed(struct ceph_mon_client *monc)
176{ 216{
177 struct ceph_options *opt = monc->client->options;
178 unsigned long delay; 217 unsigned long delay;
179 218
180 if (monc->cur_mon < 0 || __sub_expired(monc)) { 219 if (monc->hunting)
181 delay = 10 * HZ; 220 delay = CEPH_MONC_HUNT_INTERVAL * monc->hunt_mult;
182 } else { 221 else
183 delay = 20 * HZ; 222 delay = CEPH_MONC_PING_INTERVAL;
184 if (opt->monc_ping_timeout > 0) 223
185 delay = min(delay, opt->monc_ping_timeout / 3);
186 }
187 dout("__schedule_delayed after %lu\n", delay); 224 dout("__schedule_delayed after %lu\n", delay);
188 schedule_delayed_work(&monc->delayed_work, 225 mod_delayed_work(system_wq, &monc->delayed_work,
189 round_jiffies_relative(delay)); 226 round_jiffies_relative(delay));
190} 227}
191 228
229const char *ceph_sub_str[] = {
230 [CEPH_SUB_MDSMAP] = "mdsmap",
231 [CEPH_SUB_MONMAP] = "monmap",
232 [CEPH_SUB_OSDMAP] = "osdmap",
233};
234
192/* 235/*
193 * Send subscribe request for mdsmap and/or osdmap. 236 * Send subscribe request for one or more maps, according to
237 * monc->subs.
194 */ 238 */
195static void __send_subscribe(struct ceph_mon_client *monc) 239static void __send_subscribe(struct ceph_mon_client *monc)
196{ 240{
197 dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n", 241 struct ceph_msg *msg = monc->m_subscribe;
198 (unsigned int)monc->sub_sent, __sub_expired(monc), 242 void *p = msg->front.iov_base;
199 monc->want_next_osdmap); 243 void *const end = p + msg->front_alloc_len;
200 if ((__sub_expired(monc) && !monc->sub_sent) || 244 int num = 0;
201 monc->want_next_osdmap == 1) { 245 int i;
202 struct ceph_msg *msg = monc->m_subscribe; 246
203 struct ceph_mon_subscribe_item *i; 247 dout("%s sent %lu\n", __func__, monc->sub_renew_sent);
204 void *p, *end; 248
205 int num; 249 BUG_ON(monc->cur_mon < 0);
206 250
207 p = msg->front.iov_base; 251 if (!monc->sub_renew_sent)
208 end = p + msg->front_alloc_len; 252 monc->sub_renew_sent = jiffies | 1; /* never 0 */
209 253
210 num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; 254 msg->hdr.version = cpu_to_le16(2);
211 ceph_encode_32(&p, num); 255
212 256 for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
213 if (monc->want_next_osdmap) { 257 if (monc->subs[i].want)
214 dout("__send_subscribe to 'osdmap' %u\n", 258 num++;
215 (unsigned int)monc->have_osdmap);
216 ceph_encode_string(&p, end, "osdmap", 6);
217 i = p;
218 i->have = cpu_to_le64(monc->have_osdmap);
219 i->onetime = 1;
220 p += sizeof(*i);
221 monc->want_next_osdmap = 2; /* requested */
222 }
223 if (monc->want_mdsmap) {
224 dout("__send_subscribe to 'mdsmap' %u+\n",
225 (unsigned int)monc->have_mdsmap);
226 ceph_encode_string(&p, end, "mdsmap", 6);
227 i = p;
228 i->have = cpu_to_le64(monc->have_mdsmap);
229 i->onetime = 0;
230 p += sizeof(*i);
231 }
232 ceph_encode_string(&p, end, "monmap", 6);
233 i = p;
234 i->have = 0;
235 i->onetime = 0;
236 p += sizeof(*i);
237
238 msg->front.iov_len = p - msg->front.iov_base;
239 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
240 ceph_msg_revoke(msg);
241 ceph_con_send(&monc->con, ceph_msg_get(msg));
242
243 monc->sub_sent = jiffies | 1; /* never 0 */
244 } 259 }
260 BUG_ON(num < 1); /* monmap sub is always there */
261 ceph_encode_32(&p, num);
262 for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
263 const char *s = ceph_sub_str[i];
264
265 if (!monc->subs[i].want)
266 continue;
267
268 dout("%s %s start %llu flags 0x%x\n", __func__, s,
269 le64_to_cpu(monc->subs[i].item.start),
270 monc->subs[i].item.flags);
271 ceph_encode_string(&p, end, s, strlen(s));
272 memcpy(p, &monc->subs[i].item, sizeof(monc->subs[i].item));
273 p += sizeof(monc->subs[i].item);
274 }
275
276 BUG_ON(p != (end - 35 - (ARRAY_SIZE(monc->subs) - num) * 19));
277 msg->front.iov_len = p - msg->front.iov_base;
278 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
279 ceph_msg_revoke(msg);
280 ceph_con_send(&monc->con, ceph_msg_get(msg));
245} 281}
246 282
247static void handle_subscribe_ack(struct ceph_mon_client *monc, 283static void handle_subscribe_ack(struct ceph_mon_client *monc,
@@ -255,15 +291,16 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
255 seconds = le32_to_cpu(h->duration); 291 seconds = le32_to_cpu(h->duration);
256 292
257 mutex_lock(&monc->mutex); 293 mutex_lock(&monc->mutex);
258 if (monc->hunting) { 294 if (monc->sub_renew_sent) {
259 pr_info("mon%d %s session established\n", 295 monc->sub_renew_after = monc->sub_renew_sent +
260 monc->cur_mon, 296 (seconds >> 1) * HZ - 1;
261 ceph_pr_addr(&monc->con.peer_addr.in_addr)); 297 dout("%s sent %lu duration %d renew after %lu\n", __func__,
262 monc->hunting = false; 298 monc->sub_renew_sent, seconds, monc->sub_renew_after);
299 monc->sub_renew_sent = 0;
300 } else {
301 dout("%s sent %lu renew after %lu, ignoring\n", __func__,
302 monc->sub_renew_sent, monc->sub_renew_after);
263 } 303 }
264 dout("handle_subscribe_ack after %d seconds\n", seconds);
265 monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1;
266 monc->sub_sent = 0;
267 mutex_unlock(&monc->mutex); 304 mutex_unlock(&monc->mutex);
268 return; 305 return;
269bad: 306bad:
@@ -272,36 +309,82 @@ bad:
272} 309}
273 310
274/* 311/*
275 * Keep track of which maps we have 312 * Register interest in a map
313 *
314 * @sub: one of CEPH_SUB_*
315 * @epoch: X for "every map since X", or 0 for "just the latest"
276 */ 316 */
277int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) 317static bool __ceph_monc_want_map(struct ceph_mon_client *monc, int sub,
318 u32 epoch, bool continuous)
319{
320 __le64 start = cpu_to_le64(epoch);
321 u8 flags = !continuous ? CEPH_SUBSCRIBE_ONETIME : 0;
322
323 dout("%s %s epoch %u continuous %d\n", __func__, ceph_sub_str[sub],
324 epoch, continuous);
325
326 if (monc->subs[sub].want &&
327 monc->subs[sub].item.start == start &&
328 monc->subs[sub].item.flags == flags)
329 return false;
330
331 monc->subs[sub].item.start = start;
332 monc->subs[sub].item.flags = flags;
333 monc->subs[sub].want = true;
334
335 return true;
336}
337
338bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch,
339 bool continuous)
278{ 340{
341 bool need_request;
342
279 mutex_lock(&monc->mutex); 343 mutex_lock(&monc->mutex);
280 monc->have_mdsmap = got; 344 need_request = __ceph_monc_want_map(monc, sub, epoch, continuous);
281 mutex_unlock(&monc->mutex); 345 mutex_unlock(&monc->mutex);
282 return 0; 346
347 return need_request;
283} 348}
284EXPORT_SYMBOL(ceph_monc_got_mdsmap); 349EXPORT_SYMBOL(ceph_monc_want_map);
285 350
286int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) 351/*
352 * Keep track of which maps we have
353 *
354 * @sub: one of CEPH_SUB_*
355 */
356static void __ceph_monc_got_map(struct ceph_mon_client *monc, int sub,
357 u32 epoch)
358{
359 dout("%s %s epoch %u\n", __func__, ceph_sub_str[sub], epoch);
360
361 if (monc->subs[sub].want) {
362 if (monc->subs[sub].item.flags & CEPH_SUBSCRIBE_ONETIME)
363 monc->subs[sub].want = false;
364 else
365 monc->subs[sub].item.start = cpu_to_le64(epoch + 1);
366 }
367
368 monc->subs[sub].have = epoch;
369}
370
371void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch)
287{ 372{
288 mutex_lock(&monc->mutex); 373 mutex_lock(&monc->mutex);
289 monc->have_osdmap = got; 374 __ceph_monc_got_map(monc, sub, epoch);
290 monc->want_next_osdmap = 0;
291 mutex_unlock(&monc->mutex); 375 mutex_unlock(&monc->mutex);
292 return 0;
293} 376}
377EXPORT_SYMBOL(ceph_monc_got_map);
294 378
295/* 379/*
296 * Register interest in the next osdmap 380 * Register interest in the next osdmap
297 */ 381 */
298void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) 382void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
299{ 383{
300 dout("request_next_osdmap have %u\n", monc->have_osdmap); 384 dout("%s have %u\n", __func__, monc->subs[CEPH_SUB_OSDMAP].have);
301 mutex_lock(&monc->mutex); 385 mutex_lock(&monc->mutex);
302 if (!monc->want_next_osdmap) 386 if (__ceph_monc_want_map(monc, CEPH_SUB_OSDMAP,
303 monc->want_next_osdmap = 1; 387 monc->subs[CEPH_SUB_OSDMAP].have + 1, false))
304 if (monc->want_next_osdmap < 2)
305 __send_subscribe(monc); 388 __send_subscribe(monc);
306 mutex_unlock(&monc->mutex); 389 mutex_unlock(&monc->mutex);
307} 390}
@@ -320,15 +403,15 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
320 long ret; 403 long ret;
321 404
322 mutex_lock(&monc->mutex); 405 mutex_lock(&monc->mutex);
323 while (monc->have_osdmap < epoch) { 406 while (monc->subs[CEPH_SUB_OSDMAP].have < epoch) {
324 mutex_unlock(&monc->mutex); 407 mutex_unlock(&monc->mutex);
325 408
326 if (timeout && time_after_eq(jiffies, started + timeout)) 409 if (timeout && time_after_eq(jiffies, started + timeout))
327 return -ETIMEDOUT; 410 return -ETIMEDOUT;
328 411
329 ret = wait_event_interruptible_timeout(monc->client->auth_wq, 412 ret = wait_event_interruptible_timeout(monc->client->auth_wq,
330 monc->have_osdmap >= epoch, 413 monc->subs[CEPH_SUB_OSDMAP].have >= epoch,
331 ceph_timeout_jiffies(timeout)); 414 ceph_timeout_jiffies(timeout));
332 if (ret < 0) 415 if (ret < 0)
333 return ret; 416 return ret;
334 417
@@ -341,11 +424,14 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
341EXPORT_SYMBOL(ceph_monc_wait_osdmap); 424EXPORT_SYMBOL(ceph_monc_wait_osdmap);
342 425
343/* 426/*
344 * 427 * Open a session with a random monitor. Request monmap and osdmap,
428 * which are waited upon in __ceph_open_session().
345 */ 429 */
346int ceph_monc_open_session(struct ceph_mon_client *monc) 430int ceph_monc_open_session(struct ceph_mon_client *monc)
347{ 431{
348 mutex_lock(&monc->mutex); 432 mutex_lock(&monc->mutex);
433 __ceph_monc_want_map(monc, CEPH_SUB_MONMAP, 0, true);
434 __ceph_monc_want_map(monc, CEPH_SUB_OSDMAP, 0, false);
349 __open_session(monc); 435 __open_session(monc);
350 __schedule_delayed(monc); 436 __schedule_delayed(monc);
351 mutex_unlock(&monc->mutex); 437 mutex_unlock(&monc->mutex);
@@ -353,29 +439,15 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
353} 439}
354EXPORT_SYMBOL(ceph_monc_open_session); 440EXPORT_SYMBOL(ceph_monc_open_session);
355 441
356/*
357 * We require the fsid and global_id in order to initialize our
358 * debugfs dir.
359 */
360static bool have_debugfs_info(struct ceph_mon_client *monc)
361{
362 dout("have_debugfs_info fsid %d globalid %lld\n",
363 (int)monc->client->have_fsid, monc->auth->global_id);
364 return monc->client->have_fsid && monc->auth->global_id > 0;
365}
366
367static void ceph_monc_handle_map(struct ceph_mon_client *monc, 442static void ceph_monc_handle_map(struct ceph_mon_client *monc,
368 struct ceph_msg *msg) 443 struct ceph_msg *msg)
369{ 444{
370 struct ceph_client *client = monc->client; 445 struct ceph_client *client = monc->client;
371 struct ceph_monmap *monmap = NULL, *old = monc->monmap; 446 struct ceph_monmap *monmap = NULL, *old = monc->monmap;
372 void *p, *end; 447 void *p, *end;
373 int had_debugfs_info, init_debugfs = 0;
374 448
375 mutex_lock(&monc->mutex); 449 mutex_lock(&monc->mutex);
376 450
377 had_debugfs_info = have_debugfs_info(monc);
378
379 dout("handle_monmap\n"); 451 dout("handle_monmap\n");
380 p = msg->front.iov_base; 452 p = msg->front.iov_base;
381 end = p + msg->front.iov_len; 453 end = p + msg->front.iov_len;
@@ -395,29 +467,11 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
395 client->monc.monmap = monmap; 467 client->monc.monmap = monmap;
396 kfree(old); 468 kfree(old);
397 469
398 if (!client->have_fsid) { 470 __ceph_monc_got_map(monc, CEPH_SUB_MONMAP, monc->monmap->epoch);
399 client->have_fsid = true; 471 client->have_fsid = true;
400 if (!had_debugfs_info && have_debugfs_info(monc)) {
401 pr_info("client%lld fsid %pU\n",
402 ceph_client_id(monc->client),
403 &monc->client->fsid);
404 init_debugfs = 1;
405 }
406 mutex_unlock(&monc->mutex);
407
408 if (init_debugfs) {
409 /*
410 * do debugfs initialization without mutex to avoid
411 * creating a locking dependency
412 */
413 ceph_debugfs_client_init(monc->client);
414 }
415 472
416 goto out_unlocked;
417 }
418out: 473out:
419 mutex_unlock(&monc->mutex); 474 mutex_unlock(&monc->mutex);
420out_unlocked:
421 wake_up_all(&client->auth_wq); 475 wake_up_all(&client->auth_wq);
422} 476}
423 477
@@ -745,18 +799,15 @@ static void delayed_work(struct work_struct *work)
745 dout("monc delayed_work\n"); 799 dout("monc delayed_work\n");
746 mutex_lock(&monc->mutex); 800 mutex_lock(&monc->mutex);
747 if (monc->hunting) { 801 if (monc->hunting) {
748 __close_session(monc); 802 dout("%s continuing hunt\n", __func__);
749 __open_session(monc); /* continue hunting */ 803 reopen_session(monc);
750 } else { 804 } else {
751 struct ceph_options *opt = monc->client->options;
752 int is_auth = ceph_auth_is_authenticated(monc->auth); 805 int is_auth = ceph_auth_is_authenticated(monc->auth);
753 if (ceph_con_keepalive_expired(&monc->con, 806 if (ceph_con_keepalive_expired(&monc->con,
754 opt->monc_ping_timeout)) { 807 CEPH_MONC_PING_TIMEOUT)) {
755 dout("monc keepalive timeout\n"); 808 dout("monc keepalive timeout\n");
756 is_auth = 0; 809 is_auth = 0;
757 __close_session(monc); 810 reopen_session(monc);
758 monc->hunting = true;
759 __open_session(monc);
760 } 811 }
761 812
762 if (!monc->hunting) { 813 if (!monc->hunting) {
@@ -764,8 +815,14 @@ static void delayed_work(struct work_struct *work)
764 __validate_auth(monc); 815 __validate_auth(monc);
765 } 816 }
766 817
767 if (is_auth) 818 if (is_auth) {
768 __send_subscribe(monc); 819 unsigned long now = jiffies;
820
821 dout("%s renew subs? now %lu renew after %lu\n",
822 __func__, now, monc->sub_renew_after);
823 if (time_after_eq(now, monc->sub_renew_after))
824 __send_subscribe(monc);
825 }
769 } 826 }
770 __schedule_delayed(monc); 827 __schedule_delayed(monc);
771 mutex_unlock(&monc->mutex); 828 mutex_unlock(&monc->mutex);
@@ -852,18 +909,14 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
852 &monc->client->msgr); 909 &monc->client->msgr);
853 910
854 monc->cur_mon = -1; 911 monc->cur_mon = -1;
855 monc->hunting = true; 912 monc->had_a_connection = false;
856 monc->sub_renew_after = jiffies; 913 monc->hunt_mult = 1;
857 monc->sub_sent = 0;
858 914
859 INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); 915 INIT_DELAYED_WORK(&monc->delayed_work, delayed_work);
860 monc->generic_request_tree = RB_ROOT; 916 monc->generic_request_tree = RB_ROOT;
861 monc->num_generic_requests = 0; 917 monc->num_generic_requests = 0;
862 monc->last_tid = 0; 918 monc->last_tid = 0;
863 919
864 monc->have_mdsmap = 0;
865 monc->have_osdmap = 0;
866 monc->want_next_osdmap = 1;
867 return 0; 920 return 0;
868 921
869out_auth_reply: 922out_auth_reply:
@@ -888,7 +941,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
888 941
889 mutex_lock(&monc->mutex); 942 mutex_lock(&monc->mutex);
890 __close_session(monc); 943 __close_session(monc);
891 944 monc->cur_mon = -1;
892 mutex_unlock(&monc->mutex); 945 mutex_unlock(&monc->mutex);
893 946
894 /* 947 /*
@@ -910,26 +963,40 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
910} 963}
911EXPORT_SYMBOL(ceph_monc_stop); 964EXPORT_SYMBOL(ceph_monc_stop);
912 965
966static void finish_hunting(struct ceph_mon_client *monc)
967{
968 if (monc->hunting) {
969 dout("%s found mon%d\n", __func__, monc->cur_mon);
970 monc->hunting = false;
971 monc->had_a_connection = true;
972 monc->hunt_mult /= 2; /* reduce by 50% */
973 if (monc->hunt_mult < 1)
974 monc->hunt_mult = 1;
975 }
976}
977
913static void handle_auth_reply(struct ceph_mon_client *monc, 978static void handle_auth_reply(struct ceph_mon_client *monc,
914 struct ceph_msg *msg) 979 struct ceph_msg *msg)
915{ 980{
916 int ret; 981 int ret;
917 int was_auth = 0; 982 int was_auth = 0;
918 int had_debugfs_info, init_debugfs = 0;
919 983
920 mutex_lock(&monc->mutex); 984 mutex_lock(&monc->mutex);
921 had_debugfs_info = have_debugfs_info(monc);
922 was_auth = ceph_auth_is_authenticated(monc->auth); 985 was_auth = ceph_auth_is_authenticated(monc->auth);
923 monc->pending_auth = 0; 986 monc->pending_auth = 0;
924 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, 987 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
925 msg->front.iov_len, 988 msg->front.iov_len,
926 monc->m_auth->front.iov_base, 989 monc->m_auth->front.iov_base,
927 monc->m_auth->front_alloc_len); 990 monc->m_auth->front_alloc_len);
991 if (ret > 0) {
992 __send_prepared_auth_request(monc, ret);
993 goto out;
994 }
995
996 finish_hunting(monc);
997
928 if (ret < 0) { 998 if (ret < 0) {
929 monc->client->auth_err = ret; 999 monc->client->auth_err = ret;
930 wake_up_all(&monc->client->auth_wq);
931 } else if (ret > 0) {
932 __send_prepared_auth_request(monc, ret);
933 } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) { 1000 } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
934 dout("authenticated, starting session\n"); 1001 dout("authenticated, starting session\n");
935 1002
@@ -939,23 +1006,15 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
939 1006
940 __send_subscribe(monc); 1007 __send_subscribe(monc);
941 __resend_generic_request(monc); 1008 __resend_generic_request(monc);
942 }
943 1009
944 if (!had_debugfs_info && have_debugfs_info(monc)) { 1010 pr_info("mon%d %s session established\n", monc->cur_mon,
945 pr_info("client%lld fsid %pU\n", 1011 ceph_pr_addr(&monc->con.peer_addr.in_addr));
946 ceph_client_id(monc->client),
947 &monc->client->fsid);
948 init_debugfs = 1;
949 } 1012 }
950 mutex_unlock(&monc->mutex);
951 1013
952 if (init_debugfs) { 1014out:
953 /* 1015 mutex_unlock(&monc->mutex);
954 * do debugfs initialization without mutex to avoid 1016 if (monc->client->auth_err < 0)
955 * creating a locking dependency 1017 wake_up_all(&monc->client->auth_wq);
956 */
957 ceph_debugfs_client_init(monc->client);
958 }
959} 1018}
960 1019
961static int __validate_auth(struct ceph_mon_client *monc) 1020static int __validate_auth(struct ceph_mon_client *monc)
@@ -1096,29 +1155,17 @@ static void mon_fault(struct ceph_connection *con)
1096{ 1155{
1097 struct ceph_mon_client *monc = con->private; 1156 struct ceph_mon_client *monc = con->private;
1098 1157
1099 if (!monc)
1100 return;
1101
1102 dout("mon_fault\n");
1103 mutex_lock(&monc->mutex); 1158 mutex_lock(&monc->mutex);
1104 if (!con->private) 1159 dout("%s mon%d\n", __func__, monc->cur_mon);
1105 goto out; 1160 if (monc->cur_mon >= 0) {
1106 1161 if (!monc->hunting) {
1107 if (!monc->hunting) 1162 dout("%s hunting for new mon\n", __func__);
1108 pr_info("mon%d %s session lost, " 1163 reopen_session(monc);
1109 "hunting for new mon\n", monc->cur_mon, 1164 __schedule_delayed(monc);
1110 ceph_pr_addr(&monc->con.peer_addr.in_addr)); 1165 } else {
1111 1166 dout("%s already hunting\n", __func__);
1112 __close_session(monc); 1167 }
1113 if (!monc->hunting) {
1114 /* start hunting */
1115 monc->hunting = true;
1116 __open_session(monc);
1117 } else {
1118 /* already hunting, let's wait a bit */
1119 __schedule_delayed(monc);
1120 } 1168 }
1121out:
1122 mutex_unlock(&monc->mutex); 1169 mutex_unlock(&monc->mutex);
1123} 1170}
1124 1171
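
Note: the mon_client.c rework turns reconnection into explicit hunting with backoff. pick_new_mon() chooses a random monitor, excluding the current one when re-picking, and the delayed-work interval scales by a hunt multiplier that grows on each failed hunt and shrinks again once a session is established. A stripped-down sketch of that selection and backoff logic (the CEPH_MONC_* constants are the new ones this series adds on the mon_client side):

    /* simplified pick_new_mon(): assumes cur is -1 or a valid index */
    static int pick_other_mon(int cur, int num_mon)
    {
            int n;

            if (num_mon == 1)
                    return 0;

            n = prandom_u32() % (cur >= 0 ? num_mon - 1 : num_mon);
            if (cur >= 0 && n >= cur)
                    n++;            /* skip over the monitor we just left */
            return n;
    }

    /* delay while hunting vs. steady-state pings:
     *   hunting:  CEPH_MONC_HUNT_INTERVAL * hunt_mult
     *   settled:  CEPH_MONC_PING_INTERVAL
     * on each new hunt:  hunt_mult = min(hunt_mult * CEPH_MONC_HUNT_BACKOFF,
     *                                    CEPH_MONC_HUNT_MAX_MULT);
     * on success:        hunt_mult = max(hunt_mult / 2, 1);
     */
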
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5bc053778fed..40a53a70efdf 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref)
338 ceph_put_snap_context(req->r_snapc); 338 ceph_put_snap_context(req->r_snapc);
339 if (req->r_mempool) 339 if (req->r_mempool)
340 mempool_free(req, req->r_osdc->req_mempool); 340 mempool_free(req, req->r_osdc->req_mempool);
341 else 341 else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS)
342 kmem_cache_free(ceph_osd_request_cache, req); 342 kmem_cache_free(ceph_osd_request_cache, req);
343 343 else
344 kfree(req);
344} 345}
345 346
346void ceph_osdc_get_request(struct ceph_osd_request *req) 347void ceph_osdc_get_request(struct ceph_osd_request *req)
@@ -369,28 +370,22 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
369 struct ceph_msg *msg; 370 struct ceph_msg *msg;
370 size_t msg_size; 371 size_t msg_size;
371 372
372 BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
373 BUG_ON(num_ops > CEPH_OSD_MAX_OP);
374
375 msg_size = 4 + 4 + 8 + 8 + 4+8;
376 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
377 msg_size += 1 + 8 + 4 + 4; /* pg_t */
378 msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
379 msg_size += 2 + num_ops*sizeof(struct ceph_osd_op);
380 msg_size += 8; /* snapid */
381 msg_size += 8; /* snap_seq */
382 msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
383 msg_size += 4;
384
385 if (use_mempool) { 373 if (use_mempool) {
374 BUG_ON(num_ops > CEPH_OSD_SLAB_OPS);
386 req = mempool_alloc(osdc->req_mempool, gfp_flags); 375 req = mempool_alloc(osdc->req_mempool, gfp_flags);
387 memset(req, 0, sizeof(*req)); 376 } else if (num_ops <= CEPH_OSD_SLAB_OPS) {
377 req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags);
388 } else { 378 } else {
389 req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags); 379 BUG_ON(num_ops > CEPH_OSD_MAX_OPS);
380 req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]),
381 gfp_flags);
390 } 382 }
391 if (req == NULL) 383 if (unlikely(!req))
392 return NULL; 384 return NULL;
393 385
386 /* req only, each op is zeroed in _osd_req_op_init() */
387 memset(req, 0, sizeof(*req));
388
394 req->r_osdc = osdc; 389 req->r_osdc = osdc;
395 req->r_mempool = use_mempool; 390 req->r_mempool = use_mempool;
396 req->r_num_ops = num_ops; 391 req->r_num_ops = num_ops;
@@ -408,18 +403,36 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
408 req->r_base_oloc.pool = -1; 403 req->r_base_oloc.pool = -1;
409 req->r_target_oloc.pool = -1; 404 req->r_target_oloc.pool = -1;
410 405
406 msg_size = OSD_OPREPLY_FRONT_LEN;
407 if (num_ops > CEPH_OSD_SLAB_OPS) {
408 /* ceph_osd_op and rval */
409 msg_size += (num_ops - CEPH_OSD_SLAB_OPS) *
410 (sizeof(struct ceph_osd_op) + 4);
411 }
412
411 /* create reply message */ 413 /* create reply message */
412 if (use_mempool) 414 if (use_mempool)
413 msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); 415 msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
414 else 416 else
415 msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, 417 msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size,
416 OSD_OPREPLY_FRONT_LEN, gfp_flags, true); 418 gfp_flags, true);
417 if (!msg) { 419 if (!msg) {
418 ceph_osdc_put_request(req); 420 ceph_osdc_put_request(req);
419 return NULL; 421 return NULL;
420 } 422 }
421 req->r_reply = msg; 423 req->r_reply = msg;
422 424
425 msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */
426 msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */
427 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
428 msg_size += 1 + 8 + 4 + 4; /* pgid */
429 msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
430 msg_size += 2 + num_ops * sizeof(struct ceph_osd_op);
431 msg_size += 8; /* snapid */
432 msg_size += 8; /* snap_seq */
433 msg_size += 4 + 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
434 msg_size += 4; /* retry_attempt */
435
423 /* create request message; allow space for oid */ 436 /* create request message; allow space for oid */
424 if (use_mempool) 437 if (use_mempool)
425 msg = ceph_msgpool_get(&osdc->msgpool_op, 0); 438 msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -498,7 +511,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
498 if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL) 511 if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL)
499 payload_len += length; 512 payload_len += length;
500 513
501 op->payload_len = payload_len; 514 op->indata_len = payload_len;
502} 515}
503EXPORT_SYMBOL(osd_req_op_extent_init); 516EXPORT_SYMBOL(osd_req_op_extent_init);
504 517
@@ -517,10 +530,32 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
517 BUG_ON(length > previous); 530 BUG_ON(length > previous);
518 531
519 op->extent.length = length; 532 op->extent.length = length;
520 op->payload_len -= previous - length; 533 op->indata_len -= previous - length;
521} 534}
522EXPORT_SYMBOL(osd_req_op_extent_update); 535EXPORT_SYMBOL(osd_req_op_extent_update);
523 536
537void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
538 unsigned int which, u64 offset_inc)
539{
540 struct ceph_osd_req_op *op, *prev_op;
541
542 BUG_ON(which + 1 >= osd_req->r_num_ops);
543
544 prev_op = &osd_req->r_ops[which];
545 op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags);
546 /* dup previous one */
547 op->indata_len = prev_op->indata_len;
548 op->outdata_len = prev_op->outdata_len;
549 op->extent = prev_op->extent;
550 /* adjust offset */
551 op->extent.offset += offset_inc;
552 op->extent.length -= offset_inc;
553
554 if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL)
555 op->indata_len -= offset_inc;
556}
557EXPORT_SYMBOL(osd_req_op_extent_dup_last);
558
524void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 559void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
525 u16 opcode, const char *class, const char *method) 560 u16 opcode, const char *class, const char *method)
526{ 561{
@@ -554,7 +589,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
554 589
555 op->cls.argc = 0; /* currently unused */ 590 op->cls.argc = 0; /* currently unused */
556 591
557 op->payload_len = payload_len; 592 op->indata_len = payload_len;
558} 593}
559EXPORT_SYMBOL(osd_req_op_cls_init); 594EXPORT_SYMBOL(osd_req_op_cls_init);
560 595
@@ -587,7 +622,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
587 op->xattr.cmp_mode = cmp_mode; 622 op->xattr.cmp_mode = cmp_mode;
588 623
589 ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); 624 ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
590 op->payload_len = payload_len; 625 op->indata_len = payload_len;
591 return 0; 626 return 0;
592} 627}
593EXPORT_SYMBOL(osd_req_op_xattr_init); 628EXPORT_SYMBOL(osd_req_op_xattr_init);
@@ -707,7 +742,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
707 BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE); 742 BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
708 dst->cls.indata_len = cpu_to_le32(data_length); 743 dst->cls.indata_len = cpu_to_le32(data_length);
709 ceph_osdc_msg_data_add(req->r_request, osd_data); 744 ceph_osdc_msg_data_add(req->r_request, osd_data);
710 src->payload_len += data_length; 745 src->indata_len += data_length;
711 request_data_len += data_length; 746 request_data_len += data_length;
712 } 747 }
713 osd_data = &src->cls.response_data; 748 osd_data = &src->cls.response_data;
@@ -750,7 +785,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
750 785
751 dst->op = cpu_to_le16(src->op); 786 dst->op = cpu_to_le16(src->op);
752 dst->flags = cpu_to_le32(src->flags); 787 dst->flags = cpu_to_le32(src->flags);
753 dst->payload_len = cpu_to_le32(src->payload_len); 788 dst->payload_len = cpu_to_le32(src->indata_len);
754 789
755 return request_data_len; 790 return request_data_len;
756} 791}
@@ -1052,10 +1087,8 @@ static void put_osd(struct ceph_osd *osd)
1052 dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), 1087 dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
1053 atomic_read(&osd->o_ref) - 1); 1088 atomic_read(&osd->o_ref) - 1);
1054 if (atomic_dec_and_test(&osd->o_ref)) { 1089 if (atomic_dec_and_test(&osd->o_ref)) {
1055 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
1056
1057 if (osd->o_auth.authorizer) 1090 if (osd->o_auth.authorizer)
1058 ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); 1091 ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
1059 kfree(osd); 1092 kfree(osd);
1060 } 1093 }
1061} 1094}
@@ -1810,7 +1843,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1810 1843
1811 ceph_decode_need(&p, end, 4, bad_put); 1844 ceph_decode_need(&p, end, 4, bad_put);
1812 numops = ceph_decode_32(&p); 1845 numops = ceph_decode_32(&p);
1813 if (numops > CEPH_OSD_MAX_OP) 1846 if (numops > CEPH_OSD_MAX_OPS)
1814 goto bad_put; 1847 goto bad_put;
1815 if (numops != req->r_num_ops) 1848 if (numops != req->r_num_ops)
1816 goto bad_put; 1849 goto bad_put;
@@ -1821,7 +1854,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1821 int len; 1854 int len;
1822 1855
1823 len = le32_to_cpu(op->payload_len); 1856 len = le32_to_cpu(op->payload_len);
1824 req->r_reply_op_len[i] = len; 1857 req->r_ops[i].outdata_len = len;
1825 dout(" op %d has %d bytes\n", i, len); 1858 dout(" op %d has %d bytes\n", i, len);
1826 payload_len += len; 1859 payload_len += len;
1827 p += sizeof(*op); 1860 p += sizeof(*op);
@@ -1836,7 +1869,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1836 ceph_decode_need(&p, end, 4 + numops * 4, bad_put); 1869 ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
1837 retry_attempt = ceph_decode_32(&p); 1870 retry_attempt = ceph_decode_32(&p);
1838 for (i = 0; i < numops; i++) 1871 for (i = 0; i < numops; i++)
1839 req->r_reply_op_result[i] = ceph_decode_32(&p); 1872 req->r_ops[i].rval = ceph_decode_32(&p);
1840 1873
1841 if (le16_to_cpu(msg->hdr.version) >= 6) { 1874 if (le16_to_cpu(msg->hdr.version) >= 6) {
1842 p += 8 + 4; /* skip replay_version */ 1875 p += 8 + 4; /* skip replay_version */
@@ -2187,7 +2220,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
2187 goto bad; 2220 goto bad;
2188done: 2221done:
2189 downgrade_write(&osdc->map_sem); 2222 downgrade_write(&osdc->map_sem);
2190 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); 2223 ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
2224 osdc->osdmap->epoch);
2191 2225
2192 /* 2226 /*
2193 * subscribe to subsequent osdmap updates if full to ensure 2227 * subscribe to subsequent osdmap updates if full to ensure
@@ -2646,8 +2680,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
2646 round_jiffies_relative(osdc->client->options->osd_idle_ttl)); 2680 round_jiffies_relative(osdc->client->options->osd_idle_ttl));
2647 2681
2648 err = -ENOMEM; 2682 err = -ENOMEM;
2649 osdc->req_mempool = mempool_create_kmalloc_pool(10, 2683 osdc->req_mempool = mempool_create_slab_pool(10,
2650 sizeof(struct ceph_osd_request)); 2684 ceph_osd_request_cache);
2651 if (!osdc->req_mempool) 2685 if (!osdc->req_mempool)
2652 goto out; 2686 goto out;
2653 2687
@@ -2782,11 +2816,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages);
2782 2816
2783int ceph_osdc_setup(void) 2817int ceph_osdc_setup(void)
2784{ 2818{
2819 size_t size = sizeof(struct ceph_osd_request) +
2820 CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
2821
2785 BUG_ON(ceph_osd_request_cache); 2822 BUG_ON(ceph_osd_request_cache);
2786 ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", 2823 ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size,
2787 sizeof (struct ceph_osd_request), 2824 0, 0, NULL);
2788 __alignof__(struct ceph_osd_request),
2789 0, NULL);
2790 2825
2791 return ceph_osd_request_cache ? 0 : -ENOMEM; 2826 return ceph_osd_request_cache ? 0 : -ENOMEM;
2792} 2827}
@@ -2947,7 +2982,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
2947 struct ceph_auth_handshake *auth = &o->o_auth; 2982 struct ceph_auth_handshake *auth = &o->o_auth;
2948 2983
2949 if (force_new && auth->authorizer) { 2984 if (force_new && auth->authorizer) {
2950 ceph_auth_destroy_authorizer(ac, auth->authorizer); 2985 ceph_auth_destroy_authorizer(auth->authorizer);
2951 auth->authorizer = NULL; 2986 auth->authorizer = NULL;
2952 } 2987 }
2953 if (!auth->authorizer) { 2988 if (!auth->authorizer) {
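
Note: in osd_client.c the per-op array moves into the request itself. Requests with at most CEPH_OSD_SLAB_OPS ops come from the slab cache, now sized for that common case, while larger requests fall back to a plain kmalloc() of the struct plus a flexible trailing r_ops[] array; the reply message is likewise grown only for the extra ops. The allocation and free shape, consolidated from the hunks above (a sketch, not a verbatim extract):

    struct ceph_osd_request *req;

    if (use_mempool) {
            BUG_ON(num_ops > CEPH_OSD_SLAB_OPS);
            req = mempool_alloc(osdc->req_mempool, gfp);
    } else if (num_ops <= CEPH_OSD_SLAB_OPS) {
            req = kmem_cache_alloc(ceph_osd_request_cache, gfp);
    } else {
            req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]), gfp);
    }
    if (!req)
            return NULL;
    memset(req, 0, sizeof(*req));   /* ops are zeroed later in _osd_req_op_init() */

    /* ... and on release ... */
    if (req->r_mempool)
            mempool_free(req, req->r_osdc->req_mempool);
    else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS)
            kmem_cache_free(ceph_osd_request_cache, req);
    else
            kfree(req);
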
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index c7c220a736e5..6864007e64fc 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -56,7 +56,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
56 size_t bit = pl->room; 56 size_t bit = pl->room;
57 int ret; 57 int ret;
58 58
59 memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), 59 memcpy(pl->mapped_tail + (pl->length & ~PAGE_MASK),
60 buf, bit); 60 buf, bit);
61 pl->length += bit; 61 pl->length += bit;
62 pl->room -= bit; 62 pl->room -= bit;
@@ -67,7 +67,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
67 return ret; 67 return ret;
68 } 68 }
69 69
70 memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); 70 memcpy(pl->mapped_tail + (pl->length & ~PAGE_MASK), buf, len);
71 pl->length += len; 71 pl->length += len;
72 pl->room -= len; 72 pl->room -= len;
73 return 0; 73 return 0;
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index d4f5f220a8e5..00d2601407c5 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -24,7 +24,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
24 return ERR_PTR(-ENOMEM); 24 return ERR_PTR(-ENOMEM);
25 25
26 while (got < num_pages) { 26 while (got < num_pages) {
27 rc = get_user_pages_unlocked(current, current->mm, 27 rc = get_user_pages_unlocked(
28 (unsigned long)data + ((unsigned long)got * PAGE_SIZE), 28 (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
29 num_pages - got, write_page, 0, pages + got); 29 num_pages - got, write_page, 0, pages + got);
30 if (rc < 0) 30 if (rc < 0)
@@ -95,19 +95,19 @@ int ceph_copy_user_to_page_vector(struct page **pages,
95 loff_t off, size_t len) 95 loff_t off, size_t len)
96{ 96{
97 int i = 0; 97 int i = 0;
98 int po = off & ~PAGE_CACHE_MASK; 98 int po = off & ~PAGE_MASK;
99 int left = len; 99 int left = len;
100 int l, bad; 100 int l, bad;
101 101
102 while (left > 0) { 102 while (left > 0) {
103 l = min_t(int, PAGE_CACHE_SIZE-po, left); 103 l = min_t(int, PAGE_SIZE-po, left);
104 bad = copy_from_user(page_address(pages[i]) + po, data, l); 104 bad = copy_from_user(page_address(pages[i]) + po, data, l);
105 if (bad == l) 105 if (bad == l)
106 return -EFAULT; 106 return -EFAULT;
107 data += l - bad; 107 data += l - bad;
108 left -= l - bad; 108 left -= l - bad;
109 po += l - bad; 109 po += l - bad;
110 if (po == PAGE_CACHE_SIZE) { 110 if (po == PAGE_SIZE) {
111 po = 0; 111 po = 0;
112 i++; 112 i++;
113 } 113 }
@@ -121,17 +121,17 @@ void ceph_copy_to_page_vector(struct page **pages,
121 loff_t off, size_t len) 121 loff_t off, size_t len)
122{ 122{
123 int i = 0; 123 int i = 0;
124 size_t po = off & ~PAGE_CACHE_MASK; 124 size_t po = off & ~PAGE_MASK;
125 size_t left = len; 125 size_t left = len;
126 126
127 while (left > 0) { 127 while (left > 0) {
128 size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); 128 size_t l = min_t(size_t, PAGE_SIZE-po, left);
129 129
130 memcpy(page_address(pages[i]) + po, data, l); 130 memcpy(page_address(pages[i]) + po, data, l);
131 data += l; 131 data += l;
132 left -= l; 132 left -= l;
133 po += l; 133 po += l;
134 if (po == PAGE_CACHE_SIZE) { 134 if (po == PAGE_SIZE) {
135 po = 0; 135 po = 0;
136 i++; 136 i++;
137 } 137 }
@@ -144,17 +144,17 @@ void ceph_copy_from_page_vector(struct page **pages,
144 loff_t off, size_t len) 144 loff_t off, size_t len)
145{ 145{
146 int i = 0; 146 int i = 0;
147 size_t po = off & ~PAGE_CACHE_MASK; 147 size_t po = off & ~PAGE_MASK;
148 size_t left = len; 148 size_t left = len;
149 149
150 while (left > 0) { 150 while (left > 0) {
151 size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); 151 size_t l = min_t(size_t, PAGE_SIZE-po, left);
152 152
153 memcpy(data, page_address(pages[i]) + po, l); 153 memcpy(data, page_address(pages[i]) + po, l);
154 data += l; 154 data += l;
155 left -= l; 155 left -= l;
156 po += l; 156 po += l;
157 if (po == PAGE_CACHE_SIZE) { 157 if (po == PAGE_SIZE) {
158 po = 0; 158 po = 0;
159 i++; 159 i++;
160 } 160 }
@@ -168,25 +168,25 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector);
168 */ 168 */
169void ceph_zero_page_vector_range(int off, int len, struct page **pages) 169void ceph_zero_page_vector_range(int off, int len, struct page **pages)
170{ 170{
171 int i = off >> PAGE_CACHE_SHIFT; 171 int i = off >> PAGE_SHIFT;
172 172
173 off &= ~PAGE_CACHE_MASK; 173 off &= ~PAGE_MASK;
174 174
175 dout("zero_page_vector_page %u~%u\n", off, len); 175 dout("zero_page_vector_page %u~%u\n", off, len);
176 176
177 /* leading partial page? */ 177 /* leading partial page? */
178 if (off) { 178 if (off) {
179 int end = min((int)PAGE_CACHE_SIZE, off + len); 179 int end = min((int)PAGE_SIZE, off + len);
180 dout("zeroing %d %p head from %d\n", i, pages[i], 180 dout("zeroing %d %p head from %d\n", i, pages[i],
181 (int)off); 181 (int)off);
182 zero_user_segment(pages[i], off, end); 182 zero_user_segment(pages[i], off, end);
183 len -= (end - off); 183 len -= (end - off);
184 i++; 184 i++;
185 } 185 }
186 while (len >= PAGE_CACHE_SIZE) { 186 while (len >= PAGE_SIZE) {
187 dout("zeroing %d %p len=%d\n", i, pages[i], len); 187 dout("zeroing %d %p len=%d\n", i, pages[i], len);
188 zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); 188 zero_user_segment(pages[i], 0, PAGE_SIZE);
189 len -= PAGE_CACHE_SIZE; 189 len -= PAGE_SIZE;
190 i++; 190 i++;
191 } 191 }
192 /* trailing partial page? */ 192 /* trailing partial page? */
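Note: the PAGE_CACHE_* substitutions in the ceph hunks above are mechanical. The removed pagemap.h aliases were plain synonyms for the page-size macros, so behaviour is unchanged; the old definitions amounted to:

	/* former aliases from include/linux/pagemap.h, removed by this cleanup;
	 * the ceph changes simply substitute the right-hand sides
	 */
	#define PAGE_CACHE_SHIFT	PAGE_SHIFT
	#define PAGE_CACHE_SIZE		PAGE_SIZE
	#define PAGE_CACHE_MASK		PAGE_MASK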
diff --git a/net/core/Makefile b/net/core/Makefile
index 0b835de04de3..d6508c2ddca5 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,3 +24,6 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
24obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o 24obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
25obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o 25obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
26obj-$(CONFIG_LWTUNNEL) += lwtunnel.o 26obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
27obj-$(CONFIG_DST_CACHE) += dst_cache.o
28obj-$(CONFIG_HWBM) += hwbm.o
29obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ef061b2badc..5c925ac50b95 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2802,7 +2802,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
2802 2802
2803 if (skb->ip_summed != CHECKSUM_NONE && 2803 if (skb->ip_summed != CHECKSUM_NONE &&
2804 !can_checksum_protocol(features, type)) { 2804 !can_checksum_protocol(features, type)) {
2805 features &= ~NETIF_F_CSUM_MASK; 2805 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
2806 } else if (illegal_highdma(skb->dev, skb)) { 2806 } else if (illegal_highdma(skb->dev, skb)) {
2807 features &= ~NETIF_F_SG; 2807 features &= ~NETIF_F_SG;
2808 } 2808 }
@@ -3829,8 +3829,14 @@ static void net_tx_action(struct softirq_action *h)
3829 trace_consume_skb(skb); 3829 trace_consume_skb(skb);
3830 else 3830 else
3831 trace_kfree_skb(skb, net_tx_action); 3831 trace_kfree_skb(skb, net_tx_action);
3832 __kfree_skb(skb); 3832
3833 if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
3834 __kfree_skb(skb);
3835 else
3836 __kfree_skb_defer(skb);
3833 } 3837 }
3838
3839 __kfree_skb_flush();
3834 } 3840 }
3835 3841
3836 if (sd->output_queue) { 3842 if (sd->output_queue) {
@@ -4154,7 +4160,10 @@ ncls:
4154 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 4160 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
4155 } else { 4161 } else {
4156drop: 4162drop:
4157 atomic_long_inc(&skb->dev->rx_dropped); 4163 if (!deliver_exact)
4164 atomic_long_inc(&skb->dev->rx_dropped);
4165 else
4166 atomic_long_inc(&skb->dev->rx_nohandler);
4158 kfree_skb(skb); 4167 kfree_skb(skb);
4159 /* Jamal, now you will not able to escape explaining 4168 /* Jamal, now you will not able to escape explaining
4160 * me how you were going to use this. :-) 4169 * me how you were going to use this. :-)
@@ -4429,7 +4438,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
4429 NAPI_GRO_CB(skb)->same_flow = 0; 4438 NAPI_GRO_CB(skb)->same_flow = 0;
4430 NAPI_GRO_CB(skb)->flush = 0; 4439 NAPI_GRO_CB(skb)->flush = 0;
4431 NAPI_GRO_CB(skb)->free = 0; 4440 NAPI_GRO_CB(skb)->free = 0;
4432 NAPI_GRO_CB(skb)->udp_mark = 0; 4441 NAPI_GRO_CB(skb)->encap_mark = 0;
4442 NAPI_GRO_CB(skb)->is_fou = 0;
4433 NAPI_GRO_CB(skb)->gro_remcsum_start = 0; 4443 NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
4434 4444
4435 /* Setup for GRO checksum validation */ 4445 /* Setup for GRO checksum validation */
@@ -5152,6 +5162,7 @@ static void net_rx_action(struct softirq_action *h)
5152 } 5162 }
5153 } 5163 }
5154 5164
5165 __kfree_skb_flush();
5155 local_irq_disable(); 5166 local_irq_disable();
5156 5167
5157 list_splice_tail_init(&sd->poll_list, &list); 5168 list_splice_tail_init(&sd->poll_list, &list);
@@ -6435,6 +6446,7 @@ EXPORT_SYMBOL(dev_get_phys_port_id);
6435 * dev_get_phys_port_name - Get device physical port name 6446 * dev_get_phys_port_name - Get device physical port name
6436 * @dev: device 6447 * @dev: device
6437 * @name: port name 6448 * @name: port name
6449 * @len: limit of bytes to copy to name
6438 * 6450 *
6439 * Get device physical port name 6451 * Get device physical port name
6440 */ 6452 */
@@ -7253,24 +7265,31 @@ void netdev_run_todo(void)
7253 } 7265 }
7254} 7266}
7255 7267
7256/* Convert net_device_stats to rtnl_link_stats64. They have the same 7268/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
7257 * fields in the same order, with only the type differing. 7269 * all the same fields in the same order as net_device_stats, with only
7270 * the type differing, but rtnl_link_stats64 may have additional fields
7271 * at the end for newer counters.
7258 */ 7272 */
7259void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, 7273void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
7260 const struct net_device_stats *netdev_stats) 7274 const struct net_device_stats *netdev_stats)
7261{ 7275{
7262#if BITS_PER_LONG == 64 7276#if BITS_PER_LONG == 64
7263 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); 7277 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
7264 memcpy(stats64, netdev_stats, sizeof(*stats64)); 7278 memcpy(stats64, netdev_stats, sizeof(*stats64));
7279 /* zero out counters that only exist in rtnl_link_stats64 */
7280 memset((char *)stats64 + sizeof(*netdev_stats), 0,
7281 sizeof(*stats64) - sizeof(*netdev_stats));
7265#else 7282#else
7266 size_t i, n = sizeof(*stats64) / sizeof(u64); 7283 size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
7267 const unsigned long *src = (const unsigned long *)netdev_stats; 7284 const unsigned long *src = (const unsigned long *)netdev_stats;
7268 u64 *dst = (u64 *)stats64; 7285 u64 *dst = (u64 *)stats64;
7269 7286
7270 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != 7287 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
7271 sizeof(*stats64) / sizeof(u64));
7272 for (i = 0; i < n; i++) 7288 for (i = 0; i < n; i++)
7273 dst[i] = src[i]; 7289 dst[i] = src[i];
7290 /* zero out counters that only exist in rtnl_link_stats64 */
7291 memset((char *)stats64 + n * sizeof(u64), 0,
7292 sizeof(*stats64) - n * sizeof(u64));
7274#endif 7293#endif
7275} 7294}
7276EXPORT_SYMBOL(netdev_stats_to_stats64); 7295EXPORT_SYMBOL(netdev_stats_to_stats64);
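The reworked helper above widens each legacy counter and then clears any fields that exist only at the tail of rtnl_link_stats64 (such as rx_nohandler, added below), instead of assuming the two structures have the same size. A stand-alone sketch of the same copy-and-pad pattern, using illustrative stand-in types rather than the kernel structures:

	#include <stdint.h>
	#include <string.h>

	struct old_stats { unsigned long rx_packets, tx_packets; };          /* stand-in for net_device_stats */
	struct new_stats { uint64_t rx_packets, tx_packets, rx_nohandler; }; /* stand-in for rtnl_link_stats64 */

	static void old_to_new(struct new_stats *dst, const struct old_stats *src)
	{
		size_t i, n = sizeof(*src) / sizeof(unsigned long);
		const unsigned long *s = (const unsigned long *)src;
		uint64_t *d = (uint64_t *)dst;

		for (i = 0; i < n; i++)		/* widen each legacy counter */
			d[i] = s[i];
		/* zero the counters that only exist in the newer structure */
		memset((char *)dst + n * sizeof(uint64_t), 0,
		       sizeof(*dst) - n * sizeof(uint64_t));
	}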
@@ -7300,6 +7319,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
7300 } 7319 }
7301 storage->rx_dropped += atomic_long_read(&dev->rx_dropped); 7320 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
7302 storage->tx_dropped += atomic_long_read(&dev->tx_dropped); 7321 storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
7322 storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
7303 return storage; 7323 return storage;
7304} 7324}
7305EXPORT_SYMBOL(dev_get_stats); 7325EXPORT_SYMBOL(dev_get_stats);
diff --git a/net/core/devlink.c b/net/core/devlink.c
new file mode 100644
index 000000000000..590fa561cb7f
--- /dev/null
+++ b/net/core/devlink.c
@@ -0,0 +1,738 @@
1/*
2 * net/core/devlink.c - Network physical/parent device Netlink interface
3 *
4 * Heavily inspired by net/wireless/
5 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
6 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/slab.h>
18#include <linux/gfp.h>
19#include <linux/device.h>
20#include <linux/list.h>
21#include <linux/netdevice.h>
22#include <rdma/ib_verbs.h>
23#include <net/netlink.h>
24#include <net/genetlink.h>
25#include <net/rtnetlink.h>
26#include <net/net_namespace.h>
27#include <net/sock.h>
28#include <net/devlink.h>
29
30static LIST_HEAD(devlink_list);
31
32/* devlink_mutex
33 *
34 * An overall lock guarding every operation coming from userspace.
35 * It also guards devlink devices list and it is taken when
36 * driver registers/unregisters it.
37 */
38static DEFINE_MUTEX(devlink_mutex);
39
40/* devlink_port_mutex
41 *
42 * Shared lock to guard lists of ports in all devlink devices.
43 */
44static DEFINE_MUTEX(devlink_port_mutex);
45
46static struct net *devlink_net(const struct devlink *devlink)
47{
48 return read_pnet(&devlink->_net);
49}
50
51static void devlink_net_set(struct devlink *devlink, struct net *net)
52{
53 write_pnet(&devlink->_net, net);
54}
55
56static struct devlink *devlink_get_from_attrs(struct net *net,
57 struct nlattr **attrs)
58{
59 struct devlink *devlink;
60 char *busname;
61 char *devname;
62
63 if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME])
64 return ERR_PTR(-EINVAL);
65
66 busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
67 devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
68
69 list_for_each_entry(devlink, &devlink_list, list) {
70 if (strcmp(devlink->dev->bus->name, busname) == 0 &&
71 strcmp(dev_name(devlink->dev), devname) == 0 &&
72 net_eq(devlink_net(devlink), net))
73 return devlink;
74 }
75
76 return ERR_PTR(-ENODEV);
77}
78
79static struct devlink *devlink_get_from_info(struct genl_info *info)
80{
81 return devlink_get_from_attrs(genl_info_net(info), info->attrs);
82}
83
84static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
85 int port_index)
86{
87 struct devlink_port *devlink_port;
88
89 list_for_each_entry(devlink_port, &devlink->port_list, list) {
90 if (devlink_port->index == port_index)
91 return devlink_port;
92 }
93 return NULL;
94}
95
96static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
97{
98 return devlink_port_get_by_index(devlink, port_index);
99}
100
101static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
102 struct nlattr **attrs)
103{
104 if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
105 u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
106 struct devlink_port *devlink_port;
107
108 devlink_port = devlink_port_get_by_index(devlink, port_index);
109 if (!devlink_port)
110 return ERR_PTR(-ENODEV);
111 return devlink_port;
112 }
113 return ERR_PTR(-EINVAL);
114}
115
116static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
117 struct genl_info *info)
118{
119 return devlink_port_get_from_attrs(devlink, info->attrs);
120}
121
122#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
123
124static int devlink_nl_pre_doit(const struct genl_ops *ops,
125 struct sk_buff *skb, struct genl_info *info)
126{
127 struct devlink *devlink;
128
129 mutex_lock(&devlink_mutex);
130 devlink = devlink_get_from_info(info);
131 if (IS_ERR(devlink)) {
132 mutex_unlock(&devlink_mutex);
133 return PTR_ERR(devlink);
134 }
135 info->user_ptr[0] = devlink;
136 if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
137 struct devlink_port *devlink_port;
138
139 mutex_lock(&devlink_port_mutex);
140 devlink_port = devlink_port_get_from_info(devlink, info);
141 if (IS_ERR(devlink_port)) {
142 mutex_unlock(&devlink_port_mutex);
143 mutex_unlock(&devlink_mutex);
144 return PTR_ERR(devlink_port);
145 }
146 info->user_ptr[1] = devlink_port;
147 }
148 return 0;
149}
150
151static void devlink_nl_post_doit(const struct genl_ops *ops,
152 struct sk_buff *skb, struct genl_info *info)
153{
154 if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
155 mutex_unlock(&devlink_port_mutex);
156 mutex_unlock(&devlink_mutex);
157}
158
159static struct genl_family devlink_nl_family = {
160 .id = GENL_ID_GENERATE,
161 .name = DEVLINK_GENL_NAME,
162 .version = DEVLINK_GENL_VERSION,
163 .maxattr = DEVLINK_ATTR_MAX,
164 .netnsok = true,
165 .pre_doit = devlink_nl_pre_doit,
166 .post_doit = devlink_nl_post_doit,
167};
168
169enum devlink_multicast_groups {
170 DEVLINK_MCGRP_CONFIG,
171};
172
173static const struct genl_multicast_group devlink_nl_mcgrps[] = {
174 [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
175};
176
177static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
178{
179 if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
180 return -EMSGSIZE;
181 if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
182 return -EMSGSIZE;
183 return 0;
184}
185
186static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
187 enum devlink_command cmd, u32 portid,
188 u32 seq, int flags)
189{
190 void *hdr;
191
192 hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
193 if (!hdr)
194 return -EMSGSIZE;
195
196 if (devlink_nl_put_handle(msg, devlink))
197 goto nla_put_failure;
198
199 genlmsg_end(msg, hdr);
200 return 0;
201
202nla_put_failure:
203 genlmsg_cancel(msg, hdr);
204 return -EMSGSIZE;
205}
206
207static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
208{
209 struct sk_buff *msg;
210 int err;
211
212 WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
213
214 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
215 if (!msg)
216 return;
217
218 err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
219 if (err) {
220 nlmsg_free(msg);
221 return;
222 }
223
224 genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
225 msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
226}
227
228static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
229 struct devlink_port *devlink_port,
230 enum devlink_command cmd, u32 portid,
231 u32 seq, int flags)
232{
233 void *hdr;
234
235 hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
236 if (!hdr)
237 return -EMSGSIZE;
238
239 if (devlink_nl_put_handle(msg, devlink))
240 goto nla_put_failure;
241 if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
242 goto nla_put_failure;
243 if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
244 goto nla_put_failure;
245 if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
246 nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
247 devlink_port->desired_type))
248 goto nla_put_failure;
249 if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
250 struct net_device *netdev = devlink_port->type_dev;
251
252 if (netdev &&
253 (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
254 netdev->ifindex) ||
255 nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
256 netdev->name)))
257 goto nla_put_failure;
258 }
259 if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
260 struct ib_device *ibdev = devlink_port->type_dev;
261
262 if (ibdev &&
263 nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
264 ibdev->name))
265 goto nla_put_failure;
266 }
267 if (devlink_port->split &&
268 nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
269 devlink_port->split_group))
270 goto nla_put_failure;
271
272 genlmsg_end(msg, hdr);
273 return 0;
274
275nla_put_failure:
276 genlmsg_cancel(msg, hdr);
277 return -EMSGSIZE;
278}
279
280static void devlink_port_notify(struct devlink_port *devlink_port,
281 enum devlink_command cmd)
282{
283 struct devlink *devlink = devlink_port->devlink;
284 struct sk_buff *msg;
285 int err;
286
287 if (!devlink_port->registered)
288 return;
289
290 WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
291
292 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
293 if (!msg)
294 return;
295
296 err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0);
297 if (err) {
298 nlmsg_free(msg);
299 return;
300 }
301
302 genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
303 msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
304}
305
306static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
307{
308 struct devlink *devlink = info->user_ptr[0];
309 struct sk_buff *msg;
310 int err;
311
312 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
313 if (!msg)
314 return -ENOMEM;
315
316 err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
317 info->snd_portid, info->snd_seq, 0);
318 if (err) {
319 nlmsg_free(msg);
320 return err;
321 }
322
323 return genlmsg_reply(msg, info);
324}
325
326static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
327 struct netlink_callback *cb)
328{
329 struct devlink *devlink;
330 int start = cb->args[0];
331 int idx = 0;
332 int err;
333
334 mutex_lock(&devlink_mutex);
335 list_for_each_entry(devlink, &devlink_list, list) {
336 if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
337 continue;
338 if (idx < start) {
339 idx++;
340 continue;
341 }
342 err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
343 NETLINK_CB(cb->skb).portid,
344 cb->nlh->nlmsg_seq, NLM_F_MULTI);
345 if (err)
346 goto out;
347 idx++;
348 }
349out:
350 mutex_unlock(&devlink_mutex);
351
352 cb->args[0] = idx;
353 return msg->len;
354}
355
356static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
357 struct genl_info *info)
358{
359 struct devlink *devlink = info->user_ptr[0];
360 struct devlink_port *devlink_port = info->user_ptr[1];
361 struct sk_buff *msg;
362 int err;
363
364 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
365 if (!msg)
366 return -ENOMEM;
367
368 err = devlink_nl_port_fill(msg, devlink, devlink_port,
369 DEVLINK_CMD_PORT_NEW,
370 info->snd_portid, info->snd_seq, 0);
371 if (err) {
372 nlmsg_free(msg);
373 return err;
374 }
375
376 return genlmsg_reply(msg, info);
377}
378
379static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
380 struct netlink_callback *cb)
381{
382 struct devlink *devlink;
383 struct devlink_port *devlink_port;
384 int start = cb->args[0];
385 int idx = 0;
386 int err;
387
388 mutex_lock(&devlink_mutex);
389 mutex_lock(&devlink_port_mutex);
390 list_for_each_entry(devlink, &devlink_list, list) {
391 if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
392 continue;
393 list_for_each_entry(devlink_port, &devlink->port_list, list) {
394 if (idx < start) {
395 idx++;
396 continue;
397 }
398 err = devlink_nl_port_fill(msg, devlink, devlink_port,
399 DEVLINK_CMD_NEW,
400 NETLINK_CB(cb->skb).portid,
401 cb->nlh->nlmsg_seq,
402 NLM_F_MULTI);
403 if (err)
404 goto out;
405 idx++;
406 }
407 }
408out:
409 mutex_unlock(&devlink_port_mutex);
410 mutex_unlock(&devlink_mutex);
411
412 cb->args[0] = idx;
413 return msg->len;
414}
415
416static int devlink_port_type_set(struct devlink *devlink,
417 struct devlink_port *devlink_port,
418 enum devlink_port_type port_type)
419
420{
421 int err;
422
423 if (devlink->ops && devlink->ops->port_type_set) {
424 if (port_type == DEVLINK_PORT_TYPE_NOTSET)
425 return -EINVAL;
426 err = devlink->ops->port_type_set(devlink_port, port_type);
427 if (err)
428 return err;
429 devlink_port->desired_type = port_type;
430 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
431 return 0;
432 }
433 return -EOPNOTSUPP;
434}
435
436static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
437 struct genl_info *info)
438{
439 struct devlink *devlink = info->user_ptr[0];
440 struct devlink_port *devlink_port = info->user_ptr[1];
441 int err;
442
443 if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
444 enum devlink_port_type port_type;
445
446 port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
447 err = devlink_port_type_set(devlink, devlink_port, port_type);
448 if (err)
449 return err;
450 }
451 return 0;
452}
453
454static int devlink_port_split(struct devlink *devlink,
455 u32 port_index, u32 count)
456
457{
458 if (devlink->ops && devlink->ops->port_split)
459 return devlink->ops->port_split(devlink, port_index, count);
460 return -EOPNOTSUPP;
461}
462
463static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
464 struct genl_info *info)
465{
466 struct devlink *devlink = info->user_ptr[0];
467 u32 port_index;
468 u32 count;
469
470 if (!info->attrs[DEVLINK_ATTR_PORT_INDEX] ||
471 !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
472 return -EINVAL;
473
474 port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
475 count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
476 return devlink_port_split(devlink, port_index, count);
477}
478
479static int devlink_port_unsplit(struct devlink *devlink, u32 port_index)
480
481{
482 if (devlink->ops && devlink->ops->port_unsplit)
483 return devlink->ops->port_unsplit(devlink, port_index);
484 return -EOPNOTSUPP;
485}
486
487static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
488 struct genl_info *info)
489{
490 struct devlink *devlink = info->user_ptr[0];
491 u32 port_index;
492
493 if (!info->attrs[DEVLINK_ATTR_PORT_INDEX])
494 return -EINVAL;
495
496 port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
497 return devlink_port_unsplit(devlink, port_index);
498}
499
500static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
501 [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
502 [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
503 [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
504 [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
505 [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
506};
507
508static const struct genl_ops devlink_nl_ops[] = {
509 {
510 .cmd = DEVLINK_CMD_GET,
511 .doit = devlink_nl_cmd_get_doit,
512 .dumpit = devlink_nl_cmd_get_dumpit,
513 .policy = devlink_nl_policy,
514 /* can be retrieved by unprivileged users */
515 },
516 {
517 .cmd = DEVLINK_CMD_PORT_GET,
518 .doit = devlink_nl_cmd_port_get_doit,
519 .dumpit = devlink_nl_cmd_port_get_dumpit,
520 .policy = devlink_nl_policy,
521 .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
522 /* can be retrieved by unprivileged users */
523 },
524 {
525 .cmd = DEVLINK_CMD_PORT_SET,
526 .doit = devlink_nl_cmd_port_set_doit,
527 .policy = devlink_nl_policy,
528 .flags = GENL_ADMIN_PERM,
529 .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
530 },
531 {
532 .cmd = DEVLINK_CMD_PORT_SPLIT,
533 .doit = devlink_nl_cmd_port_split_doit,
534 .policy = devlink_nl_policy,
535 .flags = GENL_ADMIN_PERM,
536 },
537 {
538 .cmd = DEVLINK_CMD_PORT_UNSPLIT,
539 .doit = devlink_nl_cmd_port_unsplit_doit,
540 .policy = devlink_nl_policy,
541 .flags = GENL_ADMIN_PERM,
542 },
543};
544
545/**
546 * devlink_alloc - Allocate new devlink instance resources
547 *
548 * @ops: ops
549 * @priv_size: size of user private data
550 *
551 * Allocate new devlink instance resources, including devlink index
552 * and name.
553 */
554struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
555{
556 struct devlink *devlink;
557
558 devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
559 if (!devlink)
560 return NULL;
561 devlink->ops = ops;
562 devlink_net_set(devlink, &init_net);
563 INIT_LIST_HEAD(&devlink->port_list);
564 return devlink;
565}
566EXPORT_SYMBOL_GPL(devlink_alloc);
567
568/**
569 * devlink_register - Register devlink instance
570 *
571 * @devlink: devlink
572 */
573int devlink_register(struct devlink *devlink, struct device *dev)
574{
575 mutex_lock(&devlink_mutex);
576 devlink->dev = dev;
577 list_add_tail(&devlink->list, &devlink_list);
578 devlink_notify(devlink, DEVLINK_CMD_NEW);
579 mutex_unlock(&devlink_mutex);
580 return 0;
581}
582EXPORT_SYMBOL_GPL(devlink_register);
583
584/**
585 * devlink_unregister - Unregister devlink instance
586 *
587 * @devlink: devlink
588 */
589void devlink_unregister(struct devlink *devlink)
590{
591 mutex_lock(&devlink_mutex);
592 devlink_notify(devlink, DEVLINK_CMD_DEL);
593 list_del(&devlink->list);
594 mutex_unlock(&devlink_mutex);
595}
596EXPORT_SYMBOL_GPL(devlink_unregister);
597
598/**
599 * devlink_free - Free devlink instance resources
600 *
601 * @devlink: devlink
602 */
603void devlink_free(struct devlink *devlink)
604{
605 kfree(devlink);
606}
607EXPORT_SYMBOL_GPL(devlink_free);
608
609/**
610 * devlink_port_register - Register devlink port
611 *
612 * @devlink: devlink
613 * @devlink_port: devlink port
 614 * @port_index: driver-specific port index
615 *
 616 * Register devlink port with provided port index. The driver can use
 617 * any indexing scheme, even a hardware-related one. The devlink_port
 618 * structure is meant to be embedded in the driver's private structure.
619 * Note that the caller should take care of zeroing the devlink_port
620 * structure.
621 */
622int devlink_port_register(struct devlink *devlink,
623 struct devlink_port *devlink_port,
624 unsigned int port_index)
625{
626 mutex_lock(&devlink_port_mutex);
627 if (devlink_port_index_exists(devlink, port_index)) {
628 mutex_unlock(&devlink_port_mutex);
629 return -EEXIST;
630 }
631 devlink_port->devlink = devlink;
632 devlink_port->index = port_index;
633 devlink_port->type = DEVLINK_PORT_TYPE_NOTSET;
634 devlink_port->registered = true;
635 list_add_tail(&devlink_port->list, &devlink->port_list);
636 mutex_unlock(&devlink_port_mutex);
637 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
638 return 0;
639}
640EXPORT_SYMBOL_GPL(devlink_port_register);
641
642/**
643 * devlink_port_unregister - Unregister devlink port
644 *
645 * @devlink_port: devlink port
646 */
647void devlink_port_unregister(struct devlink_port *devlink_port)
648{
649 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
650 mutex_lock(&devlink_port_mutex);
651 list_del(&devlink_port->list);
652 mutex_unlock(&devlink_port_mutex);
653}
654EXPORT_SYMBOL_GPL(devlink_port_unregister);
655
656static void __devlink_port_type_set(struct devlink_port *devlink_port,
657 enum devlink_port_type type,
658 void *type_dev)
659{
660 devlink_port->type = type;
661 devlink_port->type_dev = type_dev;
662 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
663}
664
665/**
666 * devlink_port_type_eth_set - Set port type to Ethernet
667 *
668 * @devlink_port: devlink port
669 * @netdev: related netdevice
670 */
671void devlink_port_type_eth_set(struct devlink_port *devlink_port,
672 struct net_device *netdev)
673{
674 return __devlink_port_type_set(devlink_port,
675 DEVLINK_PORT_TYPE_ETH, netdev);
676}
677EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
678
679/**
680 * devlink_port_type_ib_set - Set port type to InfiniBand
681 *
682 * @devlink_port: devlink port
683 * @ibdev: related IB device
684 */
685void devlink_port_type_ib_set(struct devlink_port *devlink_port,
686 struct ib_device *ibdev)
687{
688 return __devlink_port_type_set(devlink_port,
689 DEVLINK_PORT_TYPE_IB, ibdev);
690}
691EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
692
693/**
694 * devlink_port_type_clear - Clear port type
695 *
696 * @devlink_port: devlink port
697 */
698void devlink_port_type_clear(struct devlink_port *devlink_port)
699{
700 return __devlink_port_type_set(devlink_port,
701 DEVLINK_PORT_TYPE_NOTSET, NULL);
702}
703EXPORT_SYMBOL_GPL(devlink_port_type_clear);
704
705/**
706 * devlink_port_split_set - Set port is split
707 *
708 * @devlink_port: devlink port
709 * @split_group: split group - identifies group split port is part of
710 */
711void devlink_port_split_set(struct devlink_port *devlink_port,
712 u32 split_group)
713{
714 devlink_port->split = true;
715 devlink_port->split_group = split_group;
716 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
717}
718EXPORT_SYMBOL_GPL(devlink_port_split_set);
719
720static int __init devlink_module_init(void)
721{
722 return genl_register_family_with_ops_groups(&devlink_nl_family,
723 devlink_nl_ops,
724 devlink_nl_mcgrps);
725}
726
727static void __exit devlink_module_exit(void)
728{
729 genl_unregister_family(&devlink_nl_family);
730}
731
732module_init(devlink_module_init);
733module_exit(devlink_module_exit);
734
735MODULE_LICENSE("GPL v2");
736MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
737MODULE_DESCRIPTION("Network physical device Netlink interface");
738MODULE_ALIAS_GENL_FAMILY(DEVLINK_GENL_NAME);
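For context, a rough driver-side sketch of how the API introduced by this new file is meant to be consumed (hypothetical driver names, error paths trimmed; assumes the devlink_priv() accessor from the accompanying include/net/devlink.h):

	#include <linux/device.h>
	#include <linux/netdevice.h>
	#include <net/devlink.h>

	struct mynic_priv {				/* hypothetical driver private data */
		struct devlink_port dl_port;
	};

	static const struct devlink_ops mynic_devlink_ops = {
		/* optionally .port_type_set / .port_split / .port_unsplit */
	};

	static int mynic_devlink_setup(struct device *dev, struct net_device *netdev)
	{
		struct mynic_priv *priv;
		struct devlink *dl;
		int err;

		dl = devlink_alloc(&mynic_devlink_ops, sizeof(*priv));
		if (!dl)
			return -ENOMEM;
		priv = devlink_priv(dl);		/* zeroed by devlink_alloc() */

		err = devlink_register(dl, dev);
		if (err)
			goto err_free;

		/* one devlink port per physical port, index chosen by the driver */
		err = devlink_port_register(dl, &priv->dl_port, 0);
		if (err)
			goto err_unregister;
		devlink_port_type_eth_set(&priv->dl_port, netdev);
		return 0;

	err_unregister:
		devlink_unregister(dl);
	err_free:
		devlink_free(dl);
		return err;
	}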
diff --git a/net/core/dst.c b/net/core/dst.c
index a1656e3b8d72..b5cbbe07f786 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -265,7 +265,7 @@ again:
265 lwtstate_put(dst->lwtstate); 265 lwtstate_put(dst->lwtstate);
266 266
267 if (dst->flags & DST_METADATA) 267 if (dst->flags & DST_METADATA)
268 kfree(dst); 268 metadata_dst_free((struct metadata_dst *)dst);
269 else 269 else
270 kmem_cache_free(dst->ops->kmem_cachep, dst); 270 kmem_cache_free(dst->ops->kmem_cachep, dst);
271 271
@@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
395} 395}
396EXPORT_SYMBOL_GPL(metadata_dst_alloc); 396EXPORT_SYMBOL_GPL(metadata_dst_alloc);
397 397
398void metadata_dst_free(struct metadata_dst *md_dst)
399{
400#ifdef CONFIG_DST_CACHE
401 dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
402#endif
403 kfree(md_dst);
404}
405
398struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags) 406struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
399{ 407{
400 int cpu; 408 int cpu;
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
new file mode 100644
index 000000000000..554d36449231
--- /dev/null
+++ b/net/core/dst_cache.c
@@ -0,0 +1,168 @@
1/*
2 * net/core/dst_cache.c - dst entry cache
3 *
4 * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/percpu.h>
14#include <net/dst_cache.h>
15#include <net/route.h>
16#if IS_ENABLED(CONFIG_IPV6)
17#include <net/ip6_fib.h>
18#endif
19#include <uapi/linux/in.h>
20
21struct dst_cache_pcpu {
22 unsigned long refresh_ts;
23 struct dst_entry *dst;
24 u32 cookie;
25 union {
26 struct in_addr in_saddr;
27 struct in6_addr in6_saddr;
28 };
29};
30
31static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
32 struct dst_entry *dst, u32 cookie)
33{
34 dst_release(dst_cache->dst);
35 if (dst)
36 dst_hold(dst);
37
38 dst_cache->cookie = cookie;
39 dst_cache->dst = dst;
40}
41
42static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
43 struct dst_cache_pcpu *idst)
44{
45 struct dst_entry *dst;
46
47 dst = idst->dst;
48 if (!dst)
49 goto fail;
50
 51 /* the cache already holds a dst reference; it can't go away */
52 dst_hold(dst);
53
54 if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
55 (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
56 dst_cache_per_cpu_dst_set(idst, NULL, 0);
57 dst_release(dst);
58 goto fail;
59 }
60 return dst;
61
62fail:
63 idst->refresh_ts = jiffies;
64 return NULL;
65}
66
67struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
68{
69 if (!dst_cache->cache)
70 return NULL;
71
72 return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
73}
74EXPORT_SYMBOL_GPL(dst_cache_get);
75
76struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
77{
78 struct dst_cache_pcpu *idst;
79 struct dst_entry *dst;
80
81 if (!dst_cache->cache)
82 return NULL;
83
84 idst = this_cpu_ptr(dst_cache->cache);
85 dst = dst_cache_per_cpu_get(dst_cache, idst);
86 if (!dst)
87 return NULL;
88
89 *saddr = idst->in_saddr.s_addr;
90 return container_of(dst, struct rtable, dst);
91}
92EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
93
94void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
95 __be32 saddr)
96{
97 struct dst_cache_pcpu *idst;
98
99 if (!dst_cache->cache)
100 return;
101
102 idst = this_cpu_ptr(dst_cache->cache);
103 dst_cache_per_cpu_dst_set(idst, dst, 0);
104 idst->in_saddr.s_addr = saddr;
105}
106EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
107
108#if IS_ENABLED(CONFIG_IPV6)
109void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
110 const struct in6_addr *addr)
111{
112 struct dst_cache_pcpu *idst;
113
114 if (!dst_cache->cache)
115 return;
116
117 idst = this_cpu_ptr(dst_cache->cache);
118 dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
119 rt6_get_cookie((struct rt6_info *)dst));
120 idst->in6_saddr = *addr;
121}
122EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
123
124struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
125 struct in6_addr *saddr)
126{
127 struct dst_cache_pcpu *idst;
128 struct dst_entry *dst;
129
130 if (!dst_cache->cache)
131 return NULL;
132
133 idst = this_cpu_ptr(dst_cache->cache);
134 dst = dst_cache_per_cpu_get(dst_cache, idst);
135 if (!dst)
136 return NULL;
137
138 *saddr = idst->in6_saddr;
139 return dst;
140}
141EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
142#endif
143
144int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
145{
146 dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
147 gfp | __GFP_ZERO);
148 if (!dst_cache->cache)
149 return -ENOMEM;
150
151 dst_cache_reset(dst_cache);
152 return 0;
153}
154EXPORT_SYMBOL_GPL(dst_cache_init);
155
156void dst_cache_destroy(struct dst_cache *dst_cache)
157{
158 int i;
159
160 if (!dst_cache->cache)
161 return;
162
163 for_each_possible_cpu(i)
164 dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
165
166 free_percpu(dst_cache->cache);
167}
168EXPORT_SYMBOL_GPL(dst_cache_destroy);
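A rough picture of how a tunnel driver is expected to use the new cache on its transmit path (struct my_tunnel and the surrounding glue are hypothetical, not part of this diff; the cache itself would be set up with dst_cache_init() and torn down with dst_cache_destroy()):

	#include <linux/err.h>
	#include <net/dst_cache.h>
	#include <net/route.h>

	struct my_tunnel {			/* hypothetical tunnel private data */
		struct net *net;
		struct dst_cache dst_cache;
	};

	static struct rtable *my_tunnel_get_route(struct my_tunnel *t, struct flowi4 *fl4)
	{
		struct rtable *rt;
		__be32 saddr;

		rt = dst_cache_get_ip4(&t->dst_cache, &saddr);
		if (rt) {
			fl4->saddr = saddr;
			return rt;		/* the cache grabbed a reference for us */
		}

		rt = ip_route_output_key(t->net, fl4);	/* slow path: full route lookup */
		if (IS_ERR(rt))
			return rt;

		/* remember the route and source address for the next packet */
		dst_cache_set_ip4(&t->dst_cache, &rt->dst, fl4->saddr);
		return rt;
	}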
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index daf04709dd3c..f426c5ad6149 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
98 [NETIF_F_RXALL_BIT] = "rx-all", 98 [NETIF_F_RXALL_BIT] = "rx-all",
99 [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", 99 [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
100 [NETIF_F_BUSY_POLL_BIT] = "busy-poll", 100 [NETIF_F_BUSY_POLL_BIT] = "busy-poll",
101 [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
101}; 102};
102 103
103static const char 104static const char
@@ -386,43 +387,461 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
386 return 0; 387 return 0;
387} 388}
388 389
389int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 390static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32)
390{ 391{
392 bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
393 dst[0] = legacy_u32;
394}
395
396/* return false if src had higher bits set. lower bits always updated. */
397static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32,
398 const unsigned long *src)
399{
400 bool retval = true;
401
402 /* TODO: following test will soon always be true */
403 if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) {
404 __ETHTOOL_DECLARE_LINK_MODE_MASK(ext);
405
406 bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
407 bitmap_fill(ext, 32);
408 bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
409 if (bitmap_intersects(ext, src,
410 __ETHTOOL_LINK_MODE_MASK_NBITS)) {
411 /* src mask goes beyond bit 31 */
412 retval = false;
413 }
414 }
415 *legacy_u32 = src[0];
416 return retval;
417}
418
419/* return false if legacy contained non-0 deprecated fields
420 * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
421 */
422static bool
423convert_legacy_settings_to_link_ksettings(
424 struct ethtool_link_ksettings *link_ksettings,
425 const struct ethtool_cmd *legacy_settings)
426{
427 bool retval = true;
428
429 memset(link_ksettings, 0, sizeof(*link_ksettings));
430
 431 /* This is used to tell users that the driver is still using these
432 * deprecated legacy fields, and they should not use
433 * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
434 */
435 if (legacy_settings->transceiver ||
436 legacy_settings->maxtxpkt ||
437 legacy_settings->maxrxpkt)
438 retval = false;
439
440 convert_legacy_u32_to_link_mode(
441 link_ksettings->link_modes.supported,
442 legacy_settings->supported);
443 convert_legacy_u32_to_link_mode(
444 link_ksettings->link_modes.advertising,
445 legacy_settings->advertising);
446 convert_legacy_u32_to_link_mode(
447 link_ksettings->link_modes.lp_advertising,
448 legacy_settings->lp_advertising);
449 link_ksettings->base.speed
450 = ethtool_cmd_speed(legacy_settings);
451 link_ksettings->base.duplex
452 = legacy_settings->duplex;
453 link_ksettings->base.port
454 = legacy_settings->port;
455 link_ksettings->base.phy_address
456 = legacy_settings->phy_address;
457 link_ksettings->base.autoneg
458 = legacy_settings->autoneg;
459 link_ksettings->base.mdio_support
460 = legacy_settings->mdio_support;
461 link_ksettings->base.eth_tp_mdix
462 = legacy_settings->eth_tp_mdix;
463 link_ksettings->base.eth_tp_mdix_ctrl
464 = legacy_settings->eth_tp_mdix_ctrl;
465 return retval;
466}
467
468/* return false if ksettings link modes had higher bits
469 * set. legacy_settings always updated (best effort)
470 */
471static bool
472convert_link_ksettings_to_legacy_settings(
473 struct ethtool_cmd *legacy_settings,
474 const struct ethtool_link_ksettings *link_ksettings)
475{
476 bool retval = true;
477
478 memset(legacy_settings, 0, sizeof(*legacy_settings));
479 /* this also clears the deprecated fields in legacy structure:
480 * __u8 transceiver;
481 * __u32 maxtxpkt;
482 * __u32 maxrxpkt;
483 */
484
485 retval &= convert_link_mode_to_legacy_u32(
486 &legacy_settings->supported,
487 link_ksettings->link_modes.supported);
488 retval &= convert_link_mode_to_legacy_u32(
489 &legacy_settings->advertising,
490 link_ksettings->link_modes.advertising);
491 retval &= convert_link_mode_to_legacy_u32(
492 &legacy_settings->lp_advertising,
493 link_ksettings->link_modes.lp_advertising);
494 ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed);
495 legacy_settings->duplex
496 = link_ksettings->base.duplex;
497 legacy_settings->port
498 = link_ksettings->base.port;
499 legacy_settings->phy_address
500 = link_ksettings->base.phy_address;
501 legacy_settings->autoneg
502 = link_ksettings->base.autoneg;
503 legacy_settings->mdio_support
504 = link_ksettings->base.mdio_support;
505 legacy_settings->eth_tp_mdix
506 = link_ksettings->base.eth_tp_mdix;
507 legacy_settings->eth_tp_mdix_ctrl
508 = link_ksettings->base.eth_tp_mdix_ctrl;
509 return retval;
510}
511
512/* number of 32-bit words to store the user's link mode bitmaps */
513#define __ETHTOOL_LINK_MODE_MASK_NU32 \
514 DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32)
515
516/* layout of the struct passed from/to userland */
517struct ethtool_link_usettings {
518 struct ethtool_link_settings base;
519 struct {
520 __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32];
521 __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
522 __u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
523 } link_modes;
524};
525
526/* Internal kernel helper to query a device ethtool_link_settings.
527 *
528 * Backward compatibility note: for compatibility with legacy drivers
529 * that implement only the ethtool_cmd API, this has to work with both
530 * drivers implementing get_link_ksettings API and drivers
531 * implementing get_settings API. When drivers implement get_settings
532 * and report ethtool_cmd deprecated fields
533 * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored
534 * because the resulting struct ethtool_link_settings does not report them.
535 */
536int __ethtool_get_link_ksettings(struct net_device *dev,
537 struct ethtool_link_ksettings *link_ksettings)
538{
539 int err;
540 struct ethtool_cmd cmd;
541
391 ASSERT_RTNL(); 542 ASSERT_RTNL();
392 543
544 if (dev->ethtool_ops->get_link_ksettings) {
545 memset(link_ksettings, 0, sizeof(*link_ksettings));
546 return dev->ethtool_ops->get_link_ksettings(dev,
547 link_ksettings);
548 }
549
550 /* driver doesn't support %ethtool_link_ksettings API. revert to
551 * legacy %ethtool_cmd API, unless it's not supported either.
552 * TODO: remove when ethtool_ops::get_settings disappears internally
553 */
393 if (!dev->ethtool_ops->get_settings) 554 if (!dev->ethtool_ops->get_settings)
394 return -EOPNOTSUPP; 555 return -EOPNOTSUPP;
395 556
396 memset(cmd, 0, sizeof(struct ethtool_cmd)); 557 memset(&cmd, 0, sizeof(cmd));
397 cmd->cmd = ETHTOOL_GSET; 558 cmd.cmd = ETHTOOL_GSET;
398 return dev->ethtool_ops->get_settings(dev, cmd); 559 err = dev->ethtool_ops->get_settings(dev, &cmd);
560 if (err < 0)
561 return err;
562
563 /* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt
564 */
565 convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd);
566 return err;
399} 567}
400EXPORT_SYMBOL(__ethtool_get_settings); 568EXPORT_SYMBOL(__ethtool_get_link_ksettings);
401 569
402static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) 570/* convert ethtool_link_usettings in user space to a kernel internal
571 * ethtool_link_ksettings. return 0 on success, errno on error.
572 */
573static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to,
574 const void __user *from)
403{ 575{
404 int err; 576 struct ethtool_link_usettings link_usettings;
405 struct ethtool_cmd cmd; 577
578 if (copy_from_user(&link_usettings, from, sizeof(link_usettings)))
579 return -EFAULT;
580
581 memcpy(&to->base, &link_usettings.base, sizeof(to->base));
582 bitmap_from_u32array(to->link_modes.supported,
583 __ETHTOOL_LINK_MODE_MASK_NBITS,
584 link_usettings.link_modes.supported,
585 __ETHTOOL_LINK_MODE_MASK_NU32);
586 bitmap_from_u32array(to->link_modes.advertising,
587 __ETHTOOL_LINK_MODE_MASK_NBITS,
588 link_usettings.link_modes.advertising,
589 __ETHTOOL_LINK_MODE_MASK_NU32);
590 bitmap_from_u32array(to->link_modes.lp_advertising,
591 __ETHTOOL_LINK_MODE_MASK_NBITS,
592 link_usettings.link_modes.lp_advertising,
593 __ETHTOOL_LINK_MODE_MASK_NU32);
594
595 return 0;
596}
597
598/* convert a kernel internal ethtool_link_ksettings to
599 * ethtool_link_usettings in user space. return 0 on success, errno on
600 * error.
601 */
602static int
603store_link_ksettings_for_user(void __user *to,
604 const struct ethtool_link_ksettings *from)
605{
606 struct ethtool_link_usettings link_usettings;
607
608 memcpy(&link_usettings.base, &from->base, sizeof(link_usettings));
609 bitmap_to_u32array(link_usettings.link_modes.supported,
610 __ETHTOOL_LINK_MODE_MASK_NU32,
611 from->link_modes.supported,
612 __ETHTOOL_LINK_MODE_MASK_NBITS);
613 bitmap_to_u32array(link_usettings.link_modes.advertising,
614 __ETHTOOL_LINK_MODE_MASK_NU32,
615 from->link_modes.advertising,
616 __ETHTOOL_LINK_MODE_MASK_NBITS);
617 bitmap_to_u32array(link_usettings.link_modes.lp_advertising,
618 __ETHTOOL_LINK_MODE_MASK_NU32,
619 from->link_modes.lp_advertising,
620 __ETHTOOL_LINK_MODE_MASK_NBITS);
621
622 if (copy_to_user(to, &link_usettings, sizeof(link_usettings)))
623 return -EFAULT;
624
625 return 0;
626}
627
628/* Query device for its ethtool_link_settings.
629 *
630 * Backward compatibility note: this function must fail when driver
631 * does not implement ethtool::get_link_ksettings, even if legacy
632 * ethtool_ops::get_settings is implemented. This tells new versions
633 * of ethtool that they should use the legacy API %ETHTOOL_GSET for
634 * this driver, so that they can correctly access the ethtool_cmd
635 * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
636 * implements ethtool_ops::get_settings anymore.
637 */
638static int ethtool_get_link_ksettings(struct net_device *dev,
639 void __user *useraddr)
640{
641 int err = 0;
642 struct ethtool_link_ksettings link_ksettings;
406 643
407 err = __ethtool_get_settings(dev, &cmd); 644 ASSERT_RTNL();
645
646 if (!dev->ethtool_ops->get_link_ksettings)
647 return -EOPNOTSUPP;
648
649 /* handle bitmap nbits handshake */
650 if (copy_from_user(&link_ksettings.base, useraddr,
651 sizeof(link_ksettings.base)))
652 return -EFAULT;
653
654 if (__ETHTOOL_LINK_MODE_MASK_NU32
655 != link_ksettings.base.link_mode_masks_nwords) {
656 /* wrong link mode nbits requested */
657 memset(&link_ksettings, 0, sizeof(link_ksettings));
658 link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
659 /* send back number of words required as negative val */
660 compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX,
661 "need too many bits for link modes!");
662 link_ksettings.base.link_mode_masks_nwords
663 = -((s8)__ETHTOOL_LINK_MODE_MASK_NU32);
664
665 /* copy the base fields back to user, not the link
666 * mode bitmaps
667 */
668 if (copy_to_user(useraddr, &link_ksettings.base,
669 sizeof(link_ksettings.base)))
670 return -EFAULT;
671
672 return 0;
673 }
674
675 /* handshake successful: user/kernel agree on
676 * link_mode_masks_nwords
677 */
678
679 memset(&link_ksettings, 0, sizeof(link_ksettings));
680 err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
408 if (err < 0) 681 if (err < 0)
409 return err; 682 return err;
410 683
684 /* make sure we tell the right values to user */
685 link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
686 link_ksettings.base.link_mode_masks_nwords
687 = __ETHTOOL_LINK_MODE_MASK_NU32;
688
689 return store_link_ksettings_for_user(useraddr, &link_ksettings);
690}
691
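The nwords handshake implemented above corresponds to a two-pass ioctl sequence on the userspace side. A simplified sketch of what an updated ethtool binary is expected to do (no error handling beyond the handshake; assumes the %ETHTOOL_GLINKSETTINGS uapi added by this series):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/if.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>

	/* query link settings over an already-open AF_INET socket `fd' */
	static int get_link_settings(int fd, const char *ifname)
	{
		struct {
			struct ethtool_link_settings req;
			__u32 link_mode_data[3 * 127];	/* 127 == S8_MAX, the cap asserted above */
		} ecmd;
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_data = (void *)&ecmd;

		/* pass 1: nwords = 0; the kernel answers with -(required nwords) */
		memset(&ecmd, 0, sizeof(ecmd));
		ecmd.req.cmd = ETHTOOL_GLINKSETTINGS;
		if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
			return -1;
		if (ecmd.req.link_mode_masks_nwords >= 0)
			return -1;			/* handshake refused */
		ecmd.req.link_mode_masks_nwords = -ecmd.req.link_mode_masks_nwords;

		/* pass 2: repeat with the agreed number of 32-bit mask words */
		ecmd.req.cmd = ETHTOOL_GLINKSETTINGS;
		return ioctl(fd, SIOCETHTOOL, &ifr);
	}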
692/* Update device ethtool_link_settings.
693 *
694 * Backward compatibility note: this function must fail when driver
695 * does not implement ethtool::set_link_ksettings, even if legacy
696 * ethtool_ops::set_settings is implemented. This tells new versions
697 * of ethtool that they should use the legacy API %ETHTOOL_SSET for
698 * this driver, so that they can correctly update the ethtool_cmd
699 * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
700 * implements ethtool_ops::get_settings anymore.
701 */
702static int ethtool_set_link_ksettings(struct net_device *dev,
703 void __user *useraddr)
704{
705 int err;
706 struct ethtool_link_ksettings link_ksettings;
707
708 ASSERT_RTNL();
709
710 if (!dev->ethtool_ops->set_link_ksettings)
711 return -EOPNOTSUPP;
712
713 /* make sure nbits field has expected value */
714 if (copy_from_user(&link_ksettings.base, useraddr,
715 sizeof(link_ksettings.base)))
716 return -EFAULT;
717
718 if (__ETHTOOL_LINK_MODE_MASK_NU32
719 != link_ksettings.base.link_mode_masks_nwords)
720 return -EINVAL;
721
722 /* copy the whole structure, now that we know it has expected
723 * format
724 */
725 err = load_link_ksettings_from_user(&link_ksettings, useraddr);
726 if (err)
727 return err;
728
729 /* re-check nwords field, just in case */
730 if (__ETHTOOL_LINK_MODE_MASK_NU32
731 != link_ksettings.base.link_mode_masks_nwords)
732 return -EINVAL;
733
734 return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
735}
736
737static void
738warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
739{
740 char name[sizeof(current->comm)];
741
742 pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
743 get_task_comm(name, current), details);
744}
745
746/* Query device for its ethtool_cmd settings.
747 *
748 * Backward compatibility note: for compatibility with legacy ethtool,
749 * this has to work with both drivers implementing get_link_ksettings
750 * API and drivers implementing get_settings API. When drivers
751 * implement get_link_ksettings and report higher link mode bits, a
752 * kernel warning is logged once (with name of 1st driver/device) to
753 * recommend user to upgrade ethtool, but the command is successful
754 * (only the lower link mode bits reported back to user).
755 */
756static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
757{
758 struct ethtool_cmd cmd;
759
760 ASSERT_RTNL();
761
762 if (dev->ethtool_ops->get_link_ksettings) {
763 /* First, use link_ksettings API if it is supported */
764 int err;
765 struct ethtool_link_ksettings link_ksettings;
766
767 memset(&link_ksettings, 0, sizeof(link_ksettings));
768 err = dev->ethtool_ops->get_link_ksettings(dev,
769 &link_ksettings);
770 if (err < 0)
771 return err;
772 if (!convert_link_ksettings_to_legacy_settings(&cmd,
773 &link_ksettings))
774 warn_incomplete_ethtool_legacy_settings_conversion(
775 "link modes are only partially reported");
776
777 /* send a sensible cmd tag back to user */
778 cmd.cmd = ETHTOOL_GSET;
779 } else {
780 /* driver doesn't support %ethtool_link_ksettings
781 * API. revert to legacy %ethtool_cmd API, unless it's
782 * not supported either.
783 */
784 int err;
785
786 if (!dev->ethtool_ops->get_settings)
787 return -EOPNOTSUPP;
788
789 memset(&cmd, 0, sizeof(cmd));
790 cmd.cmd = ETHTOOL_GSET;
791 err = dev->ethtool_ops->get_settings(dev, &cmd);
792 if (err < 0)
793 return err;
794 }
795
411 if (copy_to_user(useraddr, &cmd, sizeof(cmd))) 796 if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
412 return -EFAULT; 797 return -EFAULT;
798
413 return 0; 799 return 0;
414} 800}
415 801
802/* Update device link settings with given ethtool_cmd.
803 *
804 * Backward compatibility note: for compatibility with legacy ethtool,
805 * this has to work with both drivers implementing set_link_ksettings
806 * API and drivers implementing set_settings API. When drivers
807 * implement set_link_ksettings and user's request updates deprecated
808 * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
809 * warning is logged once (with name of 1st driver/device) to
810 * recommend user to upgrade ethtool, and the request is rejected.
811 */
416static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) 812static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
417{ 813{
418 struct ethtool_cmd cmd; 814 struct ethtool_cmd cmd;
419 815
420 if (!dev->ethtool_ops->set_settings) 816 ASSERT_RTNL();
421 return -EOPNOTSUPP;
422 817
423 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 818 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
424 return -EFAULT; 819 return -EFAULT;
425 820
821 /* first, try new %ethtool_link_ksettings API. */
822 if (dev->ethtool_ops->set_link_ksettings) {
823 struct ethtool_link_ksettings link_ksettings;
824
825 if (!convert_legacy_settings_to_link_ksettings(&link_ksettings,
826 &cmd))
827 return -EINVAL;
828
829 link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS;
830 link_ksettings.base.link_mode_masks_nwords
831 = __ETHTOOL_LINK_MODE_MASK_NU32;
832 return dev->ethtool_ops->set_link_ksettings(dev,
833 &link_ksettings);
834 }
835
836 /* legacy %ethtool_cmd API */
837
838 /* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings
839 * disappears internally
840 */
841
842 if (!dev->ethtool_ops->set_settings)
843 return -EOPNOTSUPP;
844
426 return dev->ethtool_ops->set_settings(dev, &cmd); 845 return dev->ethtool_ops->set_settings(dev, &cmd);
427} 846}
428 847
@@ -632,7 +1051,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
632 return 0; 1051 return 0;
633} 1052}
634 1053
635u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; 1054u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
636 1055
637void netdev_rss_key_fill(void *buffer, size_t len) 1056void netdev_rss_key_fill(void *buffer, size_t len)
638{ 1057{
@@ -642,6 +1061,37 @@ void netdev_rss_key_fill(void *buffer, size_t len)
642} 1061}
643EXPORT_SYMBOL(netdev_rss_key_fill); 1062EXPORT_SYMBOL(netdev_rss_key_fill);
644 1063
1064static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
1065{
1066 u32 dev_size, current_max = 0;
1067 u32 *indir;
1068 int ret;
1069
1070 if (!dev->ethtool_ops->get_rxfh_indir_size ||
1071 !dev->ethtool_ops->get_rxfh)
1072 return -EOPNOTSUPP;
1073 dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
1074 if (dev_size == 0)
1075 return -EOPNOTSUPP;
1076
1077 indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
1078 if (!indir)
1079 return -ENOMEM;
1080
1081 ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
1082 if (ret)
1083 goto out;
1084
1085 while (dev_size--)
1086 current_max = max(current_max, indir[dev_size]);
1087
1088 *max = current_max;
1089
1090out:
1091 kfree(indir);
1092 return ret;
1093}
1094
645static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, 1095static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
646 void __user *useraddr) 1096 void __user *useraddr)
647{ 1097{
@@ -738,6 +1188,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
738 } 1188 }
739 1189
740 ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); 1190 ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
1191 if (ret)
1192 goto out;
1193
1194 /* indicate whether rxfh was set to default */
1195 if (user_size == 0)
1196 dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
1197 else
1198 dev->priv_flags |= IFF_RXFH_CONFIGURED;
741 1199
742out: 1200out:
743 kfree(indir); 1201 kfree(indir);
@@ -897,6 +1355,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
897 } 1355 }
898 1356
899 ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); 1357 ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
1358 if (ret)
1359 goto out;
1360
1361 /* indicate whether rxfh was set to default */
1362 if (rxfh.indir_size == 0)
1363 dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
1364 else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
1365 dev->priv_flags |= IFF_RXFH_CONFIGURED;
900 1366
901out: 1367out:
902 kfree(rss_config); 1368 kfree(rss_config);
@@ -1227,14 +1693,31 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
1227static noinline_for_stack int ethtool_set_channels(struct net_device *dev, 1693static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
1228 void __user *useraddr) 1694 void __user *useraddr)
1229{ 1695{
1230 struct ethtool_channels channels; 1696 struct ethtool_channels channels, max;
1697 u32 max_rx_in_use = 0;
1231 1698
1232 if (!dev->ethtool_ops->set_channels) 1699 if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
1233 return -EOPNOTSUPP; 1700 return -EOPNOTSUPP;
1234 1701
1235 if (copy_from_user(&channels, useraddr, sizeof(channels))) 1702 if (copy_from_user(&channels, useraddr, sizeof(channels)))
1236 return -EFAULT; 1703 return -EFAULT;
1237 1704
1705 dev->ethtool_ops->get_channels(dev, &max);
1706
1707 /* ensure new counts are within the maximums */
1708 if ((channels.rx_count > max.max_rx) ||
1709 (channels.tx_count > max.max_tx) ||
1710 (channels.combined_count > max.max_combined) ||
1711 (channels.other_count > max.max_other))
1712 return -EINVAL;
1713
1714 /* ensure the new Rx count fits within the configured Rx flow
1715 * indirection table settings */
1716 if (netif_is_rxfh_configured(dev) &&
1717 !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) &&
1718 (channels.combined_count + channels.rx_count) <= max_rx_in_use)
1719 return -EINVAL;
1720
1238 return dev->ethtool_ops->set_channels(dev, &channels); 1721 return dev->ethtool_ops->set_channels(dev, &channels);
1239} 1722}
1240 1723
@@ -1823,13 +2306,121 @@ out:
1823 return ret; 2306 return ret;
1824} 2307}
1825 2308
2309static int ethtool_get_per_queue_coalesce(struct net_device *dev,
2310 void __user *useraddr,
2311 struct ethtool_per_queue_op *per_queue_opt)
2312{
2313 u32 bit;
2314 int ret;
2315 DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
2316
2317 if (!dev->ethtool_ops->get_per_queue_coalesce)
2318 return -EOPNOTSUPP;
2319
2320 useraddr += sizeof(*per_queue_opt);
2321
2322 bitmap_from_u32array(queue_mask,
2323 MAX_NUM_QUEUE,
2324 per_queue_opt->queue_mask,
2325 DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
2326
2327 for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
2328 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
2329
2330 ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce);
2331 if (ret != 0)
2332 return ret;
2333 if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
2334 return -EFAULT;
2335 useraddr += sizeof(coalesce);
2336 }
2337
2338 return 0;
2339}
2340
2341static int ethtool_set_per_queue_coalesce(struct net_device *dev,
2342 void __user *useraddr,
2343 struct ethtool_per_queue_op *per_queue_opt)
2344{
2345 u32 bit;
2346 int i, ret = 0;
2347 int n_queue;
2348 struct ethtool_coalesce *backup = NULL, *tmp = NULL;
2349 DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
2350
2351 if ((!dev->ethtool_ops->set_per_queue_coalesce) ||
2352 (!dev->ethtool_ops->get_per_queue_coalesce))
2353 return -EOPNOTSUPP;
2354
2355 useraddr += sizeof(*per_queue_opt);
2356
2357 bitmap_from_u32array(queue_mask,
2358 MAX_NUM_QUEUE,
2359 per_queue_opt->queue_mask,
2360 DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
2361 n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE);
2362 tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL);
2363 if (!backup)
2364 return -ENOMEM;
2365
2366 for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
2367 struct ethtool_coalesce coalesce;
2368
2369 ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp);
2370 if (ret != 0)
2371 goto roll_back;
2372
2373 tmp++;
2374
2375 if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) {
2376 ret = -EFAULT;
2377 goto roll_back;
2378 }
2379
2380 ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce);
2381 if (ret != 0)
2382 goto roll_back;
2383
2384 useraddr += sizeof(coalesce);
2385 }
2386
2387roll_back:
2388 if (ret != 0) {
2389 tmp = backup;
2390 for_each_set_bit(i, queue_mask, bit) {
2391 dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp);
2392 tmp++;
2393 }
2394 }
2395 kfree(backup);
2396
2397 return ret;
2398}
2399
2400static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
2401{
2402 struct ethtool_per_queue_op per_queue_opt;
2403
2404 if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
2405 return -EFAULT;
2406
2407 switch (per_queue_opt.sub_command) {
2408 case ETHTOOL_GCOALESCE:
2409 return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
2410 case ETHTOOL_SCOALESCE:
2411 return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt);
2412 default:
2413 return -EOPNOTSUPP;
2414 };
2415}
2416
1826/* The main entry point in this file. Called from net/core/dev_ioctl.c */ 2417/* The main entry point in this file. Called from net/core/dev_ioctl.c */
1827 2418
1828int dev_ethtool(struct net *net, struct ifreq *ifr) 2419int dev_ethtool(struct net *net, struct ifreq *ifr)
1829{ 2420{
1830 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); 2421 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
1831 void __user *useraddr = ifr->ifr_data; 2422 void __user *useraddr = ifr->ifr_data;
1832 u32 ethcmd; 2423 u32 ethcmd, sub_cmd;
1833 int rc; 2424 int rc;
1834 netdev_features_t old_features; 2425 netdev_features_t old_features;
1835 2426
@@ -1839,8 +2430,14 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1839 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) 2430 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1840 return -EFAULT; 2431 return -EFAULT;
1841 2432
2433 if (ethcmd == ETHTOOL_PERQUEUE) {
2434 if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)))
2435 return -EFAULT;
2436 } else {
2437 sub_cmd = ethcmd;
2438 }
1842 /* Allow some commands to be done by anyone */ 2439 /* Allow some commands to be done by anyone */
1843 switch (ethcmd) { 2440 switch (sub_cmd) {
1844 case ETHTOOL_GSET: 2441 case ETHTOOL_GSET:
1845 case ETHTOOL_GDRVINFO: 2442 case ETHTOOL_GDRVINFO:
1846 case ETHTOOL_GMSGLVL: 2443 case ETHTOOL_GMSGLVL:
@@ -2070,6 +2667,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
2070 case ETHTOOL_GPHYSTATS: 2667 case ETHTOOL_GPHYSTATS:
2071 rc = ethtool_get_phy_stats(dev, useraddr); 2668 rc = ethtool_get_phy_stats(dev, useraddr);
2072 break; 2669 break;
2670 case ETHTOOL_PERQUEUE:
2671 rc = ethtool_set_per_queue(dev, useraddr);
2672 break;
2673 case ETHTOOL_GLINKSETTINGS:
2674 rc = ethtool_get_link_ksettings(dev, useraddr);
2675 break;
2676 case ETHTOOL_SLINKSETTINGS:
2677 rc = ethtool_set_link_ksettings(dev, useraddr);
2678 break;
2073 default: 2679 default:
2074 rc = -EOPNOTSUPP; 2680 rc = -EOPNOTSUPP;
2075 } 2681 }
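The ETHTOOL_PERQUEUE handling added above expects the ioctl payload to be one struct ethtool_per_queue_op header immediately followed by one struct ethtool_coalesce record per bit set in queue_mask, in ascending queue order (note the useraddr += sizeof(*per_queue_opt) before the per-queue loop). The user-space sketch below illustrates that layout; it is not part of this commit, the struct and constant names are assumed from the uapi ethtool.h additions that accompany this series, and a real caller would normally issue the ETHTOOL_GCOALESCE sub-command first and only modify the fields it cares about.

#include <string.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

/* Set the Rx coalescing delay of queue 0 only; fd is any AF_INET socket. */
static int set_queue0_rx_usecs(int fd, const char *ifname, __u32 usecs)
{
	size_t len = sizeof(struct ethtool_per_queue_op) +
		     sizeof(struct ethtool_coalesce);
	struct ethtool_per_queue_op *op;
	struct ethtool_coalesce *coal;
	struct ifreq ifr;
	int ret;

	op = calloc(1, len);
	if (!op)
		return -1;

	op->cmd = ETHTOOL_PERQUEUE;
	op->sub_command = ETHTOOL_SCOALESCE;
	op->queue_mask[0] = 1u << 0;			/* queue 0 only */

	coal = (struct ethtool_coalesce *)op->data;	/* first (and only) record */
	coal->cmd = ETHTOOL_SCOALESCE;
	coal->rx_coalesce_usecs = usecs;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)op;

	ret = ioctl(fd, SIOCETHTOOL, &ifr);	/* fails unless the driver has per-queue ops */
	free(op);
	return ret;
}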
diff --git a/net/core/filter.c b/net/core/filter.c
index bba502f7cd57..ca7f832b2980 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -530,12 +530,14 @@ do_pass:
530 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); 530 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
531 break; 531 break;
532 532
533 /* RET_K, RET_A are remapped into 2 insns. */ 533 /* RET_K is remapped into 2 insns. RET_A case doesn't need an
534 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
535 */
534 case BPF_RET | BPF_A: 536 case BPF_RET | BPF_A:
535 case BPF_RET | BPF_K: 537 case BPF_RET | BPF_K:
536 *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? 538 if (BPF_RVAL(fp->code) == BPF_K)
537 BPF_K : BPF_X, BPF_REG_0, 539 *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
538 BPF_REG_A, fp->k); 540 0, fp->k);
539 *insn = BPF_EXIT_INSN(); 541 *insn = BPF_EXIT_INSN();
540 break; 542 break;
541 543
@@ -1147,7 +1149,8 @@ void bpf_prog_destroy(struct bpf_prog *fp)
1147} 1149}
1148EXPORT_SYMBOL_GPL(bpf_prog_destroy); 1150EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1149 1151
1150static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) 1152static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
1153 bool locked)
1151{ 1154{
1152 struct sk_filter *fp, *old_fp; 1155 struct sk_filter *fp, *old_fp;
1153 1156
@@ -1163,10 +1166,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
1163 return -ENOMEM; 1166 return -ENOMEM;
1164 } 1167 }
1165 1168
1166 old_fp = rcu_dereference_protected(sk->sk_filter, 1169 old_fp = rcu_dereference_protected(sk->sk_filter, locked);
1167 sock_owned_by_user(sk));
1168 rcu_assign_pointer(sk->sk_filter, fp); 1170 rcu_assign_pointer(sk->sk_filter, fp);
1169
1170 if (old_fp) 1171 if (old_fp)
1171 sk_filter_uncharge(sk, old_fp); 1172 sk_filter_uncharge(sk, old_fp);
1172 1173
@@ -1181,7 +1182,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
1181 if (bpf_prog_size(prog->len) > sysctl_optmem_max) 1182 if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1182 return -ENOMEM; 1183 return -ENOMEM;
1183 1184
1184 if (sk_unhashed(sk)) { 1185 if (sk_unhashed(sk) && sk->sk_reuseport) {
1185 err = reuseport_alloc(sk); 1186 err = reuseport_alloc(sk);
1186 if (err) 1187 if (err)
1187 return err; 1188 return err;
@@ -1245,7 +1246,8 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1245 * occurs or there is insufficient memory for the filter a negative 1246 * occurs or there is insufficient memory for the filter a negative
1246 * errno code is returned. On success the return is zero. 1247 * errno code is returned. On success the return is zero.
1247 */ 1248 */
1248int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1249int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
1250 bool locked)
1249{ 1251{
1250 struct bpf_prog *prog = __get_filter(fprog, sk); 1252 struct bpf_prog *prog = __get_filter(fprog, sk);
1251 int err; 1253 int err;
@@ -1253,7 +1255,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1253 if (IS_ERR(prog)) 1255 if (IS_ERR(prog))
1254 return PTR_ERR(prog); 1256 return PTR_ERR(prog);
1255 1257
1256 err = __sk_attach_prog(prog, sk); 1258 err = __sk_attach_prog(prog, sk, locked);
1257 if (err < 0) { 1259 if (err < 0) {
1258 __bpf_prog_release(prog); 1260 __bpf_prog_release(prog);
1259 return err; 1261 return err;
@@ -1261,7 +1263,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1261 1263
1262 return 0; 1264 return 0;
1263} 1265}
1264EXPORT_SYMBOL_GPL(sk_attach_filter); 1266EXPORT_SYMBOL_GPL(__sk_attach_filter);
1267
1268int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1269{
1270 return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk));
1271}
1265 1272
1266int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1273int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1267{ 1274{
@@ -1307,7 +1314,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
1307 if (IS_ERR(prog)) 1314 if (IS_ERR(prog))
1308 return PTR_ERR(prog); 1315 return PTR_ERR(prog);
1309 1316
1310 err = __sk_attach_prog(prog, sk); 1317 err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk));
1311 if (err < 0) { 1318 if (err < 0) {
1312 bpf_prog_put(prog); 1319 bpf_prog_put(prog);
1313 return err; 1320 return err;
@@ -1333,18 +1340,25 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1333 return 0; 1340 return 0;
1334} 1341}
1335 1342
1336#define BPF_LDST_LEN 16U 1343struct bpf_scratchpad {
1344 union {
1345 __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1346 u8 buff[MAX_BPF_STACK];
1347 };
1348};
1349
1350static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1337 1351
1338static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) 1352static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1339{ 1353{
1354 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1340 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1355 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1341 int offset = (int) r2; 1356 int offset = (int) r2;
1342 void *from = (void *) (long) r3; 1357 void *from = (void *) (long) r3;
1343 unsigned int len = (unsigned int) r4; 1358 unsigned int len = (unsigned int) r4;
1344 char buf[BPF_LDST_LEN];
1345 void *ptr; 1359 void *ptr;
1346 1360
1347 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM))) 1361 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1348 return -EINVAL; 1362 return -EINVAL;
1349 1363
1350 /* bpf verifier guarantees that: 1364 /* bpf verifier guarantees that:
@@ -1355,14 +1369,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1355 * 1369 *
1356 * so check for invalid 'offset' and too large 'len' 1370 * so check for invalid 'offset' and too large 'len'
1357 */ 1371 */
1358 if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) 1372 if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
1359 return -EFAULT; 1373 return -EFAULT;
1360 1374 if (unlikely(skb_try_make_writable(skb, offset + len)))
1361 if (unlikely(skb_cloned(skb) &&
1362 !skb_clone_writable(skb, offset + len)))
1363 return -EFAULT; 1375 return -EFAULT;
1364 1376
1365 ptr = skb_header_pointer(skb, offset, len, buf); 1377 ptr = skb_header_pointer(skb, offset, len, sp->buff);
1366 if (unlikely(!ptr)) 1378 if (unlikely(!ptr))
1367 return -EFAULT; 1379 return -EFAULT;
1368 1380
@@ -1371,17 +1383,19 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1371 1383
1372 memcpy(ptr, from, len); 1384 memcpy(ptr, from, len);
1373 1385
1374 if (ptr == buf) 1386 if (ptr == sp->buff)
1375 /* skb_store_bits cannot return -EFAULT here */ 1387 /* skb_store_bits cannot return -EFAULT here */
1376 skb_store_bits(skb, offset, ptr, len); 1388 skb_store_bits(skb, offset, ptr, len);
1377 1389
1378 if (flags & BPF_F_RECOMPUTE_CSUM) 1390 if (flags & BPF_F_RECOMPUTE_CSUM)
1379 skb_postpush_rcsum(skb, ptr, len); 1391 skb_postpush_rcsum(skb, ptr, len);
1392 if (flags & BPF_F_INVALIDATE_HASH)
1393 skb_clear_hash(skb);
1380 1394
1381 return 0; 1395 return 0;
1382} 1396}
1383 1397
1384const struct bpf_func_proto bpf_skb_store_bytes_proto = { 1398static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1385 .func = bpf_skb_store_bytes, 1399 .func = bpf_skb_store_bytes,
1386 .gpl_only = false, 1400 .gpl_only = false,
1387 .ret_type = RET_INTEGER, 1401 .ret_type = RET_INTEGER,
@@ -1400,7 +1414,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1400 unsigned int len = (unsigned int) r4; 1414 unsigned int len = (unsigned int) r4;
1401 void *ptr; 1415 void *ptr;
1402 1416
1403 if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN)) 1417 if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK))
1404 return -EFAULT; 1418 return -EFAULT;
1405 1419
1406 ptr = skb_header_pointer(skb, offset, len, to); 1420 ptr = skb_header_pointer(skb, offset, len, to);
@@ -1412,7 +1426,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1412 return 0; 1426 return 0;
1413} 1427}
1414 1428
1415const struct bpf_func_proto bpf_skb_load_bytes_proto = { 1429static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1416 .func = bpf_skb_load_bytes, 1430 .func = bpf_skb_load_bytes,
1417 .gpl_only = false, 1431 .gpl_only = false,
1418 .ret_type = RET_INTEGER, 1432 .ret_type = RET_INTEGER,
@@ -1432,9 +1446,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1432 return -EINVAL; 1446 return -EINVAL;
1433 if (unlikely((u32) offset > 0xffff)) 1447 if (unlikely((u32) offset > 0xffff))
1434 return -EFAULT; 1448 return -EFAULT;
1435 1449 if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
1436 if (unlikely(skb_cloned(skb) &&
1437 !skb_clone_writable(skb, offset + sizeof(sum))))
1438 return -EFAULT; 1450 return -EFAULT;
1439 1451
1440 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1452 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1442,6 +1454,12 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1442 return -EFAULT; 1454 return -EFAULT;
1443 1455
1444 switch (flags & BPF_F_HDR_FIELD_MASK) { 1456 switch (flags & BPF_F_HDR_FIELD_MASK) {
1457 case 0:
1458 if (unlikely(from != 0))
1459 return -EINVAL;
1460
1461 csum_replace_by_diff(ptr, to);
1462 break;
1445 case 2: 1463 case 2:
1446 csum_replace2(ptr, from, to); 1464 csum_replace2(ptr, from, to);
1447 break; 1465 break;
@@ -1459,7 +1477,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1459 return 0; 1477 return 0;
1460} 1478}
1461 1479
1462const struct bpf_func_proto bpf_l3_csum_replace_proto = { 1480static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1463 .func = bpf_l3_csum_replace, 1481 .func = bpf_l3_csum_replace,
1464 .gpl_only = false, 1482 .gpl_only = false,
1465 .ret_type = RET_INTEGER, 1483 .ret_type = RET_INTEGER,
@@ -1474,23 +1492,31 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1474{ 1492{
1475 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1493 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1476 bool is_pseudo = flags & BPF_F_PSEUDO_HDR; 1494 bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1495 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1477 int offset = (int) r2; 1496 int offset = (int) r2;
1478 __sum16 sum, *ptr; 1497 __sum16 sum, *ptr;
1479 1498
1480 if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) 1499 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
1500 BPF_F_HDR_FIELD_MASK)))
1481 return -EINVAL; 1501 return -EINVAL;
1482 if (unlikely((u32) offset > 0xffff)) 1502 if (unlikely((u32) offset > 0xffff))
1483 return -EFAULT; 1503 return -EFAULT;
1484 1504 if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
1485 if (unlikely(skb_cloned(skb) &&
1486 !skb_clone_writable(skb, offset + sizeof(sum))))
1487 return -EFAULT; 1505 return -EFAULT;
1488 1506
1489 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1507 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
1490 if (unlikely(!ptr)) 1508 if (unlikely(!ptr))
1491 return -EFAULT; 1509 return -EFAULT;
1510 if (is_mmzero && !*ptr)
1511 return 0;
1492 1512
1493 switch (flags & BPF_F_HDR_FIELD_MASK) { 1513 switch (flags & BPF_F_HDR_FIELD_MASK) {
1514 case 0:
1515 if (unlikely(from != 0))
1516 return -EINVAL;
1517
1518 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1519 break;
1494 case 2: 1520 case 2:
1495 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); 1521 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1496 break; 1522 break;
@@ -1501,6 +1527,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1501 return -EINVAL; 1527 return -EINVAL;
1502 } 1528 }
1503 1529
1530 if (is_mmzero && !*ptr)
1531 *ptr = CSUM_MANGLED_0;
1504 if (ptr == &sum) 1532 if (ptr == &sum)
1505 /* skb_store_bits guaranteed to not return -EFAULT here */ 1533 /* skb_store_bits guaranteed to not return -EFAULT here */
1506 skb_store_bits(skb, offset, ptr, sizeof(sum)); 1534 skb_store_bits(skb, offset, ptr, sizeof(sum));
@@ -1508,7 +1536,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1508 return 0; 1536 return 0;
1509} 1537}
1510 1538
1511const struct bpf_func_proto bpf_l4_csum_replace_proto = { 1539static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1512 .func = bpf_l4_csum_replace, 1540 .func = bpf_l4_csum_replace,
1513 .gpl_only = false, 1541 .gpl_only = false,
1514 .ret_type = RET_INTEGER, 1542 .ret_type = RET_INTEGER,
@@ -1519,6 +1547,45 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1519 .arg5_type = ARG_ANYTHING, 1547 .arg5_type = ARG_ANYTHING,
1520}; 1548};
1521 1549
1550static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
1551{
1552 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1553 u64 diff_size = from_size + to_size;
1554 __be32 *from = (__be32 *) (long) r1;
1555 __be32 *to = (__be32 *) (long) r3;
1556 int i, j = 0;
1557
1558 /* This is quite flexible, some examples:
1559 *
1560 * from_size == 0, to_size > 0, seed := csum --> pushing data
1561 * from_size > 0, to_size == 0, seed := csum --> pulling data
1562 * from_size > 0, to_size > 0, seed := 0 --> diffing data
1563 *
1564 * Even for diffing, from_size and to_size don't need to be equal.
1565 */
1566 if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
1567 diff_size > sizeof(sp->diff)))
1568 return -EINVAL;
1569
1570 for (i = 0; i < from_size / sizeof(__be32); i++, j++)
1571 sp->diff[j] = ~from[i];
1572 for (i = 0; i < to_size / sizeof(__be32); i++, j++)
1573 sp->diff[j] = to[i];
1574
1575 return csum_partial(sp->diff, diff_size, seed);
1576}
1577
1578static const struct bpf_func_proto bpf_csum_diff_proto = {
1579 .func = bpf_csum_diff,
1580 .gpl_only = false,
1581 .ret_type = RET_INTEGER,
1582 .arg1_type = ARG_PTR_TO_STACK,
1583 .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
1584 .arg3_type = ARG_PTR_TO_STACK,
1585 .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO,
1586 .arg5_type = ARG_ANYTHING,
1587};
1588
1522static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) 1589static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1523{ 1590{
1524 struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; 1591 struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
@@ -1543,11 +1610,10 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1543 } 1610 }
1544 1611
1545 skb2->dev = dev; 1612 skb2->dev = dev;
1546 skb_sender_cpu_clear(skb2);
1547 return dev_queue_xmit(skb2); 1613 return dev_queue_xmit(skb2);
1548} 1614}
1549 1615
1550const struct bpf_func_proto bpf_clone_redirect_proto = { 1616static const struct bpf_func_proto bpf_clone_redirect_proto = {
1551 .func = bpf_clone_redirect, 1617 .func = bpf_clone_redirect,
1552 .gpl_only = false, 1618 .gpl_only = false,
1553 .ret_type = RET_INTEGER, 1619 .ret_type = RET_INTEGER,
@@ -1596,11 +1662,10 @@ int skb_do_redirect(struct sk_buff *skb)
1596 } 1662 }
1597 1663
1598 skb->dev = dev; 1664 skb->dev = dev;
1599 skb_sender_cpu_clear(skb);
1600 return dev_queue_xmit(skb); 1665 return dev_queue_xmit(skb);
1601} 1666}
1602 1667
1603const struct bpf_func_proto bpf_redirect_proto = { 1668static const struct bpf_func_proto bpf_redirect_proto = {
1604 .func = bpf_redirect, 1669 .func = bpf_redirect,
1605 .gpl_only = false, 1670 .gpl_only = false,
1606 .ret_type = RET_INTEGER, 1671 .ret_type = RET_INTEGER,
@@ -1622,14 +1687,7 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
1622 1687
1623static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) 1688static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1624{ 1689{
1625#ifdef CONFIG_IP_ROUTE_CLASSID 1690 return dst_tclassid((struct sk_buff *) (unsigned long) r1);
1626 const struct dst_entry *dst;
1627
1628 dst = skb_dst((struct sk_buff *) (unsigned long) r1);
1629 if (dst)
1630 return dst->tclassid;
1631#endif
1632 return 0;
1633} 1691}
1634 1692
1635static const struct bpf_func_proto bpf_get_route_realm_proto = { 1693static const struct bpf_func_proto bpf_get_route_realm_proto = {
@@ -1682,6 +1740,13 @@ bool bpf_helper_changes_skb_data(void *func)
1682 return true; 1740 return true;
1683 if (func == bpf_skb_vlan_pop) 1741 if (func == bpf_skb_vlan_pop)
1684 return true; 1742 return true;
1743 if (func == bpf_skb_store_bytes)
1744 return true;
1745 if (func == bpf_l3_csum_replace)
1746 return true;
1747 if (func == bpf_l4_csum_replace)
1748 return true;
1749
1685 return false; 1750 return false;
1686} 1751}
1687 1752
@@ -1703,12 +1768,16 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1703 return -EPROTO; 1768 return -EPROTO;
1704 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 1769 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
1705 switch (size) { 1770 switch (size) {
1771 case offsetof(struct bpf_tunnel_key, tunnel_label):
1772 case offsetof(struct bpf_tunnel_key, tunnel_ext):
1773 goto set_compat;
1706 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 1774 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
1707 /* Fixup deprecated structure layouts here, so we have 1775 /* Fixup deprecated structure layouts here, so we have
1708 * a common path later on. 1776 * a common path later on.
1709 */ 1777 */
1710 if (ip_tunnel_info_af(info) != AF_INET) 1778 if (ip_tunnel_info_af(info) != AF_INET)
1711 return -EINVAL; 1779 return -EINVAL;
1780set_compat:
1712 to = (struct bpf_tunnel_key *)compat; 1781 to = (struct bpf_tunnel_key *)compat;
1713 break; 1782 break;
1714 default: 1783 default:
@@ -1720,11 +1789,13 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1720 to->tunnel_tos = info->key.tos; 1789 to->tunnel_tos = info->key.tos;
1721 to->tunnel_ttl = info->key.ttl; 1790 to->tunnel_ttl = info->key.ttl;
1722 1791
1723 if (flags & BPF_F_TUNINFO_IPV6) 1792 if (flags & BPF_F_TUNINFO_IPV6) {
1724 memcpy(to->remote_ipv6, &info->key.u.ipv6.src, 1793 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
1725 sizeof(to->remote_ipv6)); 1794 sizeof(to->remote_ipv6));
1726 else 1795 to->tunnel_label = be32_to_cpu(info->key.label);
1796 } else {
1727 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); 1797 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
1798 }
1728 1799
1729 if (unlikely(size != sizeof(struct bpf_tunnel_key))) 1800 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
1730 memcpy((void *)(long) r2, to, size); 1801 memcpy((void *)(long) r2, to, size);
@@ -1732,7 +1803,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1732 return 0; 1803 return 0;
1733} 1804}
1734 1805
1735const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { 1806static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
1736 .func = bpf_skb_get_tunnel_key, 1807 .func = bpf_skb_get_tunnel_key,
1737 .gpl_only = false, 1808 .gpl_only = false,
1738 .ret_type = RET_INTEGER, 1809 .ret_type = RET_INTEGER,
@@ -1742,6 +1813,32 @@ const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
1742 .arg4_type = ARG_ANYTHING, 1813 .arg4_type = ARG_ANYTHING,
1743}; 1814};
1744 1815
1816static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
1817{
1818 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1819 u8 *to = (u8 *) (long) r2;
1820 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
1821
1822 if (unlikely(!info ||
1823 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)))
1824 return -ENOENT;
1825 if (unlikely(size < info->options_len))
1826 return -ENOMEM;
1827
1828 ip_tunnel_info_opts_get(to, info);
1829
1830 return info->options_len;
1831}
1832
1833static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
1834 .func = bpf_skb_get_tunnel_opt,
1835 .gpl_only = false,
1836 .ret_type = RET_INTEGER,
1837 .arg1_type = ARG_PTR_TO_CTX,
1838 .arg2_type = ARG_PTR_TO_STACK,
1839 .arg3_type = ARG_CONST_STACK_SIZE,
1840};
1841
1745static struct metadata_dst __percpu *md_dst; 1842static struct metadata_dst __percpu *md_dst;
1746 1843
1747static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) 1844static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
@@ -1752,10 +1849,13 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1752 u8 compat[sizeof(struct bpf_tunnel_key)]; 1849 u8 compat[sizeof(struct bpf_tunnel_key)];
1753 struct ip_tunnel_info *info; 1850 struct ip_tunnel_info *info;
1754 1851
1755 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX))) 1852 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
1853 BPF_F_DONT_FRAGMENT)))
1756 return -EINVAL; 1854 return -EINVAL;
1757 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 1855 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
1758 switch (size) { 1856 switch (size) {
1857 case offsetof(struct bpf_tunnel_key, tunnel_label):
1858 case offsetof(struct bpf_tunnel_key, tunnel_ext):
1759 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 1859 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
1760 /* Fixup deprecated structure layouts here, so we have 1860 /* Fixup deprecated structure layouts here, so we have
1761 * a common path later on. 1861 * a common path later on.
@@ -1768,6 +1868,9 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1768 return -EINVAL; 1868 return -EINVAL;
1769 } 1869 }
1770 } 1870 }
1871 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
1872 from->tunnel_ext))
1873 return -EINVAL;
1771 1874
1772 skb_dst_drop(skb); 1875 skb_dst_drop(skb);
1773 dst_hold((struct dst_entry *) md); 1876 dst_hold((struct dst_entry *) md);
@@ -1776,7 +1879,10 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1776 info = &md->u.tun_info; 1879 info = &md->u.tun_info;
1777 info->mode = IP_TUNNEL_INFO_TX; 1880 info->mode = IP_TUNNEL_INFO_TX;
1778 1881
1779 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM; 1882 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
1883 if (flags & BPF_F_DONT_FRAGMENT)
1884 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
1885
1780 info->key.tun_id = cpu_to_be64(from->tunnel_id); 1886 info->key.tun_id = cpu_to_be64(from->tunnel_id);
1781 info->key.tos = from->tunnel_tos; 1887 info->key.tos = from->tunnel_tos;
1782 info->key.ttl = from->tunnel_ttl; 1888 info->key.ttl = from->tunnel_ttl;
@@ -1785,6 +1891,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1785 info->mode |= IP_TUNNEL_INFO_IPV6; 1891 info->mode |= IP_TUNNEL_INFO_IPV6;
1786 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, 1892 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
1787 sizeof(from->remote_ipv6)); 1893 sizeof(from->remote_ipv6));
1894 info->key.label = cpu_to_be32(from->tunnel_label) &
1895 IPV6_FLOWLABEL_MASK;
1788 } else { 1896 } else {
1789 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); 1897 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
1790 if (flags & BPF_F_ZERO_CSUM_TX) 1898 if (flags & BPF_F_ZERO_CSUM_TX)
@@ -1794,7 +1902,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1794 return 0; 1902 return 0;
1795} 1903}
1796 1904
1797const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { 1905static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
1798 .func = bpf_skb_set_tunnel_key, 1906 .func = bpf_skb_set_tunnel_key,
1799 .gpl_only = false, 1907 .gpl_only = false,
1800 .ret_type = RET_INTEGER, 1908 .ret_type = RET_INTEGER,
@@ -1804,17 +1912,53 @@ const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
1804 .arg4_type = ARG_ANYTHING, 1912 .arg4_type = ARG_ANYTHING,
1805}; 1913};
1806 1914
1807static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void) 1915static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
1916{
1917 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1918 u8 *from = (u8 *) (long) r2;
1919 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1920 const struct metadata_dst *md = this_cpu_ptr(md_dst);
1921
1922 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
1923 return -EINVAL;
1924 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
1925 return -ENOMEM;
1926
1927 ip_tunnel_info_opts_set(info, from, size);
1928
1929 return 0;
1930}
1931
1932static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
1933 .func = bpf_skb_set_tunnel_opt,
1934 .gpl_only = false,
1935 .ret_type = RET_INTEGER,
1936 .arg1_type = ARG_PTR_TO_CTX,
1937 .arg2_type = ARG_PTR_TO_STACK,
1938 .arg3_type = ARG_CONST_STACK_SIZE,
1939};
1940
1941static const struct bpf_func_proto *
1942bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
1808{ 1943{
1809 if (!md_dst) { 1944 if (!md_dst) {
1810 /* race is not possible, since it's called from 1945 /* Race is not possible, since it's called from verifier
1811 * verifier that is holding verifier mutex 1946 * that is holding verifier mutex.
1812 */ 1947 */
1813 md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL); 1948 md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
1949 GFP_KERNEL);
1814 if (!md_dst) 1950 if (!md_dst)
1815 return NULL; 1951 return NULL;
1816 } 1952 }
1817 return &bpf_skb_set_tunnel_key_proto; 1953
1954 switch (which) {
1955 case BPF_FUNC_skb_set_tunnel_key:
1956 return &bpf_skb_set_tunnel_key_proto;
1957 case BPF_FUNC_skb_set_tunnel_opt:
1958 return &bpf_skb_set_tunnel_opt_proto;
1959 default:
1960 return NULL;
1961 }
1818} 1962}
1819 1963
1820static const struct bpf_func_proto * 1964static const struct bpf_func_proto *
@@ -1851,6 +1995,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
1851 return &bpf_skb_store_bytes_proto; 1995 return &bpf_skb_store_bytes_proto;
1852 case BPF_FUNC_skb_load_bytes: 1996 case BPF_FUNC_skb_load_bytes:
1853 return &bpf_skb_load_bytes_proto; 1997 return &bpf_skb_load_bytes_proto;
1998 case BPF_FUNC_csum_diff:
1999 return &bpf_csum_diff_proto;
1854 case BPF_FUNC_l3_csum_replace: 2000 case BPF_FUNC_l3_csum_replace:
1855 return &bpf_l3_csum_replace_proto; 2001 return &bpf_l3_csum_replace_proto;
1856 case BPF_FUNC_l4_csum_replace: 2002 case BPF_FUNC_l4_csum_replace:
@@ -1866,7 +2012,11 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
1866 case BPF_FUNC_skb_get_tunnel_key: 2012 case BPF_FUNC_skb_get_tunnel_key:
1867 return &bpf_skb_get_tunnel_key_proto; 2013 return &bpf_skb_get_tunnel_key_proto;
1868 case BPF_FUNC_skb_set_tunnel_key: 2014 case BPF_FUNC_skb_set_tunnel_key:
1869 return bpf_get_skb_set_tunnel_key_proto(); 2015 return bpf_get_skb_set_tunnel_proto(func_id);
2016 case BPF_FUNC_skb_get_tunnel_opt:
2017 return &bpf_skb_get_tunnel_opt_proto;
2018 case BPF_FUNC_skb_set_tunnel_opt:
2019 return bpf_get_skb_set_tunnel_proto(func_id);
1870 case BPF_FUNC_redirect: 2020 case BPF_FUNC_redirect:
1871 return &bpf_redirect_proto; 2021 return &bpf_redirect_proto;
1872 case BPF_FUNC_get_route_realm: 2022 case BPF_FUNC_get_route_realm:
@@ -1915,16 +2065,14 @@ static bool sk_filter_is_valid_access(int off, int size,
1915static bool tc_cls_act_is_valid_access(int off, int size, 2065static bool tc_cls_act_is_valid_access(int off, int size,
1916 enum bpf_access_type type) 2066 enum bpf_access_type type)
1917{ 2067{
1918 if (off == offsetof(struct __sk_buff, tc_classid))
1919 return type == BPF_WRITE ? true : false;
1920
1921 if (type == BPF_WRITE) { 2068 if (type == BPF_WRITE) {
1922 switch (off) { 2069 switch (off) {
1923 case offsetof(struct __sk_buff, mark): 2070 case offsetof(struct __sk_buff, mark):
1924 case offsetof(struct __sk_buff, tc_index): 2071 case offsetof(struct __sk_buff, tc_index):
1925 case offsetof(struct __sk_buff, priority): 2072 case offsetof(struct __sk_buff, priority):
1926 case offsetof(struct __sk_buff, cb[0]) ... 2073 case offsetof(struct __sk_buff, cb[0]) ...
1927 offsetof(struct __sk_buff, cb[4]): 2074 offsetof(struct __sk_buff, cb[4]):
2075 case offsetof(struct __sk_buff, tc_classid):
1928 break; 2076 break;
1929 default: 2077 default:
1930 return false; 2078 return false;
@@ -2041,8 +2189,10 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
2041 ctx_off -= offsetof(struct __sk_buff, tc_classid); 2189 ctx_off -= offsetof(struct __sk_buff, tc_classid);
2042 ctx_off += offsetof(struct sk_buff, cb); 2190 ctx_off += offsetof(struct sk_buff, cb);
2043 ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); 2191 ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
2044 WARN_ON(type != BPF_WRITE); 2192 if (type == BPF_WRITE)
2045 *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); 2193 *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
2194 else
2195 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
2046 break; 2196 break;
2047 2197
2048 case offsetof(struct __sk_buff, tc_index): 2198 case offsetof(struct __sk_buff, tc_index):
@@ -2105,7 +2255,7 @@ static int __init register_sk_filter_ops(void)
2105} 2255}
2106late_initcall(register_sk_filter_ops); 2256late_initcall(register_sk_filter_ops);
2107 2257
2108int sk_detach_filter(struct sock *sk) 2258int __sk_detach_filter(struct sock *sk, bool locked)
2109{ 2259{
2110 int ret = -ENOENT; 2260 int ret = -ENOENT;
2111 struct sk_filter *filter; 2261 struct sk_filter *filter;
@@ -2113,8 +2263,7 @@ int sk_detach_filter(struct sock *sk)
2113 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 2263 if (sock_flag(sk, SOCK_FILTER_LOCKED))
2114 return -EPERM; 2264 return -EPERM;
2115 2265
2116 filter = rcu_dereference_protected(sk->sk_filter, 2266 filter = rcu_dereference_protected(sk->sk_filter, locked);
2117 sock_owned_by_user(sk));
2118 if (filter) { 2267 if (filter) {
2119 RCU_INIT_POINTER(sk->sk_filter, NULL); 2268 RCU_INIT_POINTER(sk->sk_filter, NULL);
2120 sk_filter_uncharge(sk, filter); 2269 sk_filter_uncharge(sk, filter);
@@ -2123,7 +2272,12 @@ int sk_detach_filter(struct sock *sk)
2123 2272
2124 return ret; 2273 return ret;
2125} 2274}
2126EXPORT_SYMBOL_GPL(sk_detach_filter); 2275EXPORT_SYMBOL_GPL(__sk_detach_filter);
2276
2277int sk_detach_filter(struct sock *sk)
2278{
2279 return __sk_detach_filter(sk, sock_owned_by_user(sk));
2280}
2127 2281
2128int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, 2282int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
2129 unsigned int len) 2283 unsigned int len)
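The new bpf_csum_diff() helper and the from == 0 "case 0" branches of bpf_l3_csum_replace()/bpf_l4_csum_replace() are meant to be used together from a cls_bpf/act_bpf program: compute a checksum delta over the bytes being changed, then feed that delta straight into the replace helpers. The restricted-C sketch below shows the intended pattern for rewriting an IPv4 source address in a TCP packet; it is not taken from this commit, the helper prototypes are declared locally in the style of samples/bpf, and the offsets assume an untagged Ethernet + IPv4 + TCP frame.

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>

/* Local helper declarations, mirroring the kernel-side signatures above. */
static int (*bpf_skb_store_bytes)(void *ctx, int off, const void *from,
				  int len, __u64 flags) =
	(void *) BPF_FUNC_skb_store_bytes;
static int (*bpf_l3_csum_replace)(void *ctx, int off, __u64 from, __u64 to,
				  __u64 flags) =
	(void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, __u64 from, __u64 to,
				  __u64 flags) =
	(void *) BPF_FUNC_l4_csum_replace;
static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size,
			    int seed) =
	(void *) BPF_FUNC_csum_diff;

#define IP_SADDR_OFF	(ETH_HLEN + offsetof(struct iphdr, saddr))
#define IP_CSUM_OFF	(ETH_HLEN + offsetof(struct iphdr, check))
#define TCP_CSUM_OFF	(ETH_HLEN + sizeof(struct iphdr) + \
			 offsetof(struct tcphdr, check))

static inline int rewrite_saddr(struct __sk_buff *skb, __be32 old_ip,
				__be32 new_ip)
{
	/* from_size > 0 and to_size > 0: "diffing data", seed = 0 */
	__s64 diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);

	if (diff < 0)
		return diff;

	bpf_skb_store_bytes(skb, IP_SADDR_OFF, &new_ip, 4, 0);
	/* "case 0": from must be 0, 'to' carries the precomputed delta */
	bpf_l3_csum_replace(skb, IP_CSUM_OFF, 0, diff, 0);
	/* saddr is also part of the TCP pseudo header */
	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, 0, diff, BPF_F_PSEUDO_HDR);
	return 0;
}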
diff --git a/net/core/flow.c b/net/core/flow.c
index 1033725be40b..3937b1b68d5b 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -92,8 +92,11 @@ static void flow_cache_gc_task(struct work_struct *work)
92 list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); 92 list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
93 spin_unlock_bh(&xfrm->flow_cache_gc_lock); 93 spin_unlock_bh(&xfrm->flow_cache_gc_lock);
94 94
95 list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) 95 list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
96 flow_entry_kill(fce, xfrm); 96 flow_entry_kill(fce, xfrm);
97 atomic_dec(&xfrm->flow_cache_gc_count);
98 WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0);
99 }
97} 100}
98 101
99static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, 102static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
@@ -101,6 +104,7 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
101 struct netns_xfrm *xfrm) 104 struct netns_xfrm *xfrm)
102{ 105{
103 if (deleted) { 106 if (deleted) {
107 atomic_add(deleted, &xfrm->flow_cache_gc_count);
104 fcp->hash_count -= deleted; 108 fcp->hash_count -= deleted;
105 spin_lock_bh(&xfrm->flow_cache_gc_lock); 109 spin_lock_bh(&xfrm->flow_cache_gc_lock);
106 list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); 110 list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
@@ -232,6 +236,13 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
232 if (fcp->hash_count > fc->high_watermark) 236 if (fcp->hash_count > fc->high_watermark)
233 flow_cache_shrink(fc, fcp); 237 flow_cache_shrink(fc, fcp);
234 238
239 if (fcp->hash_count > 2 * fc->high_watermark ||
240 atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) {
241 atomic_inc(&net->xfrm.flow_cache_genid);
242 flo = ERR_PTR(-ENOBUFS);
243 goto ret_object;
244 }
245
235 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); 246 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
236 if (fle) { 247 if (fle) {
237 fle->net = net; 248 fle->net = net;
@@ -446,6 +457,7 @@ int flow_cache_init(struct net *net)
446 INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); 457 INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
447 INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); 458 INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
448 mutex_init(&net->xfrm.flow_flush_sem); 459 mutex_init(&net->xfrm.flow_flush_sem);
460 atomic_set(&net->xfrm.flow_cache_gc_count, 0);
449 461
450 fc->hash_shift = 10; 462 fc->hash_shift = 10;
451 fc->low_watermark = 2 * flow_cache_hash_size(fc); 463 fc->low_watermark = 2 * flow_cache_hash_size(fc);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 12e700332010..a669dea146c6 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
19#include <net/flow_dissector.h> 19#include <net/flow_dissector.h>
20#include <scsi/fc/fc_fcoe.h> 20#include <scsi/fc/fc_fcoe.h>
21 21
22static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
23 enum flow_dissector_key_id key_id)
24{
25 return flow_dissector->used_keys & (1 << key_id);
26}
27
28static void dissector_set_key(struct flow_dissector *flow_dissector, 22static void dissector_set_key(struct flow_dissector *flow_dissector,
29 enum flow_dissector_key_id key_id) 23 enum flow_dissector_key_id key_id)
30{ 24{
31 flow_dissector->used_keys |= (1 << key_id); 25 flow_dissector->used_keys |= (1 << key_id);
32} 26}
33 27
34static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
35 enum flow_dissector_key_id key_id,
36 void *target_container)
37{
38 return ((char *) target_container) + flow_dissector->offset[key_id];
39}
40
41void skb_flow_dissector_init(struct flow_dissector *flow_dissector, 28void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
42 const struct flow_dissector_key *key, 29 const struct flow_dissector_key *key,
43 unsigned int key_count) 30 unsigned int key_count)
@@ -178,15 +165,16 @@ ip:
178 165
179 ip_proto = iph->protocol; 166 ip_proto = iph->protocol;
180 167
181 if (!dissector_uses_key(flow_dissector, 168 if (dissector_uses_key(flow_dissector,
182 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) 169 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
183 break; 170 key_addrs = skb_flow_dissector_target(flow_dissector,
171 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
172 target_container);
184 173
185 key_addrs = skb_flow_dissector_target(flow_dissector, 174 memcpy(&key_addrs->v4addrs, &iph->saddr,
186 FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); 175 sizeof(key_addrs->v4addrs));
187 memcpy(&key_addrs->v4addrs, &iph->saddr, 176 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
188 sizeof(key_addrs->v4addrs)); 177 }
189 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
190 178
191 if (ip_is_fragment(iph)) { 179 if (ip_is_fragment(iph)) {
192 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 180 key_control->flags |= FLOW_DIS_IS_FRAGMENT;
@@ -219,13 +207,12 @@ ipv6:
219 207
220 if (dissector_uses_key(flow_dissector, 208 if (dissector_uses_key(flow_dissector,
221 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 209 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
222 struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs; 210 key_addrs = skb_flow_dissector_target(flow_dissector,
223 211 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
224 key_ipv6_addrs = skb_flow_dissector_target(flow_dissector, 212 target_container);
225 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
226 target_container);
227 213
228 memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); 214 memcpy(&key_addrs->v6addrs, &iph->saddr,
215 sizeof(key_addrs->v6addrs));
229 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 216 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
230 } 217 }
231 218
@@ -339,8 +326,11 @@ mpls:
339 } 326 }
340 327
341 case htons(ETH_P_FCOE): 328 case htons(ETH_P_FCOE):
342 key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN); 329 if ((hlen - nhoff) < FCOE_HEADER_LEN)
343 /* fall through */ 330 goto out_bad;
331
332 nhoff += FCOE_HEADER_LEN;
333 goto out_good;
344 default: 334 default:
345 goto out_bad; 335 goto out_bad;
346 } 336 }
@@ -447,13 +437,12 @@ ip_proto_again:
447 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 437 key_control->flags |= FLOW_DIS_IS_FRAGMENT;
448 438
449 nhoff += sizeof(_fh); 439 nhoff += sizeof(_fh);
440 ip_proto = fh->nexthdr;
450 441
451 if (!(fh->frag_off & htons(IP6_OFFSET))) { 442 if (!(fh->frag_off & htons(IP6_OFFSET))) {
452 key_control->flags |= FLOW_DIS_FIRST_FRAG; 443 key_control->flags |= FLOW_DIS_FIRST_FRAG;
453 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) { 444 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
454 ip_proto = fh->nexthdr;
455 goto ip_proto_again; 445 goto ip_proto_again;
456 }
457 } 446 }
458 goto out_good; 447 goto out_good;
459 } 448 }
@@ -740,6 +729,11 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
740{ 729{
741 u32 poff = keys->control.thoff; 730 u32 poff = keys->control.thoff;
742 731
732 /* skip L4 headers for fragments after the first */
733 if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
734 !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
735 return poff;
736
743 switch (keys->basic.ip_proto) { 737 switch (keys->basic.ip_proto) {
744 case IPPROTO_TCP: { 738 case IPPROTO_TCP: {
745 /* access doff as u8 to avoid unaligned access */ 739 /* access doff as u8 to avoid unaligned access */
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 92d886f4adcb..4573d81093fe 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -191,6 +191,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
191/** 191/**
192 * gen_new_estimator - create a new rate estimator 192 * gen_new_estimator - create a new rate estimator
193 * @bstats: basic statistics 193 * @bstats: basic statistics
194 * @cpu_bstats: bstats per cpu
194 * @rate_est: rate estimator statistics 195 * @rate_est: rate estimator statistics
195 * @stats_lock: statistics lock 196 * @stats_lock: statistics lock
196 * @opt: rate estimator configuration TLV 197 * @opt: rate estimator configuration TLV
@@ -287,6 +288,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
287/** 288/**
288 * gen_replace_estimator - replace rate estimator configuration 289 * gen_replace_estimator - replace rate estimator configuration
289 * @bstats: basic statistics 290 * @bstats: basic statistics
291 * @cpu_bstats: bstats per cpu
290 * @rate_est: rate estimator statistics 292 * @rate_est: rate estimator statistics
291 * @stats_lock: statistics lock 293 * @stats_lock: statistics lock
292 * @opt: rate estimator configuration TLV 294 * @opt: rate estimator configuration TLV
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1e2f46a69d50..e640462ea8bf 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -140,6 +140,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
140/** 140/**
141 * gnet_stats_copy_basic - copy basic statistics into statistic TLV 141 * gnet_stats_copy_basic - copy basic statistics into statistic TLV
142 * @d: dumping handle 142 * @d: dumping handle
143 * @cpu: copy statistic per cpu
143 * @b: basic statistics 144 * @b: basic statistics
144 * 145 *
145 * Appends the basic statistics to the top level TLV created by 146 * Appends the basic statistics to the top level TLV created by
diff --git a/net/core/hwbm.c b/net/core/hwbm.c
new file mode 100644
index 000000000000..941c28486896
--- /dev/null
+++ b/net/core/hwbm.c
@@ -0,0 +1,87 @@
1/* Support for hardware buffer manager.
2 *
3 * Copyright (C) 2016 Marvell
4 *
5 * Gregory CLEMENT <gregory.clement@free-electrons.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12#include <linux/kernel.h>
13#include <linux/printk.h>
14#include <linux/skbuff.h>
15#include <net/hwbm.h>
16
17void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf)
18{
19 if (likely(bm_pool->frag_size <= PAGE_SIZE))
20 skb_free_frag(buf);
21 else
22 kfree(buf);
23}
24EXPORT_SYMBOL_GPL(hwbm_buf_free);
25
26/* Refill processing for HW buffer management */
27int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
28{
29 int frag_size = bm_pool->frag_size;
30 void *buf;
31
32 if (likely(frag_size <= PAGE_SIZE))
33 buf = netdev_alloc_frag(frag_size);
34 else
35 buf = kmalloc(frag_size, gfp);
36
37 if (!buf)
38 return -ENOMEM;
39
40 if (bm_pool->construct)
41 if (bm_pool->construct(bm_pool, buf)) {
42 hwbm_buf_free(bm_pool, buf);
43 return -ENOMEM;
44 }
45
46 return 0;
47}
48EXPORT_SYMBOL_GPL(hwbm_pool_refill);
49
50int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
51{
52 int err, i;
53 unsigned long flags;
54
55 spin_lock_irqsave(&bm_pool->lock, flags);
56 if (bm_pool->buf_num == bm_pool->size) {
57 pr_warn("pool already filled\n");
58 return bm_pool->buf_num;
59 }
60
61 if (buf_num + bm_pool->buf_num > bm_pool->size) {
62 pr_warn("cannot allocate %d buffers for pool\n",
63 buf_num);
64 return 0;
65 }
66
67 if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
68 pr_warn("Adding %d buffers to the %d current buffers will overflow\n",
69 buf_num, bm_pool->buf_num);
70 return 0;
71 }
72
73 for (i = 0; i < buf_num; i++) {
74 err = hwbm_pool_refill(bm_pool, gfp);
75 if (err < 0)
76 break;
77 }
78
79 /* Update BM driver with number of buffers added to pool */
80 bm_pool->buf_num += i;
81
 82	pr_debug("hwbm pool: %d of %d buffers added\n", i, buf_num);
83 spin_unlock_irqrestore(&bm_pool->lock, flags);
84
85 return i;
86}
87EXPORT_SYMBOL_GPL(hwbm_pool_add);
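The new hwbm core leaves buffer hand-off to the driver: the driver embeds a struct hwbm_pool, supplies a construct() callback that pushes each freshly allocated fragment to the hardware buffer manager, and calls hwbm_pool_add() to pre-fill the pool. A hedged driver-side sketch follows; it is not part of this commit, the hwbm_pool field names (size, frag_size, priv, lock, construct) are assumed from the include/net/hwbm.h header added alongside this file, and my_hw_give_buffer() is a hypothetical stand-in for the register write that hands the buffer's DMA address to the BM hardware.

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/spinlock.h>
#include <net/hwbm.h>

struct my_port {
	struct device *dev;
	struct hwbm_pool pool;
};

/* Hypothetical hardware hook: queue one buffer address to the BM unit. */
void my_hw_give_buffer(struct my_port *port, dma_addr_t dma);

static int my_construct(struct hwbm_pool *p, void *buf)
{
	struct my_port *port = p->priv;
	dma_addr_t dma;

	dma = dma_map_single(port->dev, buf, p->frag_size, DMA_FROM_DEVICE);
	if (dma_mapping_error(port->dev, dma))
		return -ENOMEM;

	my_hw_give_buffer(port, dma);
	return 0;
}

static int my_pool_init(struct my_port *port, int nbufs, int frag_size)
{
	port->pool.size = nbufs;
	port->pool.frag_size = frag_size;
	port->pool.construct = my_construct;
	port->pool.priv = port;
	spin_lock_init(&port->pool.lock);

	/* hwbm_pool_add() returns how many buffers it actually added */
	if (hwbm_pool_add(&port->pool, nbufs, GFP_KERNEL) != nbufs)
		return -ENOMEM;
	return 0;
}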
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 299cfc24d888..669ecc9f884e 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -27,6 +27,31 @@
27#include <net/rtnetlink.h> 27#include <net/rtnetlink.h>
28#include <net/ip6_fib.h> 28#include <net/ip6_fib.h>
29 29
30#ifdef CONFIG_MODULES
31
32static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
33{
34 /* Only lwt encaps implemented without using an interface for
35 * the encap need to return a string here.
36 */
37 switch (encap_type) {
38 case LWTUNNEL_ENCAP_MPLS:
39 return "MPLS";
40 case LWTUNNEL_ENCAP_ILA:
41 return "ILA";
42 case LWTUNNEL_ENCAP_IP6:
43 case LWTUNNEL_ENCAP_IP:
44 case LWTUNNEL_ENCAP_NONE:
45 case __LWTUNNEL_ENCAP_MAX:
46 /* should not have got here */
47 WARN_ON(1);
48 break;
49 }
50 return NULL;
51}
52
53#endif /* CONFIG_MODULES */
54
30struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 55struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
31{ 56{
32 struct lwtunnel_state *lws; 57 struct lwtunnel_state *lws;
@@ -85,6 +110,18 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
85 ret = -EOPNOTSUPP; 110 ret = -EOPNOTSUPP;
86 rcu_read_lock(); 111 rcu_read_lock();
87 ops = rcu_dereference(lwtun_encaps[encap_type]); 112 ops = rcu_dereference(lwtun_encaps[encap_type]);
113#ifdef CONFIG_MODULES
114 if (!ops) {
115 const char *encap_type_str = lwtunnel_encap_str(encap_type);
116
117 if (encap_type_str) {
118 rcu_read_unlock();
119 request_module("rtnl-lwt-%s", encap_type_str);
120 rcu_read_lock();
121 ops = rcu_dereference(lwtun_encaps[encap_type]);
122 }
123 }
124#endif
88 if (likely(ops && ops->build_state)) 125 if (likely(ops && ops->build_state))
89 ret = ops->build_state(dev, encap, family, cfg, lws); 126 ret = ops->build_state(dev, encap, family, cfg, lws);
90 rcu_read_unlock(); 127 rcu_read_unlock();
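With the request_module("rtnl-lwt-%s", ...) fallback above, a modular encap implementation is only autoloaded if it exports a module alias matching that format string. The series adds a MODULE_ALIAS_RTNL_LWT() convenience macro for this in the lwtunnel header; the macro name is assumed here, and a plain MODULE_ALIAS("rtnl-lwt-MPLS") would be equivalent. A minimal, hedged skeleton of the module-side plumbing, using the MPLS encap type for illustration:

#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/lwtunnel.h>

static int my_build_state(struct net_device *dev, struct nlattr *encap,
			  unsigned int family, const void *cfg,
			  struct lwtunnel_state **ts)
{
	return -EOPNOTSUPP;	/* placeholder; a real module builds its state here */
}

static const struct lwtunnel_encap_ops my_encap_ops = {
	.build_state = my_build_state,
	/* .output / .fill_encap / .get_encap_size / .cmp_encap in a real module */
};

static int __init my_encap_init(void)
{
	return lwtunnel_encap_add_ops(&my_encap_ops, LWTUNNEL_ENCAP_MPLS);
}
module_init(my_encap_init);

static void __exit my_encap_exit(void)
{
	lwtunnel_encap_del_ops(&my_encap_ops, LWTUNNEL_ENCAP_MPLS);
}
module_exit(my_encap_exit);

MODULE_LICENSE("GPL");
/* matches the "rtnl-lwt-%s" string built in lwtunnel_build_state() above */
MODULE_ALIAS_RTNL_LWT(MPLS);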
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b6c8a6629b39..2b3f76fe65f4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -29,7 +29,6 @@
29 29
30#ifdef CONFIG_SYSFS 30#ifdef CONFIG_SYSFS
31static const char fmt_hex[] = "%#x\n"; 31static const char fmt_hex[] = "%#x\n";
32static const char fmt_long_hex[] = "%#lx\n";
33static const char fmt_dec[] = "%d\n"; 32static const char fmt_dec[] = "%d\n";
34static const char fmt_ulong[] = "%lu\n"; 33static const char fmt_ulong[] = "%lu\n";
35static const char fmt_u64[] = "%llu\n"; 34static const char fmt_u64[] = "%llu\n";
@@ -199,9 +198,10 @@ static ssize_t speed_show(struct device *dev,
199 return restart_syscall(); 198 return restart_syscall();
200 199
201 if (netif_running(netdev)) { 200 if (netif_running(netdev)) {
202 struct ethtool_cmd cmd; 201 struct ethtool_link_ksettings cmd;
203 if (!__ethtool_get_settings(netdev, &cmd)) 202
204 ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); 203 if (!__ethtool_get_link_ksettings(netdev, &cmd))
204 ret = sprintf(buf, fmt_dec, cmd.base.speed);
205 } 205 }
206 rtnl_unlock(); 206 rtnl_unlock();
207 return ret; 207 return ret;
@@ -218,10 +218,12 @@ static ssize_t duplex_show(struct device *dev,
218 return restart_syscall(); 218 return restart_syscall();
219 219
220 if (netif_running(netdev)) { 220 if (netif_running(netdev)) {
221 struct ethtool_cmd cmd; 221 struct ethtool_link_ksettings cmd;
222 if (!__ethtool_get_settings(netdev, &cmd)) { 222
223 if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
223 const char *duplex; 224 const char *duplex;
224 switch (cmd.duplex) { 225
226 switch (cmd.base.duplex) {
225 case DUPLEX_HALF: 227 case DUPLEX_HALF:
226 duplex = "half"; 228 duplex = "half";
227 break; 229 break;
@@ -574,6 +576,7 @@ NETSTAT_ENTRY(tx_heartbeat_errors);
574NETSTAT_ENTRY(tx_window_errors); 576NETSTAT_ENTRY(tx_window_errors);
575NETSTAT_ENTRY(rx_compressed); 577NETSTAT_ENTRY(rx_compressed);
576NETSTAT_ENTRY(tx_compressed); 578NETSTAT_ENTRY(tx_compressed);
579NETSTAT_ENTRY(rx_nohandler);
577 580
578static struct attribute *netstat_attrs[] = { 581static struct attribute *netstat_attrs[] = {
579 &dev_attr_rx_packets.attr, 582 &dev_attr_rx_packets.attr,
@@ -599,6 +602,7 @@ static struct attribute *netstat_attrs[] = {
599 &dev_attr_tx_window_errors.attr, 602 &dev_attr_tx_window_errors.attr,
600 &dev_attr_rx_compressed.attr, 603 &dev_attr_rx_compressed.attr,
601 &dev_attr_tx_compressed.attr, 604 &dev_attr_tx_compressed.attr,
605 &dev_attr_rx_nohandler.attr,
602 NULL 606 NULL
603}; 607};
604 608
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0260c84ed83c..11fce17274f6 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -9,7 +9,6 @@
9 * Authors: Thomas Graf <tgraf@suug.ch> 9 * Authors: Thomas Graf <tgraf@suug.ch>
10 */ 10 */
11 11
12#include <linux/module.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/cgroup.h> 13#include <linux/cgroup.h>
15#include <linux/fdtable.h> 14#include <linux/fdtable.h>
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index f1efbc39ef6b..2ec86fc552df 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -11,7 +11,6 @@
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 13
14#include <linux/module.h>
15#include <linux/slab.h> 14#include <linux/slab.h>
16#include <linux/types.h> 15#include <linux/types.h>
17#include <linux/string.h> 16#include <linux/string.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1474cfd2dc1c..20999aa596dd 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2856,7 +2856,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2856 *vlan_encapsulated_proto = htons(ETH_P_IP); 2856 *vlan_encapsulated_proto = htons(ETH_P_IP);
2857 } 2857 }
2858 2858
2859 skb_set_mac_header(skb, 0); 2859 skb_reset_mac_header(skb);
2860 skb_set_network_header(skb, skb->len); 2860 skb_set_network_header(skb, skb->len);
2861 iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); 2861 iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr));
2862 2862
@@ -2983,7 +2983,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2983 *vlan_encapsulated_proto = htons(ETH_P_IPV6); 2983 *vlan_encapsulated_proto = htons(ETH_P_IPV6);
2984 } 2984 }
2985 2985
2986 skb_set_mac_header(skb, 0); 2986 skb_reset_mac_header(skb);
2987 skb_set_network_header(skb, skb->len); 2987 skb_set_network_header(skb, skb->len);
2988 iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); 2988 iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
2989 2989
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8261d95dd846..65763c29f845 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -804,6 +804,8 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
804 804
805 a->rx_compressed = b->rx_compressed; 805 a->rx_compressed = b->rx_compressed;
806 a->tx_compressed = b->tx_compressed; 806 a->tx_compressed = b->tx_compressed;
807
808 a->rx_nohandler = b->rx_nohandler;
807} 809}
808 810
809static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) 811static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
@@ -893,6 +895,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
893 + nla_total_size(4) /* IFLA_PROMISCUITY */ 895 + nla_total_size(4) /* IFLA_PROMISCUITY */
894 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ 896 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
895 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ 897 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
898 + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */
899 + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */
896 + nla_total_size(1) /* IFLA_OPERSTATE */ 900 + nla_total_size(1) /* IFLA_OPERSTATE */
897 + nla_total_size(1) /* IFLA_LINKMODE */ 901 + nla_total_size(1) /* IFLA_LINKMODE */
898 + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ 902 + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -905,6 +909,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
905 + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ 909 + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
906 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ 910 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
907 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ 911 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
912 + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
908 + nla_total_size(1); /* IFLA_PROTO_DOWN */ 913 + nla_total_size(1); /* IFLA_PROTO_DOWN */
909 914
910} 915}
@@ -1175,14 +1180,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
1175 1180
1176static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) 1181static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
1177{ 1182{
1178 struct rtnl_link_ifmap map = { 1183 struct rtnl_link_ifmap map;
1179 .mem_start = dev->mem_start, 1184
1180 .mem_end = dev->mem_end, 1185 memset(&map, 0, sizeof(map));
1181 .base_addr = dev->base_addr, 1186 map.mem_start = dev->mem_start;
1182 .irq = dev->irq, 1187 map.mem_end = dev->mem_end;
1183 .dma = dev->dma, 1188 map.base_addr = dev->base_addr;
1184 .port = dev->if_port, 1189 map.irq = dev->irq;
1185 }; 1190 map.dma = dev->dma;
1191 map.port = dev->if_port;
1192
1186 if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) 1193 if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
1187 return -EMSGSIZE; 1194 return -EMSGSIZE;
1188 1195
@@ -1221,6 +1228,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1221 nla_put_u32(skb, IFLA_GROUP, dev->group) || 1228 nla_put_u32(skb, IFLA_GROUP, dev->group) ||
1222 nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || 1229 nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
1223 nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || 1230 nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
1231 nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
1232 nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
1224#ifdef CONFIG_RPS 1233#ifdef CONFIG_RPS
1225 nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || 1234 nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
1226#endif 1235#endif
@@ -1387,15 +1396,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
1387 [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, 1396 [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
1388 [IFLA_VF_STATS] = { .type = NLA_NESTED }, 1397 [IFLA_VF_STATS] = { .type = NLA_NESTED },
1389 [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, 1398 [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) },
1390}; 1399 [IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) },
1391 1400 [IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) },
1392static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
1393 [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 },
1394 [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 },
1395 [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 },
1396 [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 },
1397 [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 },
1398 [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 },
1399}; 1401};
1400 1402
1401static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { 1403static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1412,6 +1414,58 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
1412 [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, 1414 [IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
1413}; 1415};
1414 1416
1417static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
1418{
1419 const struct rtnl_link_ops *ops = NULL;
1420 struct nlattr *linfo[IFLA_INFO_MAX + 1];
1421
1422 if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0)
1423 return NULL;
1424
1425 if (linfo[IFLA_INFO_KIND]) {
1426 char kind[MODULE_NAME_LEN];
1427
1428 nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
1429 ops = rtnl_link_ops_get(kind);
1430 }
1431
1432 return ops;
1433}
1434
1435static bool link_master_filtered(struct net_device *dev, int master_idx)
1436{
1437 struct net_device *master;
1438
1439 if (!master_idx)
1440 return false;
1441
1442 master = netdev_master_upper_dev_get(dev);
1443 if (!master || master->ifindex != master_idx)
1444 return true;
1445
1446 return false;
1447}
1448
1449static bool link_kind_filtered(const struct net_device *dev,
1450 const struct rtnl_link_ops *kind_ops)
1451{
1452 if (kind_ops && dev->rtnl_link_ops != kind_ops)
1453 return true;
1454
1455 return false;
1456}
1457
1458static bool link_dump_filtered(struct net_device *dev,
1459 int master_idx,
1460 const struct rtnl_link_ops *kind_ops)
1461{
1462 if (link_master_filtered(dev, master_idx) ||
1463 link_kind_filtered(dev, kind_ops))
1464 return true;
1465
1466 return false;
1467}
1468
1415static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 1469static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1416{ 1470{
1417 struct net *net = sock_net(skb->sk); 1471 struct net *net = sock_net(skb->sk);
@@ -1421,6 +1475,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1421 struct hlist_head *head; 1475 struct hlist_head *head;
1422 struct nlattr *tb[IFLA_MAX+1]; 1476 struct nlattr *tb[IFLA_MAX+1];
1423 u32 ext_filter_mask = 0; 1477 u32 ext_filter_mask = 0;
1478 const struct rtnl_link_ops *kind_ops = NULL;
1479 unsigned int flags = NLM_F_MULTI;
1480 int master_idx = 0;
1424 int err; 1481 int err;
1425 int hdrlen; 1482 int hdrlen;
1426 1483
@@ -1443,18 +1500,29 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1443 1500
1444 if (tb[IFLA_EXT_MASK]) 1501 if (tb[IFLA_EXT_MASK])
1445 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 1502 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
1503
1504 if (tb[IFLA_MASTER])
1505 master_idx = nla_get_u32(tb[IFLA_MASTER]);
1506
1507 if (tb[IFLA_LINKINFO])
1508 kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]);
1509
1510 if (master_idx || kind_ops)
1511 flags |= NLM_F_DUMP_FILTERED;
1446 } 1512 }
1447 1513
1448 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1514 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1449 idx = 0; 1515 idx = 0;
1450 head = &net->dev_index_head[h]; 1516 head = &net->dev_index_head[h];
1451 hlist_for_each_entry(dev, head, index_hlist) { 1517 hlist_for_each_entry(dev, head, index_hlist) {
1518 if (link_dump_filtered(dev, master_idx, kind_ops))
1519 continue;
1452 if (idx < s_idx) 1520 if (idx < s_idx)
1453 goto cont; 1521 goto cont;
1454 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1522 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
1455 NETLINK_CB(cb->skb).portid, 1523 NETLINK_CB(cb->skb).portid,
1456 cb->nlh->nlmsg_seq, 0, 1524 cb->nlh->nlmsg_seq, 0,
1457 NLM_F_MULTI, 1525 flags,
1458 ext_filter_mask); 1526 ext_filter_mask);
1459 /* If we ran out of room on the first message, 1527 /* If we ran out of room on the first message,
1460 * we're in trouble 1528 * we're in trouble
@@ -1534,6 +1602,22 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
1534 return 0; 1602 return 0;
1535} 1603}
1536 1604
1605static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt,
1606 int guid_type)
1607{
1608 const struct net_device_ops *ops = dev->netdev_ops;
1609
1610 return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type);
1611}
1612
1613static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type)
1614{
1615 if (dev->type != ARPHRD_INFINIBAND)
1616 return -EOPNOTSUPP;
1617
1618 return handle_infiniband_guid(dev, ivt, guid_type);
1619}
1620
1537static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) 1621static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
1538{ 1622{
1539 const struct net_device_ops *ops = dev->netdev_ops; 1623 const struct net_device_ops *ops = dev->netdev_ops;
@@ -1636,6 +1720,24 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
1636 return err; 1720 return err;
1637 } 1721 }
1638 1722
1723 if (tb[IFLA_VF_IB_NODE_GUID]) {
1724 struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
1725
1726 if (!ops->ndo_set_vf_guid)
1727 return -EOPNOTSUPP;
1728
1729 return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
1730 }
1731
1732 if (tb[IFLA_VF_IB_PORT_GUID]) {
1733 struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
1734
1735 if (!ops->ndo_set_vf_guid)
1736 return -EOPNOTSUPP;
1737
1738 return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID);
1739 }
1740
1639 return err; 1741 return err;
1640} 1742}
1641 1743
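The rtnl_fill_link_ifmap() hunk above swaps a designated initializer for memset() followed by per-field assignments before the structure is copied to userspace with nla_put(). The likely motivation is that an initializer does not have to zero the padding bytes inside struct rtnl_link_ifmap, so uninitialized stack data in that padding could leak into the netlink reply. A minimal userspace sketch of the same pattern follows; struct sample_map and fill_map() are invented for illustration and are not kernel APIs.

#include <stdio.h>
#include <string.h>

/* Hypothetical struct with internal padding between 'flag' and 'addr'. */
struct sample_map {
	unsigned char flag;	/* 1 byte, usually followed by padding */
	unsigned long addr;	/* typically 8-byte aligned */
};

static void fill_map(struct sample_map *m, unsigned long addr)
{
	/* memset() zeroes the padding bytes as well as the named fields,
	 * so the whole object is safe to copy to an untrusted consumer.
	 */
	memset(m, 0, sizeof(*m));
	m->flag = 1;
	m->addr = addr;
}

int main(void)
{
	struct sample_map m;

	fill_map(&m, 0xdead0000UL);
	printf("flag=%u addr=%#lx size=%zu\n", m.flag, m.addr, sizeof(m));
	return 0;
}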
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8616d1147c93..e561f9f07d6d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -349,8 +349,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
349} 349}
350EXPORT_SYMBOL(build_skb); 350EXPORT_SYMBOL(build_skb);
351 351
352#define NAPI_SKB_CACHE_SIZE 64
353
354struct napi_alloc_cache {
355 struct page_frag_cache page;
356 size_t skb_count;
357 void *skb_cache[NAPI_SKB_CACHE_SIZE];
358};
359
352static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); 360static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
353static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); 361static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
354 362
355static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 363static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
356{ 364{
@@ -380,9 +388,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
380 388
381static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 389static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
382{ 390{
383 struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); 391 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
384 392
385 return __alloc_page_frag(nc, fragsz, gfp_mask); 393 return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
386} 394}
387 395
388void *napi_alloc_frag(unsigned int fragsz) 396void *napi_alloc_frag(unsigned int fragsz)
@@ -476,7 +484,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
476struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, 484struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
477 gfp_t gfp_mask) 485 gfp_t gfp_mask)
478{ 486{
479 struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); 487 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
480 struct sk_buff *skb; 488 struct sk_buff *skb;
481 void *data; 489 void *data;
482 490
@@ -496,7 +504,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
496 if (sk_memalloc_socks()) 504 if (sk_memalloc_socks())
497 gfp_mask |= __GFP_MEMALLOC; 505 gfp_mask |= __GFP_MEMALLOC;
498 506
499 data = __alloc_page_frag(nc, len, gfp_mask); 507 data = __alloc_page_frag(&nc->page, len, gfp_mask);
500 if (unlikely(!data)) 508 if (unlikely(!data))
501 return NULL; 509 return NULL;
502 510
@@ -507,7 +515,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
507 } 515 }
508 516
509 /* use OR instead of assignment to avoid clearing of bits in mask */ 517 /* use OR instead of assignment to avoid clearing of bits in mask */
510 if (nc->pfmemalloc) 518 if (nc->page.pfmemalloc)
511 skb->pfmemalloc = 1; 519 skb->pfmemalloc = 1;
512 skb->head_frag = 1; 520 skb->head_frag = 1;
513 521
@@ -749,6 +757,73 @@ void consume_skb(struct sk_buff *skb)
749} 757}
750EXPORT_SYMBOL(consume_skb); 758EXPORT_SYMBOL(consume_skb);
751 759
760void __kfree_skb_flush(void)
761{
762 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
763
764 /* flush skb_cache if containing objects */
765 if (nc->skb_count) {
766 kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
767 nc->skb_cache);
768 nc->skb_count = 0;
769 }
770}
771
772static inline void _kfree_skb_defer(struct sk_buff *skb)
773{
774 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
775
776 /* drop skb->head and call any destructors for packet */
777 skb_release_all(skb);
778
779 /* record skb to CPU local list */
780 nc->skb_cache[nc->skb_count++] = skb;
781
782#ifdef CONFIG_SLUB
783 /* SLUB writes into objects when freeing */
784 prefetchw(skb);
785#endif
786
787 /* flush skb_cache if it is filled */
788 if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
789 kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
790 nc->skb_cache);
791 nc->skb_count = 0;
792 }
793}
794void __kfree_skb_defer(struct sk_buff *skb)
795{
796 _kfree_skb_defer(skb);
797}
798
799void napi_consume_skb(struct sk_buff *skb, int budget)
800{
801 if (unlikely(!skb))
802 return;
803
804 /* Zero budget indicate non-NAPI context called us, like netpoll */
805 if (unlikely(!budget)) {
806 dev_consume_skb_any(skb);
807 return;
808 }
809
810 if (likely(atomic_read(&skb->users) == 1))
811 smp_rmb();
812 else if (likely(!atomic_dec_and_test(&skb->users)))
813 return;
814 /* if reaching here SKB is ready to free */
815 trace_consume_skb(skb);
816
817 /* if SKB is a clone, don't handle this case */
818 if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
819 __kfree_skb(skb);
820 return;
821 }
822
823 _kfree_skb_defer(skb);
824}
825EXPORT_SYMBOL(napi_consume_skb);
826
752/* Make sure a field is enclosed inside headers_start/headers_end section */ 827/* Make sure a field is enclosed inside headers_start/headers_end section */
753#define CHECK_SKB_FIELD(field) \ 828#define CHECK_SKB_FIELD(field) \
754 BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ 829 BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
@@ -1843,6 +1918,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1843 struct splice_pipe_desc *spd, struct sock *sk) 1918 struct splice_pipe_desc *spd, struct sock *sk)
1844{ 1919{
1845 int seg; 1920 int seg;
1921 struct sk_buff *iter;
1846 1922
1847 /* map the linear part : 1923 /* map the linear part :
1848 * If skb->head_frag is set, this 'linear' part is backed by a 1924 * If skb->head_frag is set, this 'linear' part is backed by a
@@ -1869,6 +1945,19 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1869 return true; 1945 return true;
1870 } 1946 }
1871 1947
1948 skb_walk_frags(skb, iter) {
1949 if (*offset >= iter->len) {
1950 *offset -= iter->len;
1951 continue;
1952 }
1953 /* __skb_splice_bits() only fails if the output has no room
1954 * left, so no point in going over the frag_list for the error
1955 * case.
1956 */
1957 if (__skb_splice_bits(iter, pipe, offset, len, spd, sk))
1958 return true;
1959 }
1960
1872 return false; 1961 return false;
1873} 1962}
1874 1963
@@ -1895,9 +1984,7 @@ ssize_t skb_socket_splice(struct sock *sk,
1895 1984
1896/* 1985/*
1897 * Map data from the skb to a pipe. Should handle both the linear part, 1986 * Map data from the skb to a pipe. Should handle both the linear part,
1898 * the fragments, and the frag list. It does NOT handle frag lists within 1987 * the fragments, and the frag list.
1899 * the frag list, if such a thing exists. We'd probably need to recurse to
1900 * handle that cleanly.
1901 */ 1988 */
1902int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, 1989int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
1903 struct pipe_inode_info *pipe, unsigned int tlen, 1990 struct pipe_inode_info *pipe, unsigned int tlen,
@@ -1916,29 +2003,10 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
1916 .ops = &nosteal_pipe_buf_ops, 2003 .ops = &nosteal_pipe_buf_ops,
1917 .spd_release = sock_spd_release, 2004 .spd_release = sock_spd_release,
1918 }; 2005 };
1919 struct sk_buff *frag_iter;
1920 int ret = 0; 2006 int ret = 0;
1921 2007
1922 /* 2008 __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
1923 * __skb_splice_bits() only fails if the output has no room left,
1924 * so no point in going over the frag_list for the error case.
1925 */
1926 if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
1927 goto done;
1928 else if (!tlen)
1929 goto done;
1930
1931 /*
1932 * now see if we have a frag_list to map
1933 */
1934 skb_walk_frags(skb, frag_iter) {
1935 if (!tlen)
1936 break;
1937 if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
1938 break;
1939 }
1940 2009
1941done:
1942 if (spd.nr_pages) 2010 if (spd.nr_pages)
1943 ret = splice_cb(sk, pipe, &spd); 2011 ret = splice_cb(sk, pipe, &spd);
1944 2012
@@ -3024,8 +3092,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
3024 if (unlikely(!proto)) 3092 if (unlikely(!proto))
3025 return ERR_PTR(-EINVAL); 3093 return ERR_PTR(-EINVAL);
3026 3094
3027 csum = !head_skb->encap_hdr_csum && 3095 csum = !!can_checksum_protocol(features, proto);
3028 !!can_checksum_protocol(features, proto);
3029 3096
3030 headroom = skb_headroom(head_skb); 3097 headroom = skb_headroom(head_skb);
3031 pos = skb_headlen(head_skb); 3098 pos = skb_headlen(head_skb);
@@ -3118,13 +3185,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
3118 if (nskb->len == len + doffset) 3185 if (nskb->len == len + doffset)
3119 goto perform_csum_check; 3186 goto perform_csum_check;
3120 3187
3121 if (!sg && !nskb->remcsum_offload) { 3188 if (!sg) {
3122 nskb->ip_summed = CHECKSUM_NONE; 3189 if (!nskb->remcsum_offload)
3123 nskb->csum = skb_copy_and_csum_bits(head_skb, offset, 3190 nskb->ip_summed = CHECKSUM_NONE;
3124 skb_put(nskb, len), 3191 SKB_GSO_CB(nskb)->csum =
3125 len, 0); 3192 skb_copy_and_csum_bits(head_skb, offset,
3193 skb_put(nskb, len),
3194 len, 0);
3126 SKB_GSO_CB(nskb)->csum_start = 3195 SKB_GSO_CB(nskb)->csum_start =
3127 skb_headroom(nskb) + doffset; 3196 skb_headroom(nskb) + doffset;
3128 continue; 3197 continue;
3129 } 3198 }
3130 3199
@@ -3190,12 +3259,19 @@ skip_fraglist:
3190 nskb->truesize += nskb->data_len; 3259 nskb->truesize += nskb->data_len;
3191 3260
3192perform_csum_check: 3261perform_csum_check:
3193 if (!csum && !nskb->remcsum_offload) { 3262 if (!csum) {
3194 nskb->csum = skb_checksum(nskb, doffset, 3263 if (skb_has_shared_frag(nskb)) {
3195 nskb->len - doffset, 0); 3264 err = __skb_linearize(nskb);
3196 nskb->ip_summed = CHECKSUM_NONE; 3265 if (err)
3266 goto err;
3267 }
3268 if (!nskb->remcsum_offload)
3269 nskb->ip_summed = CHECKSUM_NONE;
3270 SKB_GSO_CB(nskb)->csum =
3271 skb_checksum(nskb, doffset,
3272 nskb->len - doffset, 0);
3197 SKB_GSO_CB(nskb)->csum_start = 3273 SKB_GSO_CB(nskb)->csum_start =
3198 skb_headroom(nskb) + doffset; 3274 skb_headroom(nskb) + doffset;
3199 } 3275 }
3200 } while ((offset += len) < head_skb->len); 3276 } while ((offset += len) < head_skb->len);
3201 3277
@@ -4237,7 +4313,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
4237 skb->skb_iif = 0; 4313 skb->skb_iif = 0;
4238 skb->ignore_df = 0; 4314 skb->ignore_df = 0;
4239 skb_dst_drop(skb); 4315 skb_dst_drop(skb);
4240 skb_sender_cpu_clear(skb);
4241 secpath_reset(skb); 4316 secpath_reset(skb);
4242 nf_reset(skb); 4317 nf_reset(skb);
4243 nf_reset_trace(skb); 4318 nf_reset_trace(skb);
@@ -4427,15 +4502,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
4427 __skb_push(skb, offset); 4502 __skb_push(skb, offset);
4428 err = __vlan_insert_tag(skb, skb->vlan_proto, 4503 err = __vlan_insert_tag(skb, skb->vlan_proto,
4429 skb_vlan_tag_get(skb)); 4504 skb_vlan_tag_get(skb));
4430 if (err) 4505 if (err) {
4506 __skb_pull(skb, offset);
4431 return err; 4507 return err;
4508 }
4509
4432 skb->protocol = skb->vlan_proto; 4510 skb->protocol = skb->vlan_proto;
4433 skb->mac_len += VLAN_HLEN; 4511 skb->mac_len += VLAN_HLEN;
4434 __skb_pull(skb, offset);
4435 4512
4436 if (skb->ip_summed == CHECKSUM_COMPLETE) 4513 skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
4437 skb->csum = csum_add(skb->csum, csum_partial(skb->data 4514 __skb_pull(skb, offset);
4438 + (2 * ETH_ALEN), VLAN_HLEN, 0));
4439 } 4515 }
4440 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); 4516 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
4441 return 0; 4517 return 0;
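The skbuff.c changes add a per-CPU napi_alloc_cache that parks up to NAPI_SKB_CACHE_SIZE freed skb heads and hands them to kmem_cache_free_bulk() either when the array fills or when __kfree_skb_flush() runs at the end of a NAPI poll, amortising the cost of individual frees. The sketch below is a simplified userspace analogue of that deferral pattern, assuming plain malloc()/free() in place of the slab bulk API; names such as free_cache and cache_defer() are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

#define CACHE_SIZE 64	/* mirrors NAPI_SKB_CACHE_SIZE in the hunk above */

struct free_cache {
	size_t count;
	void *slots[CACHE_SIZE];
};

/* Release everything currently parked in the cache in one pass. */
static void cache_flush(struct free_cache *c)
{
	size_t i;

	for (i = 0; i < c->count; i++)
		free(c->slots[i]);
	c->count = 0;
}

/* Defer an object instead of freeing it immediately; flush when full. */
static void cache_defer(struct free_cache *c, void *obj)
{
	c->slots[c->count++] = obj;
	if (c->count == CACHE_SIZE)
		cache_flush(c);
}

int main(void)
{
	struct free_cache cache = { .count = 0 };
	int i;

	for (i = 0; i < 200; i++)
		cache_defer(&cache, malloc(128));
	cache_flush(&cache);	/* analogous to __kfree_skb_flush() */
	printf("done\n");
	return 0;
}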
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c1c8bc93412..7e73c26b6bb4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -221,7 +221,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
221 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 221 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
222 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , 222 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
223 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , 223 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
224 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" 224 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
225 "sk_lock-AF_MAX"
225}; 226};
226static const char *const af_family_slock_key_strings[AF_MAX+1] = { 227static const char *const af_family_slock_key_strings[AF_MAX+1] = {
227 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , 228 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -237,7 +238,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
237 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 238 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
238 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , 239 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
239 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , 240 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
240 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" 241 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
242 "slock-AF_MAX"
241}; 243};
242static const char *const af_family_clock_key_strings[AF_MAX+1] = { 244static const char *const af_family_clock_key_strings[AF_MAX+1] = {
243 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , 245 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -253,7 +255,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
253 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 255 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
254 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , 256 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
255 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , 257 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
256 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" 258 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
259 "clock-AF_MAX"
257}; 260};
258 261
259/* 262/*
@@ -987,6 +990,10 @@ set_rcvbuf:
987 sk->sk_incoming_cpu = val; 990 sk->sk_incoming_cpu = val;
988 break; 991 break;
989 992
993 case SO_CNX_ADVICE:
994 if (val == 1)
995 dst_negative_advice(sk);
996 break;
990 default: 997 default:
991 ret = -ENOPROTOOPT; 998 ret = -ENOPROTOOPT;
992 break; 999 break;
@@ -1531,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1531 newsk = NULL; 1538 newsk = NULL;
1532 goto out; 1539 goto out;
1533 } 1540 }
1541 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1534 1542
1535 newsk->sk_err = 0; 1543 newsk->sk_err = 0;
1536 newsk->sk_priority = 0; 1544 newsk->sk_priority = 0;
@@ -1903,7 +1911,7 @@ EXPORT_SYMBOL(sock_cmsg_send);
1903bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) 1911bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1904{ 1912{
1905 if (pfrag->page) { 1913 if (pfrag->page) {
1906 if (atomic_read(&pfrag->page->_count) == 1) { 1914 if (page_ref_count(pfrag->page) == 1) {
1907 pfrag->offset = 0; 1915 pfrag->offset = 0;
1908 return true; 1916 return true;
1909 } 1917 }
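The sock.c hunk wires up SO_CNX_ADVICE: setting it to 1 calls dst_negative_advice() on the socket, which should prompt the kernel to drop the cached route and pick a fresh one the next time the connection is used. A hedged userspace sketch of how an application might send that hint is below; it assumes the option is exposed as SO_CNX_ADVICE (value 53 in the asm-generic layout) on a kernel that carries this change, so the fallback define is only for older headers.

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef SO_CNX_ADVICE
#define SO_CNX_ADVICE 53	/* assumed asm-generic value; fallback for old headers */
#endif

/* Hint the kernel that this connection performs badly and the cached
 * route should be re-evaluated.  Returns 0 on success, -1 on error.
 */
static int advise_bad_connection(int fd)
{
	int val = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_CNX_ADVICE, &val, sizeof(val)) < 0) {
		perror("setsockopt(SO_CNX_ADVICE)");
		return -1;
	}
	return 0;
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	advise_bad_connection(fd);	/* no-op unless a route is cached */
	close(fd);
	return 0;
}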
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 902d606324a0..9c67a961ba53 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -204,8 +204,6 @@ void dccp_req_err(struct sock *sk, u64 seq)
204 * ICMPs are not backlogged, hence we cannot get an established 204 * ICMPs are not backlogged, hence we cannot get an established
205 * socket here. 205 * socket here.
206 */ 206 */
207 WARN_ON(req->sk);
208
209 if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { 207 if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
210 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 208 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
211 } else { 209 } else {
@@ -802,7 +800,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
802 } 800 }
803 801
804lookup: 802lookup:
805 sk = __inet_lookup_skb(&dccp_hashinfo, skb, 803 sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
806 dh->dccph_sport, dh->dccph_dport); 804 dh->dccph_sport, dh->dccph_dport);
807 if (!sk) { 805 if (!sk) {
808 dccp_pr_debug("failed to look up flow ID in table and " 806 dccp_pr_debug("failed to look up flow ID in table and "
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index b8608b71a66d..4663a01d5039 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -668,7 +668,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
668 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); 668 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
669 669
670lookup: 670lookup:
671 sk = __inet6_lookup_skb(&dccp_hashinfo, skb, 671 sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
672 dh->dccph_sport, dh->dccph_dport, 672 dh->dccph_sport, dh->dccph_dport,
673 inet6_iif(skb)); 673 inet6_iif(skb));
674 if (!sk) { 674 if (!sk) {
@@ -993,7 +993,7 @@ static struct proto dccp_v6_prot = {
993 .sendmsg = dccp_sendmsg, 993 .sendmsg = dccp_sendmsg,
994 .recvmsg = dccp_recvmsg, 994 .recvmsg = dccp_recvmsg,
995 .backlog_rcv = dccp_v6_do_rcv, 995 .backlog_rcv = dccp_v6_do_rcv,
996 .hash = inet_hash, 996 .hash = inet6_hash,
997 .unhash = inet_unhash, 997 .unhash = inet_unhash,
998 .accept = inet_csk_accept, 998 .accept = inet_csk_accept,
999 .get_port = inet_csk_get_port, 999 .get_port = inet_csk_get_port,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 607a14f20d88..b1dc096d22f8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1034,10 +1034,13 @@ source_ok:
1034 if (!fld.daddr) { 1034 if (!fld.daddr) {
1035 fld.daddr = fld.saddr; 1035 fld.daddr = fld.saddr;
1036 1036
1037 err = -EADDRNOTAVAIL;
1038 if (dev_out) 1037 if (dev_out)
1039 dev_put(dev_out); 1038 dev_put(dev_out);
1039 err = -EINVAL;
1040 dev_out = init_net.loopback_dev; 1040 dev_out = init_net.loopback_dev;
1041 if (!dev_out->dn_ptr)
1042 goto out;
1043 err = -EADDRNOTAVAIL;
1041 dev_hold(dev_out); 1044 dev_hold(dev_out);
1042 if (!fld.daddr) { 1045 if (!fld.daddr) {
1043 fld.daddr = 1046 fld.daddr =
@@ -1110,6 +1113,8 @@ source_ok:
1110 if (dev_out == NULL) 1113 if (dev_out == NULL)
1111 goto out; 1114 goto out;
1112 dn_db = rcu_dereference_raw(dev_out->dn_ptr); 1115 dn_db = rcu_dereference_raw(dev_out->dn_ptr);
1116 if (!dn_db)
1117 goto e_inval;
1113 /* Possible improvement - check all devices for local addr */ 1118 /* Possible improvement - check all devices for local addr */
1114 if (dn_dev_islocal(dev_out, fld.daddr)) { 1119 if (dn_dev_islocal(dev_out, fld.daddr)) {
1115 dev_put(dev_out); 1120 dev_put(dev_out);
@@ -1151,6 +1156,8 @@ select_source:
1151 dev_put(dev_out); 1156 dev_put(dev_out);
1152 dev_out = init_net.loopback_dev; 1157 dev_out = init_net.loopback_dev;
1153 dev_hold(dev_out); 1158 dev_hold(dev_out);
1159 if (!dev_out->dn_ptr)
1160 goto e_inval;
1154 fld.flowidn_oif = dev_out->ifindex; 1161 fld.flowidn_oif = dev_out->ifindex;
1155 if (res.fi) 1162 if (res.fi)
1156 dn_fib_info_put(res.fi); 1163 dn_fib_info_put(res.fi);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index fa4daba8db55..c28c47463b7e 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -430,35 +430,30 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
430 hwmon_device_unregister(ds->hwmon_dev); 430 hwmon_device_unregister(ds->hwmon_dev);
431#endif 431#endif
432 432
433 /* Disable configuration of the CPU and DSA ports */ 433 /* Destroy network devices for physical switch ports. */
434 for (port = 0; port < DSA_MAX_PORTS; port++) { 434 for (port = 0; port < DSA_MAX_PORTS; port++) {
435 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) 435 if (!(ds->phys_port_mask & (1 << port)))
436 continue;
437
438 if (!ds->ports[port])
436 continue; 439 continue;
437 440
441 dsa_slave_destroy(ds->ports[port]);
442 }
443
444 /* Remove any fixed link PHYs */
445 for (port = 0; port < DSA_MAX_PORTS; port++) {
438 port_dn = cd->port_dn[port]; 446 port_dn = cd->port_dn[port];
439 if (of_phy_is_fixed_link(port_dn)) { 447 if (of_phy_is_fixed_link(port_dn)) {
440 phydev = of_phy_find_device(port_dn); 448 phydev = of_phy_find_device(port_dn);
441 if (phydev) { 449 if (phydev) {
442 int addr = phydev->mdio.addr;
443
444 phy_device_free(phydev); 450 phy_device_free(phydev);
445 of_node_put(port_dn); 451 of_node_put(port_dn);
446 fixed_phy_del(addr); 452 fixed_phy_unregister(phydev);
447 } 453 }
448 } 454 }
449 } 455 }
450 456
451 /* Destroy network devices for physical switch ports. */
452 for (port = 0; port < DSA_MAX_PORTS; port++) {
453 if (!(ds->phys_port_mask & (1 << port)))
454 continue;
455
456 if (!ds->ports[port])
457 continue;
458
459 dsa_slave_destroy(ds->ports[port]);
460 }
461
462 mdiobus_unregister(ds->slave_mii_bus); 457 mdiobus_unregister(ds->slave_mii_bus);
463} 458}
464 459
@@ -935,6 +930,14 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
935{ 930{
936 int i; 931 int i;
937 932
933 dst->master_netdev->dsa_ptr = NULL;
934
935 /* If we used a tagging format that doesn't have an ethertype
936 * field, make sure that all packets from this point get sent
937 * without the tag and go through the regular receive path.
938 */
939 wmb();
940
938 for (i = 0; i < dst->pd->nr_chips; i++) { 941 for (i = 0; i < dst->pd->nr_chips; i++) {
939 struct dsa_switch *ds = dst->ds[i]; 942 struct dsa_switch *ds = dst->ds[i];
940 943
@@ -988,14 +991,6 @@ static int dsa_suspend(struct device *d)
988 struct dsa_switch_tree *dst = platform_get_drvdata(pdev); 991 struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
989 int i, ret = 0; 992 int i, ret = 0;
990 993
991 dst->master_netdev->dsa_ptr = NULL;
992
993 /* If we used a tagging format that doesn't have an ethertype
994 * field, make sure that all packets from this point get sent
995 * without the tag and go through the regular receive path.
996 */
997 wmb();
998
999 for (i = 0; i < dst->pd->nr_chips; i++) { 994 for (i = 0; i < dst->pd->nr_chips; i++) {
1000 struct dsa_switch *ds = dst->ds[i]; 995 struct dsa_switch *ds = dst->ds[i];
1001 996
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ab24521beb4d..a575f0350d5a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -201,47 +201,6 @@ out:
201 return 0; 201 return 0;
202} 202}
203 203
204static int dsa_bridge_check_vlan_range(struct dsa_switch *ds,
205 const struct net_device *bridge,
206 u16 vid_begin, u16 vid_end)
207{
208 struct dsa_slave_priv *p;
209 struct net_device *dev, *vlan_br;
210 DECLARE_BITMAP(members, DSA_MAX_PORTS);
211 DECLARE_BITMAP(untagged, DSA_MAX_PORTS);
212 u16 vid;
213 int member, err;
214
215 if (!ds->drv->vlan_getnext || !vid_begin)
216 return -EOPNOTSUPP;
217
218 vid = vid_begin - 1;
219
220 do {
221 err = ds->drv->vlan_getnext(ds, &vid, members, untagged);
222 if (err)
223 break;
224
225 if (vid > vid_end)
226 break;
227
228 member = find_first_bit(members, DSA_MAX_PORTS);
229 if (member == DSA_MAX_PORTS)
230 continue;
231
232 dev = ds->ports[member];
233 p = netdev_priv(dev);
234 vlan_br = p->bridge_dev;
235 if (vlan_br == bridge)
236 continue;
237
238 netdev_dbg(vlan_br, "hardware VLAN %d already in use\n", vid);
239 return -EOPNOTSUPP;
240 } while (vid < vid_end);
241
242 return err == -ENOENT ? 0 : err;
243}
244
245static int dsa_slave_port_vlan_add(struct net_device *dev, 204static int dsa_slave_port_vlan_add(struct net_device *dev,
246 const struct switchdev_obj_port_vlan *vlan, 205 const struct switchdev_obj_port_vlan *vlan,
247 struct switchdev_trans *trans) 206 struct switchdev_trans *trans)
@@ -254,15 +213,6 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
254 if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) 213 if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
255 return -EOPNOTSUPP; 214 return -EOPNOTSUPP;
256 215
257 /* If the requested port doesn't belong to the same bridge as
258 * the VLAN members, fallback to software VLAN (hopefully).
259 */
260 err = dsa_bridge_check_vlan_range(ds, p->bridge_dev,
261 vlan->vid_begin,
262 vlan->vid_end);
263 if (err)
264 return err;
265
266 err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); 216 err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans);
267 if (err) 217 if (err)
268 return err; 218 return err;
@@ -293,41 +243,11 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
293{ 243{
294 struct dsa_slave_priv *p = netdev_priv(dev); 244 struct dsa_slave_priv *p = netdev_priv(dev);
295 struct dsa_switch *ds = p->parent; 245 struct dsa_switch *ds = p->parent;
296 DECLARE_BITMAP(members, DSA_MAX_PORTS);
297 DECLARE_BITMAP(untagged, DSA_MAX_PORTS);
298 u16 pvid, vid = 0;
299 int err;
300 246
301 if (!ds->drv->vlan_getnext || !ds->drv->port_pvid_get) 247 if (ds->drv->port_vlan_dump)
302 return -EOPNOTSUPP; 248 return ds->drv->port_vlan_dump(ds, p->port, vlan, cb);
303 249
304 err = ds->drv->port_pvid_get(ds, p->port, &pvid); 250 return -EOPNOTSUPP;
305 if (err)
306 return err;
307
308 for (;;) {
309 err = ds->drv->vlan_getnext(ds, &vid, members, untagged);
310 if (err)
311 break;
312
313 if (!test_bit(p->port, members))
314 continue;
315
316 memset(vlan, 0, sizeof(*vlan));
317 vlan->vid_begin = vlan->vid_end = vid;
318
319 if (vid == pvid)
320 vlan->flags |= BRIDGE_VLAN_INFO_PVID;
321
322 if (test_bit(p->port, untagged))
323 vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
324
325 err = cb(&vlan->obj);
326 if (err)
327 break;
328 }
329
330 return err == -ENOENT ? 0 : err;
331} 251}
332 252
333static int dsa_slave_port_fdb_add(struct net_device *dev, 253static int dsa_slave_port_fdb_add(struct net_device *dev,
@@ -385,31 +305,6 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
385 return -EOPNOTSUPP; 305 return -EOPNOTSUPP;
386} 306}
387 307
388/* Return a bitmask of all ports being currently bridged within a given bridge
389 * device. Note that on leave, the mask will still return the bitmask of ports
390 * currently bridged, prior to port removal, and this is exactly what we want.
391 */
392static u32 dsa_slave_br_port_mask(struct dsa_switch *ds,
393 struct net_device *bridge)
394{
395 struct dsa_slave_priv *p;
396 unsigned int port;
397 u32 mask = 0;
398
399 for (port = 0; port < DSA_MAX_PORTS; port++) {
400 if (!dsa_is_port_initialized(ds, port))
401 continue;
402
403 p = netdev_priv(ds->ports[port]);
404
405 if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT &&
406 p->bridge_dev == bridge)
407 mask |= 1 << port;
408 }
409
410 return mask;
411}
412
413static int dsa_slave_stp_update(struct net_device *dev, u8 state) 308static int dsa_slave_stp_update(struct net_device *dev, u8 state)
414{ 309{
415 struct dsa_slave_priv *p = netdev_priv(dev); 310 struct dsa_slave_priv *p = netdev_priv(dev);
@@ -422,6 +317,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state)
422 return ret; 317 return ret;
423} 318}
424 319
320static int dsa_slave_vlan_filtering(struct net_device *dev,
321 const struct switchdev_attr *attr,
322 struct switchdev_trans *trans)
323{
324 struct dsa_slave_priv *p = netdev_priv(dev);
325 struct dsa_switch *ds = p->parent;
326
327 /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
328 if (switchdev_trans_ph_prepare(trans))
329 return 0;
330
331 if (ds->drv->port_vlan_filtering)
332 return ds->drv->port_vlan_filtering(ds, p->port,
333 attr->u.vlan_filtering);
334
335 return 0;
336}
337
425static int dsa_slave_port_attr_set(struct net_device *dev, 338static int dsa_slave_port_attr_set(struct net_device *dev,
426 const struct switchdev_attr *attr, 339 const struct switchdev_attr *attr,
427 struct switchdev_trans *trans) 340 struct switchdev_trans *trans)
@@ -438,6 +351,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
438 ret = ds->drv->port_stp_update(ds, p->port, 351 ret = ds->drv->port_stp_update(ds, p->port,
439 attr->u.stp_state); 352 attr->u.stp_state);
440 break; 353 break;
354 case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
355 ret = dsa_slave_vlan_filtering(dev, attr, trans);
356 break;
441 default: 357 default:
442 ret = -EOPNOTSUPP; 358 ret = -EOPNOTSUPP;
443 break; 359 break;
@@ -532,23 +448,20 @@ static int dsa_slave_bridge_port_join(struct net_device *dev,
532 448
533 p->bridge_dev = br; 449 p->bridge_dev = br;
534 450
535 if (ds->drv->port_join_bridge) 451 if (ds->drv->port_bridge_join)
536 ret = ds->drv->port_join_bridge(ds, p->port, 452 ret = ds->drv->port_bridge_join(ds, p->port, br);
537 dsa_slave_br_port_mask(ds, br));
538 453
539 return ret; 454 return ret == -EOPNOTSUPP ? 0 : ret;
540} 455}
541 456
542static int dsa_slave_bridge_port_leave(struct net_device *dev) 457static void dsa_slave_bridge_port_leave(struct net_device *dev)
543{ 458{
544 struct dsa_slave_priv *p = netdev_priv(dev); 459 struct dsa_slave_priv *p = netdev_priv(dev);
545 struct dsa_switch *ds = p->parent; 460 struct dsa_switch *ds = p->parent;
546 int ret = -EOPNOTSUPP;
547 461
548 462
549 if (ds->drv->port_leave_bridge) 463 if (ds->drv->port_bridge_leave)
550 ret = ds->drv->port_leave_bridge(ds, p->port, 464 ds->drv->port_bridge_leave(ds, p->port);
551 dsa_slave_br_port_mask(ds, p->bridge_dev));
552 465
553 p->bridge_dev = NULL; 466 p->bridge_dev = NULL;
554 467
@@ -556,8 +469,6 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev)
556 * so allow it to be in BR_STATE_FORWARDING to be kept functional 469 * so allow it to be in BR_STATE_FORWARDING to be kept functional
557 */ 470 */
558 dsa_slave_stp_update(dev, BR_STATE_FORWARDING); 471 dsa_slave_stp_update(dev, BR_STATE_FORWARDING);
559
560 return ret;
561} 472}
562 473
563static int dsa_slave_port_attr_get(struct net_device *dev, 474static int dsa_slave_port_attr_get(struct net_device *dev,
@@ -982,11 +893,15 @@ static void dsa_slave_adjust_link(struct net_device *dev)
982static int dsa_slave_fixed_link_update(struct net_device *dev, 893static int dsa_slave_fixed_link_update(struct net_device *dev,
983 struct fixed_phy_status *status) 894 struct fixed_phy_status *status)
984{ 895{
985 struct dsa_slave_priv *p = netdev_priv(dev); 896 struct dsa_slave_priv *p;
986 struct dsa_switch *ds = p->parent; 897 struct dsa_switch *ds;
987 898
988 if (ds->drv->fixed_link_update) 899 if (dev) {
989 ds->drv->fixed_link_update(ds, p->port, status); 900 p = netdev_priv(dev);
901 ds = p->parent;
902 if (ds->drv->fixed_link_update)
903 ds->drv->fixed_link_update(ds, p->port, status);
904 }
990 905
991 return 0; 906 return 0;
992} 907}
@@ -1228,40 +1143,46 @@ static bool dsa_slave_dev_check(struct net_device *dev)
1228 return dev->netdev_ops == &dsa_slave_netdev_ops; 1143 return dev->netdev_ops == &dsa_slave_netdev_ops;
1229} 1144}
1230 1145
1231static int dsa_slave_master_changed(struct net_device *dev) 1146static int dsa_slave_port_upper_event(struct net_device *dev,
1147 unsigned long event, void *ptr)
1232{ 1148{
1233 struct net_device *master = netdev_master_upper_dev_get(dev); 1149 struct netdev_notifier_changeupper_info *info = ptr;
1234 struct dsa_slave_priv *p = netdev_priv(dev); 1150 struct net_device *upper = info->upper_dev;
1235 int err = 0; 1151 int err = 0;
1236 1152
1237 if (master && master->rtnl_link_ops && 1153 switch (event) {
1238 !strcmp(master->rtnl_link_ops->kind, "bridge")) 1154 case NETDEV_CHANGEUPPER:
1239 err = dsa_slave_bridge_port_join(dev, master); 1155 if (netif_is_bridge_master(upper)) {
1240 else if (dsa_port_is_bridged(p)) 1156 if (info->linking)
1241 err = dsa_slave_bridge_port_leave(dev); 1157 err = dsa_slave_bridge_port_join(dev, upper);
1158 else
1159 dsa_slave_bridge_port_leave(dev);
1160 }
1242 1161
1243 return err; 1162 break;
1163 }
1164
1165 return notifier_from_errno(err);
1244} 1166}
1245 1167
1246int dsa_slave_netdevice_event(struct notifier_block *unused, 1168static int dsa_slave_port_event(struct net_device *dev, unsigned long event,
1247 unsigned long event, void *ptr) 1169 void *ptr)
1248{ 1170{
1249 struct net_device *dev;
1250 int err = 0;
1251
1252 switch (event) { 1171 switch (event) {
1253 case NETDEV_CHANGEUPPER: 1172 case NETDEV_CHANGEUPPER:
1254 dev = netdev_notifier_info_to_dev(ptr); 1173 return dsa_slave_port_upper_event(dev, event, ptr);
1255 if (!dsa_slave_dev_check(dev)) 1174 }
1256 goto out;
1257 1175
1258 err = dsa_slave_master_changed(dev); 1176 return NOTIFY_DONE;
1259 if (err && err != -EOPNOTSUPP) 1177}
1260 netdev_warn(dev, "failed to reflect master change\n");
1261 1178
1262 break; 1179int dsa_slave_netdevice_event(struct notifier_block *unused,
1263 } 1180 unsigned long event, void *ptr)
1181{
1182 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1183
1184 if (dsa_slave_dev_check(dev))
1185 return dsa_slave_port_event(dev, event, ptr);
1264 1186
1265out:
1266 return NOTIFY_DONE; 1187 return NOTIFY_DONE;
1267} 1188}
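The slave.c refactor retires the mask-based port_join_bridge/port_leave_bridge operations in favour of per-port port_bridge_join()/port_bridge_leave() hooks and adds port_vlan_filtering() and port_vlan_dump() callbacks. The real prototypes live in the DSA driver ops structure, which this diff does not show; the stub below is only a sketch whose argument types are inferred from the call sites in the hunks above, with stand-in struct definitions so it compiles on its own.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel types so the sketch is self-contained;
 * a real driver gets these from net/dsa.h and linux/netdevice.h.
 */
struct dsa_switch { const char *name; };
struct net_device { const char *name; };

/* Prototypes inferred from the call sites above:
 *   ds->drv->port_bridge_join(ds, port, br)           -> int
 *   ds->drv->port_bridge_leave(ds, port)              -> void
 *   ds->drv->port_vlan_filtering(ds, port, filtering) -> int
 */
static int example_port_bridge_join(struct dsa_switch *ds, int port,
				    struct net_device *bridge)
{
	printf("%s: port %d joins bridge %s\n", ds->name, port, bridge->name);
	return 0;	/* or an -EOPNOTSUPP-style error if unsupported */
}

static void example_port_bridge_leave(struct dsa_switch *ds, int port)
{
	printf("%s: port %d leaves its bridge\n", ds->name, port);
}

static int example_port_vlan_filtering(struct dsa_switch *ds, int port,
				       bool vlan_filtering)
{
	printf("%s: port %d VLAN filtering %s\n", ds->name, port,
	       vlan_filtering ? "on" : "off");
	return 0;
}

int main(void)
{
	struct dsa_switch sw = { .name = "sw0" };
	struct net_device br = { .name = "br0" };

	example_port_bridge_join(&sw, 1, &br);
	example_port_vlan_filtering(&sw, 1, true);
	example_port_bridge_leave(&sw, 1);
	return 0;
}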
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 103871784e50..66dff5e3d772 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -125,6 +125,7 @@ EXPORT_SYMBOL(eth_header);
125 */ 125 */
126u32 eth_get_headlen(void *data, unsigned int len) 126u32 eth_get_headlen(void *data, unsigned int len)
127{ 127{
128 const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
128 const struct ethhdr *eth = (const struct ethhdr *)data; 129 const struct ethhdr *eth = (const struct ethhdr *)data;
129 struct flow_keys keys; 130 struct flow_keys keys;
130 131
@@ -134,7 +135,7 @@ u32 eth_get_headlen(void *data, unsigned int len)
134 135
135 /* parse any remaining L2/L3 headers, check for L4 */ 136 /* parse any remaining L2/L3 headers, check for L4 */
136 if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto, 137 if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
137 sizeof(*eth), len, 0)) 138 sizeof(*eth), len, flags))
138 return max_t(u32, keys.control.thoff, sizeof(*eth)); 139 return max_t(u32, keys.control.thoff, sizeof(*eth));
139 140
140 /* parse for any L4 headers */ 141 /* parse for any L4 headers */
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 737c87a2a41e..0023c9048812 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -207,7 +207,7 @@ static int lowpan_device_event(struct notifier_block *unused,
207 struct net_device *wdev = netdev_notifier_info_to_dev(ptr); 207 struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
208 208
209 if (wdev->type != ARPHRD_IEEE802154) 209 if (wdev->type != ARPHRD_IEEE802154)
210 goto out; 210 return NOTIFY_DONE;
211 211
212 switch (event) { 212 switch (event) {
213 case NETDEV_UNREGISTER: 213 case NETDEV_UNREGISTER:
@@ -219,11 +219,10 @@ static int lowpan_device_event(struct notifier_block *unused,
219 lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL); 219 lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
220 break; 220 break;
221 default: 221 default:
222 break; 222 return NOTIFY_DONE;
223 } 223 }
224 224
225out: 225 return NOTIFY_OK;
226 return NOTIFY_DONE;
227} 226}
228 227
229static struct notifier_block lowpan_dev_notifier = { 228static struct notifier_block lowpan_dev_notifier = {
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a548be247e15..e0bd013a1e5e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -182,12 +182,14 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
182static HLIST_HEAD(raw_head); 182static HLIST_HEAD(raw_head);
183static DEFINE_RWLOCK(raw_lock); 183static DEFINE_RWLOCK(raw_lock);
184 184
185static void raw_hash(struct sock *sk) 185static int raw_hash(struct sock *sk)
186{ 186{
187 write_lock_bh(&raw_lock); 187 write_lock_bh(&raw_lock);
188 sk_add_node(sk, &raw_head); 188 sk_add_node(sk, &raw_head);
189 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 189 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
190 write_unlock_bh(&raw_lock); 190 write_unlock_bh(&raw_lock);
191
192 return 0;
191} 193}
192 194
193static void raw_unhash(struct sock *sk) 195static void raw_unhash(struct sock *sk)
@@ -462,12 +464,14 @@ static inline struct dgram_sock *dgram_sk(const struct sock *sk)
462 return container_of(sk, struct dgram_sock, sk); 464 return container_of(sk, struct dgram_sock, sk);
463} 465}
464 466
465static void dgram_hash(struct sock *sk) 467static int dgram_hash(struct sock *sk)
466{ 468{
467 write_lock_bh(&dgram_lock); 469 write_lock_bh(&dgram_lock);
468 sk_add_node(sk, &dgram_head); 470 sk_add_node(sk, &dgram_head);
469 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 471 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
470 write_unlock_bh(&dgram_lock); 472 write_unlock_bh(&dgram_lock);
473
474 return 0;
471} 475}
472 476
473static void dgram_unhash(struct sock *sk) 477static void dgram_unhash(struct sock *sk)
@@ -1026,8 +1030,13 @@ static int ieee802154_create(struct net *net, struct socket *sock,
1026 /* Checksums on by default */ 1030 /* Checksums on by default */
1027 sock_set_flag(sk, SOCK_ZAPPED); 1031 sock_set_flag(sk, SOCK_ZAPPED);
1028 1032
1029 if (sk->sk_prot->hash) 1033 if (sk->sk_prot->hash) {
1030 sk->sk_prot->hash(sk); 1034 rc = sk->sk_prot->hash(sk);
1035 if (rc) {
1036 sk_common_release(sk);
1037 goto out;
1038 }
1039 }
1031 1040
1032 if (sk->sk_prot->init) { 1041 if (sk->sk_prot->init) {
1033 rc = sk->sk_prot->init(sk); 1042 rc = sk->sk_prot->init(sk);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 775824720b6b..238225b0c970 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -186,6 +186,7 @@ config NET_IPGRE_DEMUX
186 186
187config NET_IP_TUNNEL 187config NET_IP_TUNNEL
188 tristate 188 tristate
189 select DST_CACHE
189 default n 190 default n
190 191
191config NET_IPGRE 192config NET_IPGRE
@@ -405,14 +406,6 @@ config INET_XFRM_MODE_BEET
405 406
406 If unsure, say Y. 407 If unsure, say Y.
407 408
408config INET_LRO
409 tristate "Large Receive Offload (ipv4/tcp)"
410 default y
411 ---help---
412 Support for Large Receive Offload (ipv4/tcp).
413
414 If unsure, say Y.
415
416config INET_DIAG 409config INET_DIAG
417 tristate "INET: socket monitoring interface" 410 tristate "INET: socket monitoring interface"
418 default y 411 default y
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 62c049b647e9..bfa133691cde 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -32,7 +32,6 @@ obj-$(CONFIG_INET_ESP) += esp4.o
32obj-$(CONFIG_INET_IPCOMP) += ipcomp.o 32obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
33obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o 33obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
34obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o 34obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
35obj-$(CONFIG_INET_LRO) += inet_lro.o
36obj-$(CONFIG_INET_TUNNEL) += tunnel4.o 35obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
37obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o 36obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
38obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o 37obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5c5db6636704..9e481992dbae 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -370,7 +370,11 @@ lookup_protocol:
370 */ 370 */
371 inet->inet_sport = htons(inet->inet_num); 371 inet->inet_sport = htons(inet->inet_num);
372 /* Add to protocol hash chains. */ 372 /* Add to protocol hash chains. */
373 sk->sk_prot->hash(sk); 373 err = sk->sk_prot->hash(sk);
374 if (err) {
375 sk_common_release(sk);
376 goto out;
377 }
374 } 378 }
375 379
376 if (sk->sk_prot->init) { 380 if (sk->sk_prot->init) {
@@ -1091,12 +1095,6 @@ void inet_unregister_protosw(struct inet_protosw *p)
1091} 1095}
1092EXPORT_SYMBOL(inet_unregister_protosw); 1096EXPORT_SYMBOL(inet_unregister_protosw);
1093 1097
1094/*
1095 * Shall we try to damage output packets if routing dev changes?
1096 */
1097
1098int sysctl_ip_dynaddr __read_mostly;
1099
1100static int inet_sk_reselect_saddr(struct sock *sk) 1098static int inet_sk_reselect_saddr(struct sock *sk)
1101{ 1099{
1102 struct inet_sock *inet = inet_sk(sk); 1100 struct inet_sock *inet = inet_sk(sk);
@@ -1127,7 +1125,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1127 if (new_saddr == old_saddr) 1125 if (new_saddr == old_saddr)
1128 return 0; 1126 return 0;
1129 1127
1130 if (sysctl_ip_dynaddr > 1) { 1128 if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
1131 pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", 1129 pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
1132 __func__, &old_saddr, &new_saddr); 1130 __func__, &old_saddr, &new_saddr);
1133 } 1131 }
@@ -1142,8 +1140,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1142 * Besides that, it does not check for connection 1140 * Besides that, it does not check for connection
1143 * uniqueness. Wait for troubles. 1141 * uniqueness. Wait for troubles.
1144 */ 1142 */
1145 __sk_prot_rehash(sk); 1143 return __sk_prot_rehash(sk);
1146 return 0;
1147} 1144}
1148 1145
1149int inet_sk_rebuild_header(struct sock *sk) 1146int inet_sk_rebuild_header(struct sock *sk)
@@ -1183,7 +1180,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1183 * Other protocols have to map its equivalent state to TCP_SYN_SENT. 1180 * Other protocols have to map its equivalent state to TCP_SYN_SENT.
1184 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme 1181 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
1185 */ 1182 */
1186 if (!sysctl_ip_dynaddr || 1183 if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
1187 sk->sk_state != TCP_SYN_SENT || 1184 sk->sk_state != TCP_SYN_SENT ||
1188 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || 1185 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1189 (err = inet_sk_reselect_saddr(sk)) != 0) 1186 (err = inet_sk_reselect_saddr(sk)) != 0)
@@ -1383,6 +1380,45 @@ out:
1383 return pp; 1380 return pp;
1384} 1381}
1385 1382
1383static struct sk_buff **ipip_gro_receive(struct sk_buff **head,
1384 struct sk_buff *skb)
1385{
1386 if (NAPI_GRO_CB(skb)->encap_mark) {
1387 NAPI_GRO_CB(skb)->flush = 1;
1388 return NULL;
1389 }
1390
1391 NAPI_GRO_CB(skb)->encap_mark = 1;
1392
1393 return inet_gro_receive(head, skb);
1394}
1395
1396#define SECONDS_PER_DAY 86400
1397
1398/* inet_current_timestamp - Return IP network timestamp
1399 *
1400 * Return milliseconds since midnight in network byte order.
1401 */
1402__be32 inet_current_timestamp(void)
1403{
1404 u32 secs;
1405 u32 msecs;
1406 struct timespec64 ts;
1407
1408 ktime_get_real_ts64(&ts);
1409
1410 /* Get secs since midnight. */
1411 (void)div_u64_rem(ts.tv_sec, SECONDS_PER_DAY, &secs);
1412 /* Convert to msecs. */
1413 msecs = secs * MSEC_PER_SEC;
1414 /* Convert nsec to msec. */
1415 msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC;
1416
1417 /* Convert to network byte order. */
1418 return htonl(msecs);
1419}
1420EXPORT_SYMBOL(inet_current_timestamp);
1421
1386int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) 1422int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
1387{ 1423{
1388 if (sk->sk_family == AF_INET) 1424 if (sk->sk_family == AF_INET)
@@ -1425,6 +1461,13 @@ out_unlock:
1425 return err; 1461 return err;
1426} 1462}
1427 1463
1464static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
1465{
1466 skb->encapsulation = 1;
1467 skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP;
1468 return inet_gro_complete(skb, nhoff);
1469}
1470
1428int inet_ctl_sock_create(struct sock **sk, unsigned short family, 1471int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1429 unsigned short type, unsigned char protocol, 1472 unsigned short type, unsigned char protocol,
1430 struct net *net) 1473 struct net *net)
@@ -1652,8 +1695,8 @@ static struct packet_offload ip_packet_offload __read_mostly = {
1652static const struct net_offload ipip_offload = { 1695static const struct net_offload ipip_offload = {
1653 .callbacks = { 1696 .callbacks = {
1654 .gso_segment = inet_gso_segment, 1697 .gso_segment = inet_gso_segment,
1655 .gro_receive = inet_gro_receive, 1698 .gro_receive = ipip_gro_receive,
1656 .gro_complete = inet_gro_complete, 1699 .gro_complete = ipip_gro_complete,
1657 }, 1700 },
1658}; 1701};
1659 1702
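inet_current_timestamp(), added above, produces the classic IP/ICMP timestamp value: milliseconds elapsed since UTC midnight, returned in network byte order. The userspace sketch below re-derives the same arithmetic, assuming clock_gettime(CLOCK_REALTIME) as the time source; it illustrates the math and is not the kernel helper itself.

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <arpa/inet.h>

#define SECONDS_PER_DAY 86400

/* Milliseconds since UTC midnight, in network byte order, mirroring the
 * computation inet_current_timestamp() performs in the hunk above.
 */
static uint32_t current_ip_timestamp(void)
{
	struct timespec ts;
	uint32_t secs, msecs;

	clock_gettime(CLOCK_REALTIME, &ts);
	secs = (uint32_t)(ts.tv_sec % SECONDS_PER_DAY);	/* seconds since midnight */
	msecs = secs * 1000u + (uint32_t)(ts.tv_nsec / 1000000L);
	return htonl(msecs);
}

int main(void)
{
	uint32_t t = current_ip_timestamp();

	printf("timestamp (host order): %u ms since midnight\n", ntohl(t));
	return 0;
}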
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 59b3e0e8fd51..c34c7544d1db 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -665,7 +665,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
665 */ 665 */
666 666
667 if (!in_dev) 667 if (!in_dev)
668 goto out; 668 goto out_free_skb;
669 669
670 arp = arp_hdr(skb); 670 arp = arp_hdr(skb);
671 671
@@ -673,7 +673,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
673 default: 673 default:
674 if (arp->ar_pro != htons(ETH_P_IP) || 674 if (arp->ar_pro != htons(ETH_P_IP) ||
675 htons(dev_type) != arp->ar_hrd) 675 htons(dev_type) != arp->ar_hrd)
676 goto out; 676 goto out_free_skb;
677 break; 677 break;
678 case ARPHRD_ETHER: 678 case ARPHRD_ETHER:
679 case ARPHRD_FDDI: 679 case ARPHRD_FDDI:
@@ -690,17 +690,17 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
690 if ((arp->ar_hrd != htons(ARPHRD_ETHER) && 690 if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
691 arp->ar_hrd != htons(ARPHRD_IEEE802)) || 691 arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
692 arp->ar_pro != htons(ETH_P_IP)) 692 arp->ar_pro != htons(ETH_P_IP))
693 goto out; 693 goto out_free_skb;
694 break; 694 break;
695 case ARPHRD_AX25: 695 case ARPHRD_AX25:
696 if (arp->ar_pro != htons(AX25_P_IP) || 696 if (arp->ar_pro != htons(AX25_P_IP) ||
697 arp->ar_hrd != htons(ARPHRD_AX25)) 697 arp->ar_hrd != htons(ARPHRD_AX25))
698 goto out; 698 goto out_free_skb;
699 break; 699 break;
700 case ARPHRD_NETROM: 700 case ARPHRD_NETROM:
701 if (arp->ar_pro != htons(AX25_P_IP) || 701 if (arp->ar_pro != htons(AX25_P_IP) ||
702 arp->ar_hrd != htons(ARPHRD_NETROM)) 702 arp->ar_hrd != htons(ARPHRD_NETROM))
703 goto out; 703 goto out_free_skb;
704 break; 704 break;
705 } 705 }
706 706
@@ -708,7 +708,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
708 708
709 if (arp->ar_op != htons(ARPOP_REPLY) && 709 if (arp->ar_op != htons(ARPOP_REPLY) &&
710 arp->ar_op != htons(ARPOP_REQUEST)) 710 arp->ar_op != htons(ARPOP_REQUEST))
711 goto out; 711 goto out_free_skb;
712 712
713/* 713/*
714 * Extract fields 714 * Extract fields
@@ -733,7 +733,15 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
733 */ 733 */
734 if (ipv4_is_multicast(tip) || 734 if (ipv4_is_multicast(tip) ||
735 (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip))) 735 (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
736 goto out; 736 goto out_free_skb;
737
738 /*
739 * For some 802.11 wireless deployments (and possibly other networks),
740 * there will be an ARP proxy and gratuitous ARP frames are attacks
741 * and thus should not be accepted.
742 */
743 if (sip == tip && IN_DEV_ORCONF(in_dev, DROP_GRATUITOUS_ARP))
744 goto out_free_skb;
737 745
738/* 746/*
739 * Special case: We must set Frame Relay source Q.922 address 747 * Special case: We must set Frame Relay source Q.922 address
@@ -770,7 +778,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
770 !arp_ignore(in_dev, sip, tip)) 778 !arp_ignore(in_dev, sip, tip))
771 arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, 779 arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip,
772 sha, dev->dev_addr, sha, reply_dst); 780 sha, dev->dev_addr, sha, reply_dst);
773 goto out; 781 goto out_consume_skb;
774 } 782 }
775 783
776 if (arp->ar_op == htons(ARPOP_REQUEST) && 784 if (arp->ar_op == htons(ARPOP_REQUEST) &&
@@ -795,7 +803,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
795 neigh_release(n); 803 neigh_release(n);
796 } 804 }
797 } 805 }
798 goto out; 806 goto out_consume_skb;
799 } else if (IN_DEV_FORWARD(in_dev)) { 807 } else if (IN_DEV_FORWARD(in_dev)) {
800 if (addr_type == RTN_UNICAST && 808 if (addr_type == RTN_UNICAST &&
801 (arp_fwd_proxy(in_dev, dev, rt) || 809 (arp_fwd_proxy(in_dev, dev, rt) ||
@@ -818,7 +826,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
818 in_dev->arp_parms, skb); 826 in_dev->arp_parms, skb);
819 goto out_free_dst; 827 goto out_free_dst;
820 } 828 }
821 goto out; 829 goto out_consume_skb;
822 } 830 }
823 } 831 }
824 } 832 }
@@ -868,11 +876,16 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
868 neigh_release(n); 876 neigh_release(n);
869 } 877 }
870 878
871out: 879out_consume_skb:
872 consume_skb(skb); 880 consume_skb(skb);
881
873out_free_dst: 882out_free_dst:
874 dst_release(reply_dst); 883 dst_release(reply_dst);
875 return 0; 884 return NET_RX_SUCCESS;
885
886out_free_skb:
887 kfree_skb(skb);
888 return NET_RX_DROP;
876} 889}
877 890
878static void parp_redo(struct sk_buff *skb) 891static void parp_redo(struct sk_buff *skb)
@@ -916,11 +929,11 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
916 929
917consumeskb: 930consumeskb:
918 consume_skb(skb); 931 consume_skb(skb);
919 return 0; 932 return NET_RX_SUCCESS;
920freeskb: 933freeskb:
921 kfree_skb(skb); 934 kfree_skb(skb);
922out_of_mem: 935out_of_mem:
923 return 0; 936 return NET_RX_DROP;
924} 937}
925 938
926/* 939/*
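The arp.c hunk adds a drop_gratuitous_arp control: when the per-interface knob is set, an ARP packet whose sender IP equals its target IP, the usual shape of a gratuitous announcement, is discarded. As the new comment notes, this matters on 802.11 deployments where a proxy answers ARP on behalf of clients and unsolicited announcements look like spoofing. A small userspace sketch of that predicate over the standard struct ether_arp layout follows; the helper name is invented, and only the sip == tip comparison mirrors the kernel check.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <netinet/if_ether.h>	/* struct ether_arp: arp_spa, arp_tpa, ... */

/* Gratuitous ARP: sender protocol address equals target protocol address.
 * This mirrors the "sip == tip" test guarded by drop_gratuitous_arp above.
 */
static bool is_gratuitous_arp(const struct ether_arp *ea)
{
	return memcmp(ea->arp_spa, ea->arp_tpa, sizeof(ea->arp_spa)) == 0;
}

int main(void)
{
	struct ether_arp ea;

	memset(&ea, 0, sizeof(ea));
	memcpy(ea.arp_spa, "\xc0\xa8\x00\x01", 4);	/* 192.168.0.1 */
	memcpy(ea.arp_tpa, "\xc0\xa8\x00\x01", 4);	/* same address */
	printf("gratuitous: %s\n", is_gratuitous_arp(&ea) ? "yes" : "no");
	return 0;
}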
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f6303b17546b..e333bc86bd39 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
334 334
335 ASSERT_RTNL(); 335 ASSERT_RTNL();
336 336
337 if (in_dev->dead)
338 goto no_promotions;
339
337 /* 1. Deleting primary ifaddr forces deletion all secondaries 340 /* 1. Deleting primary ifaddr forces deletion all secondaries
338 * unless alias promotion is set 341 * unless alias promotion is set
339 **/ 342 **/
@@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
380 fib_del_ifaddr(ifa, ifa1); 383 fib_del_ifaddr(ifa, ifa1);
381 } 384 }
382 385
386no_promotions:
383 /* 2. Unlink it */ 387 /* 2. Unlink it */
384 388
385 *ifap = ifa1->ifa_next; 389 *ifap = ifa1->ifa_next;
@@ -1194,6 +1198,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1194 __be32 addr = 0; 1198 __be32 addr = 0;
1195 struct in_device *in_dev; 1199 struct in_device *in_dev;
1196 struct net *net = dev_net(dev); 1200 struct net *net = dev_net(dev);
1201 int master_idx;
1197 1202
1198 rcu_read_lock(); 1203 rcu_read_lock();
1199 in_dev = __in_dev_get_rcu(dev); 1204 in_dev = __in_dev_get_rcu(dev);
@@ -1214,12 +1219,33 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1214 if (addr) 1219 if (addr)
1215 goto out_unlock; 1220 goto out_unlock;
1216no_in_dev: 1221no_in_dev:
1222 master_idx = l3mdev_master_ifindex_rcu(dev);
1223
1224 /* For VRFs, the VRF device takes the place of the loopback device,
1225 * with addresses on it being preferred. Note in such cases the
1226 * loopback device will be among the devices that fail the master_idx
1227 * equality check in the loop below.
1228 */
1229 if (master_idx &&
1230 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1231 (in_dev = __in_dev_get_rcu(dev))) {
1232 for_primary_ifa(in_dev) {
1233 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1234 ifa->ifa_scope <= scope) {
1235 addr = ifa->ifa_local;
1236 goto out_unlock;
1237 }
1238 } endfor_ifa(in_dev);
1239 }
1217 1240
1218 /* Not loopback addresses on loopback should be preferred 1241 /* Not loopback addresses on loopback should be preferred
1219 in this case. It is important that lo is the first interface 1242 in this case. It is important that lo is the first interface
1220 in dev_base list. 1243 in dev_base list.
1221 */ 1244 */
1222 for_each_netdev_rcu(net, dev) { 1245 for_each_netdev_rcu(net, dev) {
1246 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1247 continue;
1248
1223 in_dev = __in_dev_get_rcu(dev); 1249 in_dev = __in_dev_get_rcu(dev);
1224 if (!in_dev) 1250 if (!in_dev)
1225 continue; 1251 continue;
@@ -1731,17 +1757,20 @@ static int inet_netconf_msgsize_devconf(int type)
1731{ 1757{
1732 int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) 1758 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1733 + nla_total_size(4); /* NETCONFA_IFINDEX */ 1759 + nla_total_size(4); /* NETCONFA_IFINDEX */
1760 bool all = false;
1761
1762 if (type == NETCONFA_ALL)
1763 all = true;
1734 1764
1735 /* type -1 is used for ALL */ 1765 if (all || type == NETCONFA_FORWARDING)
1736 if (type == -1 || type == NETCONFA_FORWARDING)
1737 size += nla_total_size(4); 1766 size += nla_total_size(4);
1738 if (type == -1 || type == NETCONFA_RP_FILTER) 1767 if (all || type == NETCONFA_RP_FILTER)
1739 size += nla_total_size(4); 1768 size += nla_total_size(4);
1740 if (type == -1 || type == NETCONFA_MC_FORWARDING) 1769 if (all || type == NETCONFA_MC_FORWARDING)
1741 size += nla_total_size(4); 1770 size += nla_total_size(4);
1742 if (type == -1 || type == NETCONFA_PROXY_NEIGH) 1771 if (all || type == NETCONFA_PROXY_NEIGH)
1743 size += nla_total_size(4); 1772 size += nla_total_size(4);
1744 if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) 1773 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1745 size += nla_total_size(4); 1774 size += nla_total_size(4);
1746 1775
1747 return size; 1776 return size;
@@ -1754,36 +1783,39 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1754{ 1783{
1755 struct nlmsghdr *nlh; 1784 struct nlmsghdr *nlh;
1756 struct netconfmsg *ncm; 1785 struct netconfmsg *ncm;
1786 bool all = false;
1757 1787
1758 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), 1788 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1759 flags); 1789 flags);
1760 if (!nlh) 1790 if (!nlh)
1761 return -EMSGSIZE; 1791 return -EMSGSIZE;
1762 1792
1793 if (type == NETCONFA_ALL)
1794 all = true;
1795
1763 ncm = nlmsg_data(nlh); 1796 ncm = nlmsg_data(nlh);
1764 ncm->ncm_family = AF_INET; 1797 ncm->ncm_family = AF_INET;
1765 1798
1766 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) 1799 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1767 goto nla_put_failure; 1800 goto nla_put_failure;
1768 1801
1769 /* type -1 is used for ALL */ 1802 if ((all || type == NETCONFA_FORWARDING) &&
1770 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1771 nla_put_s32(skb, NETCONFA_FORWARDING, 1803 nla_put_s32(skb, NETCONFA_FORWARDING,
1772 IPV4_DEVCONF(*devconf, FORWARDING)) < 0) 1804 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1773 goto nla_put_failure; 1805 goto nla_put_failure;
1774 if ((type == -1 || type == NETCONFA_RP_FILTER) && 1806 if ((all || type == NETCONFA_RP_FILTER) &&
1775 nla_put_s32(skb, NETCONFA_RP_FILTER, 1807 nla_put_s32(skb, NETCONFA_RP_FILTER,
1776 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) 1808 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1777 goto nla_put_failure; 1809 goto nla_put_failure;
1778 if ((type == -1 || type == NETCONFA_MC_FORWARDING) && 1810 if ((all || type == NETCONFA_MC_FORWARDING) &&
1779 nla_put_s32(skb, NETCONFA_MC_FORWARDING, 1811 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1780 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) 1812 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1781 goto nla_put_failure; 1813 goto nla_put_failure;
1782 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && 1814 if ((all || type == NETCONFA_PROXY_NEIGH) &&
1783 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, 1815 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1784 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) 1816 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1785 goto nla_put_failure; 1817 goto nla_put_failure;
1786 if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && 1818 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1787 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 1819 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1788 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) 1820 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1789 goto nla_put_failure; 1821 goto nla_put_failure;
@@ -1871,14 +1903,14 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1871 } 1903 }
1872 1904
1873 err = -ENOBUFS; 1905 err = -ENOBUFS;
1874 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC); 1906 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
1875 if (!skb) 1907 if (!skb)
1876 goto errout; 1908 goto errout;
1877 1909
1878 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 1910 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1879 NETLINK_CB(in_skb).portid, 1911 NETLINK_CB(in_skb).portid,
1880 nlh->nlmsg_seq, RTM_NEWNETCONF, 0, 1912 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1881 -1); 1913 NETCONFA_ALL);
1882 if (err < 0) { 1914 if (err < 0) {
1883 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1915 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1884 WARN_ON(err == -EMSGSIZE); 1916 WARN_ON(err == -EMSGSIZE);
@@ -1922,7 +1954,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
1922 cb->nlh->nlmsg_seq, 1954 cb->nlh->nlmsg_seq,
1923 RTM_NEWNETCONF, 1955 RTM_NEWNETCONF,
1924 NLM_F_MULTI, 1956 NLM_F_MULTI,
1925 -1) < 0) { 1957 NETCONFA_ALL) < 0) {
1926 rcu_read_unlock(); 1958 rcu_read_unlock();
1927 goto done; 1959 goto done;
1928 } 1960 }
@@ -1938,7 +1970,7 @@ cont:
1938 NETLINK_CB(cb->skb).portid, 1970 NETLINK_CB(cb->skb).portid,
1939 cb->nlh->nlmsg_seq, 1971 cb->nlh->nlmsg_seq,
1940 RTM_NEWNETCONF, NLM_F_MULTI, 1972 RTM_NEWNETCONF, NLM_F_MULTI,
1941 -1) < 0) 1973 NETCONFA_ALL) < 0)
1942 goto done; 1974 goto done;
1943 else 1975 else
1944 h++; 1976 h++;
@@ -1949,7 +1981,7 @@ cont:
1949 NETLINK_CB(cb->skb).portid, 1981 NETLINK_CB(cb->skb).portid,
1950 cb->nlh->nlmsg_seq, 1982 cb->nlh->nlmsg_seq,
1951 RTM_NEWNETCONF, NLM_F_MULTI, 1983 RTM_NEWNETCONF, NLM_F_MULTI,
1952 -1) < 0) 1984 NETCONFA_ALL) < 0)
1953 goto done; 1985 goto done;
1954 else 1986 else
1955 h++; 1987 h++;
@@ -2185,6 +2217,8 @@ static struct devinet_sysctl_table {
2185 "igmpv3_unsolicited_report_interval"), 2217 "igmpv3_unsolicited_report_interval"),
2186 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, 2218 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2187 "ignore_routes_with_linkdown"), 2219 "ignore_routes_with_linkdown"),
2220 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2221 "drop_gratuitous_arp"),
2188 2222
2189 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 2223 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2190 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 2224 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
@@ -2192,6 +2226,8 @@ static struct devinet_sysctl_table {
2192 "promote_secondaries"), 2226 "promote_secondaries"),
2193 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 2227 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2194 "route_localnet"), 2228 "route_localnet"),
2229 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2230 "drop_unicast_in_l2_multicast"),
2195 }, 2231 },
2196}; 2232};
2197 2233
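
The devinet.c changes do three things: inet_select_addr() becomes VRF-aware by first scanning primary addresses on the device's l3mdev master and then restricting the fallback device walk to the same master_idx, __inet_del_ifa() skips alias promotion when the in_device is already dead, and the netconf netlink code replaces the magic type value -1 with the named NETCONFA_ALL constant. The sketch below only illustrates the resulting "all || type == X" message-size pattern; the enum values and function are hypothetical, not the kernel API.

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative attribute ids; a named CONF_ALL stands in for the old magic -1. */
    enum { CONF_ALL = 0, CONF_FORWARDING, CONF_RP_FILTER, CONF_MC_FORWARDING };

    /* Each requested attribute contributes one 4-byte payload (alignment ignored). */
    static int conf_msg_size(int type)
    {
        bool all = (type == CONF_ALL);
        int size = 0;

        if (all || type == CONF_FORWARDING)
            size += 4;
        if (all || type == CONF_RP_FILTER)
            size += 4;
        if (all || type == CONF_MC_FORWARDING)
            size += 4;
        return size;
    }

    int main(void)
    {
        printf("%d %d\n", conf_msg_size(CONF_ALL), conf_msg_size(CONF_RP_FILTER)); /* 12 4 */
        return 0;
    }
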
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 473447593060..63566ec54794 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
280 struct in_device *in_dev; 280 struct in_device *in_dev;
281 struct fib_result res; 281 struct fib_result res;
282 struct rtable *rt; 282 struct rtable *rt;
283 struct flowi4 fl4;
284 struct net *net; 283 struct net *net;
285 int scope; 284 int scope;
286 285
@@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
296 295
297 scope = RT_SCOPE_UNIVERSE; 296 scope = RT_SCOPE_UNIVERSE;
298 if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { 297 if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
299 fl4.flowi4_oif = 0; 298 struct flowi4 fl4 = {
300 fl4.flowi4_iif = LOOPBACK_IFINDEX; 299 .flowi4_iif = LOOPBACK_IFINDEX,
301 fl4.daddr = ip_hdr(skb)->saddr; 300 .daddr = ip_hdr(skb)->saddr,
302 fl4.saddr = 0; 301 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
303 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 302 .flowi4_scope = scope,
304 fl4.flowi4_scope = scope; 303 .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
305 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; 304 };
306 fl4.flowi4_tun_key.tun_id = 0;
307 if (!fib_lookup(net, &fl4, &res, 0)) 305 if (!fib_lookup(net, &fl4, &res, 0))
308 return FIB_RES_PREFSRC(net, res); 306 return FIB_RES_PREFSRC(net, res);
309 } else { 307 } else {
@@ -906,7 +904,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
906 if (ifa->ifa_flags & IFA_F_SECONDARY) { 904 if (ifa->ifa_flags & IFA_F_SECONDARY) {
907 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 905 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
908 if (!prim) { 906 if (!prim) {
909 pr_warn("%s: bug: prim == NULL\n", __func__); 907 /* if the device has been deleted, we don't perform
908 * address promotion
909 */
910 if (!in_dev->dead)
911 pr_warn("%s: bug: prim == NULL\n", __func__);
910 return; 912 return;
911 } 913 }
912 if (iprim && iprim != prim) { 914 if (iprim && iprim != prim) {
@@ -922,6 +924,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
922 subnet = 1; 924 subnet = 1;
923 } 925 }
924 926
927 if (in_dev->dead)
928 goto no_promotions;
929
925 /* Deletion is more complicated than add. 930 /* Deletion is more complicated than add.
926 * We should take care of not to delete too much :-) 931 * We should take care of not to delete too much :-)
927 * 932 *
@@ -997,6 +1002,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
997 } 1002 }
998 } 1003 }
999 1004
1005no_promotions:
1000 if (!(ok & BRD_OK)) 1006 if (!(ok & BRD_OK))
1001 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 1007 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
1002 if (subnet && ifa->ifa_prefixlen < 31) { 1008 if (subnet && ifa->ifa_prefixlen < 31) {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d97268e8ff10..2b68418c7198 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
975 val = 65535 - 40; 975 val = 65535 - 40;
976 if (type == RTAX_MTU && val > 65535 - 15) 976 if (type == RTAX_MTU && val > 65535 - 15)
977 val = 65535 - 15; 977 val = 65535 - 15;
978 if (type == RTAX_HOPLIMIT && val > 255)
979 val = 255;
978 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) 980 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
979 return -EINVAL; 981 return -EINVAL;
980 fi->fib_metrics[type - 1] = val; 982 fi->fib_metrics[type - 1] = val;
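
The one-line fib_semantics.c change clamps a user-supplied RTAX_HOPLIMIT route metric to 255, since the IP TTL / hop-limit field is only 8 bits wide. A trivial standalone illustration of the clamp:

    #include <stdio.h>

    /* The TTL / hop-limit field is 8 bits wide, so any larger metric is capped. */
    static unsigned int clamp_hoplimit(unsigned int val)
    {
        return val > 255 ? 255 : val;
    }

    int main(void)
    {
        printf("%u %u\n", clamp_hoplimit(64), clamp_hoplimit(1000)); /* 64 255 */
        return 0;
    }
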
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 976f0dcf6991..a6962ccad98a 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk)
48 return sk->sk_user_data; 48 return sk->sk_user_data;
49} 49}
50 50
51static void fou_recv_pull(struct sk_buff *skb, size_t len) 51static int fou_recv_pull(struct sk_buff *skb, size_t len)
52{ 52{
53 struct iphdr *iph = ip_hdr(skb); 53 struct iphdr *iph = ip_hdr(skb);
54 54
@@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len)
59 __skb_pull(skb, len); 59 __skb_pull(skb, len);
60 skb_postpull_rcsum(skb, udp_hdr(skb), len); 60 skb_postpull_rcsum(skb, udp_hdr(skb), len);
61 skb_reset_transport_header(skb); 61 skb_reset_transport_header(skb);
62 return iptunnel_pull_offloads(skb);
62} 63}
63 64
64static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) 65static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
@@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
68 if (!fou) 69 if (!fou)
69 return 1; 70 return 1;
70 71
71 fou_recv_pull(skb, sizeof(struct udphdr)); 72 if (fou_recv_pull(skb, sizeof(struct udphdr)))
73 goto drop;
72 74
73 return -fou->protocol; 75 return -fou->protocol;
76
77drop:
78 kfree_skb(skb);
79 return 0;
74} 80}
75 81
76static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, 82static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
@@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
170 __skb_pull(skb, sizeof(struct udphdr) + hdrlen); 176 __skb_pull(skb, sizeof(struct udphdr) + hdrlen);
171 skb_reset_transport_header(skb); 177 skb_reset_transport_header(skb);
172 178
179 if (iptunnel_pull_offloads(skb))
180 goto drop;
181
173 return -guehdr->proto_ctype; 182 return -guehdr->proto_ctype;
174 183
175drop: 184drop:
@@ -186,6 +195,17 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head,
186 u8 proto = NAPI_GRO_CB(skb)->proto; 195 u8 proto = NAPI_GRO_CB(skb)->proto;
187 const struct net_offload **offloads; 196 const struct net_offload **offloads;
188 197
198 /* We can clear the encap_mark for FOU as we are essentially doing
199 * one of two possible things. We are either adding an L4 tunnel
200 * header to the outer L3 tunnel header, or we are are simply
201 * treating the GRE tunnel header as though it is a UDP protocol
202 * specific header such as VXLAN or GENEVE.
203 */
204 NAPI_GRO_CB(skb)->encap_mark = 0;
205
206 /* Flag this frame as already having an outer encap header */
207 NAPI_GRO_CB(skb)->is_fou = 1;
208
189 rcu_read_lock(); 209 rcu_read_lock();
190 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 210 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
191 ops = rcu_dereference(offloads[proto]); 211 ops = rcu_dereference(offloads[proto]);
@@ -208,8 +228,6 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff,
208 int err = -ENOSYS; 228 int err = -ENOSYS;
209 const struct net_offload **offloads; 229 const struct net_offload **offloads;
210 230
211 udp_tunnel_gro_complete(skb, nhoff);
212
213 rcu_read_lock(); 231 rcu_read_lock();
214 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 232 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
215 ops = rcu_dereference(offloads[proto]); 233 ops = rcu_dereference(offloads[proto]);
@@ -218,6 +236,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff,
218 236
219 err = ops->callbacks.gro_complete(skb, nhoff); 237 err = ops->callbacks.gro_complete(skb, nhoff);
220 238
239 skb_set_inner_mac_header(skb, nhoff);
240
221out_unlock: 241out_unlock:
222 rcu_read_unlock(); 242 rcu_read_unlock();
223 243
@@ -319,8 +339,6 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
319 339
320 skb_gro_pull(skb, hdrlen); 340 skb_gro_pull(skb, hdrlen);
321 341
322 flush = 0;
323
324 for (p = *head; p; p = p->next) { 342 for (p = *head; p; p = p->next) {
325 const struct guehdr *guehdr2; 343 const struct guehdr *guehdr2;
326 344
@@ -345,6 +363,17 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
345 } 363 }
346 } 364 }
347 365
366 /* We can clear the encap_mark for GUE as we are essentially doing
367 * one of two possible things. We are either adding an L4 tunnel
 368 * header to the outer L3 tunnel header, or we are simply
369 * treating the GRE tunnel header as though it is a UDP protocol
370 * specific header such as VXLAN or GENEVE.
371 */
372 NAPI_GRO_CB(skb)->encap_mark = 0;
373
374 /* Flag this frame as already having an outer encap header */
375 NAPI_GRO_CB(skb)->is_fou = 1;
376
348 rcu_read_lock(); 377 rcu_read_lock();
349 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 378 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
350 ops = rcu_dereference(offloads[guehdr->proto_ctype]); 379 ops = rcu_dereference(offloads[guehdr->proto_ctype]);
@@ -352,6 +381,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
352 goto out_unlock; 381 goto out_unlock;
353 382
354 pp = ops->callbacks.gro_receive(head, skb); 383 pp = ops->callbacks.gro_receive(head, skb);
384 flush = 0;
355 385
356out_unlock: 386out_unlock:
357 rcu_read_unlock(); 387 rcu_read_unlock();
@@ -384,6 +414,8 @@ static int gue_gro_complete(struct sk_buff *skb, int nhoff,
384 414
385 err = ops->callbacks.gro_complete(skb, nhoff + guehlen); 415 err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
386 416
417 skb_set_inner_mac_header(skb, nhoff + guehlen);
418
387out_unlock: 419out_unlock:
388 rcu_read_unlock(); 420 rcu_read_unlock();
389 return err; 421 return err;
@@ -774,7 +806,6 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
774 uh->dest = e->dport; 806 uh->dest = e->dport;
775 uh->source = sport; 807 uh->source = sport;
776 uh->len = htons(skb->len); 808 uh->len = htons(skb->len);
777 uh->check = 0;
778 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, 809 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
779 fl4->saddr, fl4->daddr, skb->len); 810 fl4->saddr, fl4->daddr, skb->len);
780 811
@@ -784,11 +815,11 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
784int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 815int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
785 u8 *protocol, struct flowi4 *fl4) 816 u8 *protocol, struct flowi4 *fl4)
786{ 817{
787 bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); 818 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
788 int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; 819 SKB_GSO_UDP_TUNNEL;
789 __be16 sport; 820 __be16 sport;
790 821
791 skb = iptunnel_handle_offloads(skb, csum, type); 822 skb = iptunnel_handle_offloads(skb, type);
792 823
793 if (IS_ERR(skb)) 824 if (IS_ERR(skb))
794 return PTR_ERR(skb); 825 return PTR_ERR(skb);
@@ -804,8 +835,8 @@ EXPORT_SYMBOL(fou_build_header);
804int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 835int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
805 u8 *protocol, struct flowi4 *fl4) 836 u8 *protocol, struct flowi4 *fl4)
806{ 837{
807 bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); 838 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
808 int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; 839 SKB_GSO_UDP_TUNNEL;
809 struct guehdr *guehdr; 840 struct guehdr *guehdr;
810 size_t hdrlen, optlen = 0; 841 size_t hdrlen, optlen = 0;
811 __be16 sport; 842 __be16 sport;
@@ -814,7 +845,6 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
814 845
815 if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && 846 if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
816 skb->ip_summed == CHECKSUM_PARTIAL) { 847 skb->ip_summed == CHECKSUM_PARTIAL) {
817 csum = false;
818 optlen += GUE_PLEN_REMCSUM; 848 optlen += GUE_PLEN_REMCSUM;
819 type |= SKB_GSO_TUNNEL_REMCSUM; 849 type |= SKB_GSO_TUNNEL_REMCSUM;
820 need_priv = true; 850 need_priv = true;
@@ -822,7 +852,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
822 852
823 optlen += need_priv ? GUE_LEN_PRIV : 0; 853 optlen += need_priv ? GUE_LEN_PRIV : 0;
824 854
825 skb = iptunnel_handle_offloads(skb, csum, type); 855 skb = iptunnel_handle_offloads(skb, type);
826 856
827 if (IS_ERR(skb)) 857 if (IS_ERR(skb))
828 return PTR_ERR(skb); 858 return PTR_ERR(skb);
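
In fou.c the receive paths (fou_udp_recv(), gue_udp_recv()) now call iptunnel_pull_offloads() and drop the packet if it fails, the GRO receive handlers clear encap_mark and set an is_fou flag so the GRE layer can recognize a FOU/GUE-encapsulated frame, flush is only cleared once the inner gro_receive callback has actually run, and the transmit helpers express checksum intent to iptunnel_handle_offloads() purely through the GSO type. The sketch below mimics only the GRO flag bookkeeping with a hypothetical state struct; the real flags live in NAPI_GRO_CB(skb).

    #include <stdio.h>

    /* Illustrative per-packet GRO state; the real cb lives inside struct sk_buff. */
    struct gro_state {
        unsigned int encap_mark : 1;  /* an outer encap layer was already seen */
        unsigned int is_fou     : 1;  /* frame already carries a FOU/GUE header */
    };

    /* FOU/GUE act as the outer L4 encap, so they reset encap_mark for the inner
     * protocol and flag the frame so GRE GRO can refuse GRE_CSUM later on. */
    static void fou_mark(struct gro_state *st)
    {
        st->encap_mark = 0;
        st->is_fou = 1;
    }

    int main(void)
    {
        struct gro_state st = { .encap_mark = 1 };

        fou_mark(&st);
        printf("%d %d\n", st.encap_mark, st.is_fou); /* 0 1 */
        return 0;
    }
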
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 5a8ee3282550..6a5bd4317866 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -18,15 +18,13 @@
18static struct sk_buff *gre_gso_segment(struct sk_buff *skb, 18static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
19 netdev_features_t features) 19 netdev_features_t features)
20{ 20{
21 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
21 struct sk_buff *segs = ERR_PTR(-EINVAL); 22 struct sk_buff *segs = ERR_PTR(-EINVAL);
22 netdev_features_t enc_features;
23 int ghl;
24 struct gre_base_hdr *greh;
25 u16 mac_offset = skb->mac_header; 23 u16 mac_offset = skb->mac_header;
26 int mac_len = skb->mac_len;
27 __be16 protocol = skb->protocol; 24 __be16 protocol = skb->protocol;
28 int tnl_hlen; 25 u16 mac_len = skb->mac_len;
29 bool csum; 26 int gre_offset, outer_hlen;
27 bool need_csum, ufo;
30 28
31 if (unlikely(skb_shinfo(skb)->gso_type & 29 if (unlikely(skb_shinfo(skb)->gso_type &
32 ~(SKB_GSO_TCPV4 | 30 ~(SKB_GSO_TCPV4 |
@@ -43,74 +41,75 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
43 if (!skb->encapsulation) 41 if (!skb->encapsulation)
44 goto out; 42 goto out;
45 43
46 if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) 44 if (unlikely(tnl_hlen < sizeof(struct gre_base_hdr)))
47 goto out; 45 goto out;
48 46
49 greh = (struct gre_base_hdr *)skb_transport_header(skb); 47 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
50
51 ghl = skb_inner_mac_header(skb) - skb_transport_header(skb);
52 if (unlikely(ghl < sizeof(*greh)))
53 goto out; 48 goto out;
54 49
55 csum = !!(greh->flags & GRE_CSUM);
56 if (csum)
57 skb->encap_hdr_csum = 1;
58
59 /* setup inner skb. */ 50 /* setup inner skb. */
60 skb->protocol = greh->protocol;
61 skb->encapsulation = 0; 51 skb->encapsulation = 0;
62 52 SKB_GSO_CB(skb)->encap_level = 0;
63 if (unlikely(!pskb_may_pull(skb, ghl))) 53 __skb_pull(skb, tnl_hlen);
64 goto out;
65
66 __skb_pull(skb, ghl);
67 skb_reset_mac_header(skb); 54 skb_reset_mac_header(skb);
68 skb_set_network_header(skb, skb_inner_network_offset(skb)); 55 skb_set_network_header(skb, skb_inner_network_offset(skb));
69 skb->mac_len = skb_inner_network_offset(skb); 56 skb->mac_len = skb_inner_network_offset(skb);
57 skb->protocol = skb->inner_protocol;
58
59 need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
60 skb->encap_hdr_csum = need_csum;
61
62 ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
63
64 features &= skb->dev->hw_enc_features;
65
66 /* The only checksum offload we care about from here on out is the
67 * outer one so strip the existing checksum feature flags based
68 * on the fact that we will be computing our checksum in software.
69 */
70 if (ufo) {
71 features &= ~NETIF_F_CSUM_MASK;
72 if (!need_csum)
73 features |= NETIF_F_HW_CSUM;
74 }
70 75
71 /* segment inner packet. */ 76 /* segment inner packet. */
72 enc_features = skb->dev->hw_enc_features & features; 77 segs = skb_mac_gso_segment(skb, features);
73 segs = skb_mac_gso_segment(skb, enc_features);
74 if (IS_ERR_OR_NULL(segs)) { 78 if (IS_ERR_OR_NULL(segs)) {
75 skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); 79 skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
80 mac_len);
76 goto out; 81 goto out;
77 } 82 }
78 83
84 outer_hlen = skb_tnl_header_len(skb);
85 gre_offset = outer_hlen - tnl_hlen;
79 skb = segs; 86 skb = segs;
80 tnl_hlen = skb_tnl_header_len(skb);
81 do { 87 do {
82 __skb_push(skb, ghl); 88 struct gre_base_hdr *greh;
83 if (csum) { 89 __be32 *pcsum;
84 __be32 *pcsum;
85
86 if (skb_has_shared_frag(skb)) {
87 int err;
88
89 err = __skb_linearize(skb);
90 if (err) {
91 kfree_skb_list(segs);
92 segs = ERR_PTR(err);
93 goto out;
94 }
95 }
96 90
97 skb_reset_transport_header(skb); 91 /* Set up inner headers if we are offloading inner checksum */
98 92 if (skb->ip_summed == CHECKSUM_PARTIAL) {
99 greh = (struct gre_base_hdr *) 93 skb_reset_inner_headers(skb);
100 skb_transport_header(skb); 94 skb->encapsulation = 1;
101 pcsum = (__be32 *)(greh + 1);
102 *pcsum = 0;
103 *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
104 } 95 }
105 __skb_push(skb, tnl_hlen - ghl);
106 96
107 skb_reset_inner_headers(skb); 97 skb->mac_len = mac_len;
108 skb->encapsulation = 1; 98 skb->protocol = protocol;
109 99
100 __skb_push(skb, outer_hlen);
110 skb_reset_mac_header(skb); 101 skb_reset_mac_header(skb);
111 skb_set_network_header(skb, mac_len); 102 skb_set_network_header(skb, mac_len);
112 skb->mac_len = mac_len; 103 skb_set_transport_header(skb, gre_offset);
113 skb->protocol = protocol; 104
105 if (!need_csum)
106 continue;
107
108 greh = (struct gre_base_hdr *)skb_transport_header(skb);
109 pcsum = (__be32 *)(greh + 1);
110
111 *pcsum = 0;
112 *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
114 } while ((skb = skb->next)); 113 } while ((skb = skb->next));
115out: 114out:
116 return segs; 115 return segs;
@@ -128,6 +127,11 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
128 struct packet_offload *ptype; 127 struct packet_offload *ptype;
129 __be16 type; 128 __be16 type;
130 129
130 if (NAPI_GRO_CB(skb)->encap_mark)
131 goto out;
132
133 NAPI_GRO_CB(skb)->encap_mark = 1;
134
131 off = skb_gro_offset(skb); 135 off = skb_gro_offset(skb);
132 hlen = off + sizeof(*greh); 136 hlen = off + sizeof(*greh);
133 greh = skb_gro_header_fast(skb, off); 137 greh = skb_gro_header_fast(skb, off);
@@ -146,6 +150,14 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
146 if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) 150 if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0)
147 goto out; 151 goto out;
148 152
153 /* We can only support GRE_CSUM if we can track the location of
154 * the GRE header. In the case of FOU/GUE we cannot because the
155 * outer UDP header displaces the GRE header leaving us in a state
156 * of limbo.
157 */
158 if ((greh->flags & GRE_CSUM) && NAPI_GRO_CB(skb)->is_fou)
159 goto out;
160
149 type = greh->protocol; 161 type = greh->protocol;
150 162
151 rcu_read_lock(); 163 rcu_read_lock();
@@ -177,8 +189,6 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
177 null_compute_pseudo); 189 null_compute_pseudo);
178 } 190 }
179 191
180 flush = 0;
181
182 for (p = *head; p; p = p->next) { 192 for (p = *head; p; p = p->next) {
183 const struct gre_base_hdr *greh2; 193 const struct gre_base_hdr *greh2;
184 194
@@ -215,6 +225,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
215 skb_gro_postpull_rcsum(skb, greh, grehlen); 225 skb_gro_postpull_rcsum(skb, greh, grehlen);
216 226
217 pp = ptype->callbacks.gro_receive(head, skb); 227 pp = ptype->callbacks.gro_receive(head, skb);
228 flush = 0;
218 229
219out_unlock: 230out_unlock:
220 rcu_read_unlock(); 231 rcu_read_unlock();
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 36e26977c908..6333489771ed 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -931,7 +931,6 @@ static bool icmp_echo(struct sk_buff *skb)
931 */ 931 */
932static bool icmp_timestamp(struct sk_buff *skb) 932static bool icmp_timestamp(struct sk_buff *skb)
933{ 933{
934 struct timespec tv;
935 struct icmp_bxm icmp_param; 934 struct icmp_bxm icmp_param;
936 /* 935 /*
937 * Too short. 936 * Too short.
@@ -942,9 +941,7 @@ static bool icmp_timestamp(struct sk_buff *skb)
942 /* 941 /*
943 * Fill in the current time as ms since midnight UT: 942 * Fill in the current time as ms since midnight UT:
944 */ 943 */
945 getnstimeofday(&tv); 944 icmp_param.data.times[1] = inet_current_timestamp();
946 icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC +
947 tv.tv_nsec / NSEC_PER_MSEC);
948 icmp_param.data.times[2] = icmp_param.data.times[1]; 945 icmp_param.data.times[2] = icmp_param.data.times[1];
949 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) 946 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
950 BUG(); 947 BUG();
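
icmp_timestamp() stops open-coding getnstimeofday() and instead uses the inet_current_timestamp() helper, which, per the replaced code, yields milliseconds since midnight UTC as a big-endian 32-bit value. A minimal userspace sketch of that computation, assuming clock_gettime() is available:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    /* ICMP timestamps carry milliseconds since midnight UTC, big-endian. */
    static uint32_t ms_since_midnight_utc(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_REALTIME, &ts);
        return htonl((uint32_t)((ts.tv_sec % 86400) * 1000 + ts.tv_nsec / 1000000));
    }

    int main(void)
    {
        printf("0x%08x\n", (unsigned int)ntohl(ms_since_midnight_utc()));
        return 0;
    }
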
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b3086cf27027..9b4ca87f70ba 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -107,12 +107,6 @@
107#include <linux/seq_file.h> 107#include <linux/seq_file.h>
108#endif 108#endif
109 109
110#define IP_MAX_MEMBERSHIPS 20
111#define IP_MAX_MSF 10
112
113/* IGMP reports for link-local multicast groups are enabled by default */
114int sysctl_igmp_llm_reports __read_mostly = 1;
115
116#ifdef CONFIG_IP_MULTICAST 110#ifdef CONFIG_IP_MULTICAST
117/* Parameter names and values are taken from igmp-v2-06 draft */ 111/* Parameter names and values are taken from igmp-v2-06 draft */
118 112
@@ -432,6 +426,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
432 int type, int gdeleted, int sdeleted) 426 int type, int gdeleted, int sdeleted)
433{ 427{
434 struct net_device *dev = pmc->interface->dev; 428 struct net_device *dev = pmc->interface->dev;
429 struct net *net = dev_net(dev);
435 struct igmpv3_report *pih; 430 struct igmpv3_report *pih;
436 struct igmpv3_grec *pgr = NULL; 431 struct igmpv3_grec *pgr = NULL;
437 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; 432 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
@@ -439,7 +434,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
439 434
440 if (pmc->multiaddr == IGMP_ALL_HOSTS) 435 if (pmc->multiaddr == IGMP_ALL_HOSTS)
441 return skb; 436 return skb;
442 if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports) 437 if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
443 return skb; 438 return skb;
444 439
445 isquery = type == IGMPV3_MODE_IS_INCLUDE || 440 isquery = type == IGMPV3_MODE_IS_INCLUDE ||
@@ -542,6 +537,7 @@ empty_source:
542static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) 537static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
543{ 538{
544 struct sk_buff *skb = NULL; 539 struct sk_buff *skb = NULL;
540 struct net *net = dev_net(in_dev->dev);
545 int type; 541 int type;
546 542
547 if (!pmc) { 543 if (!pmc) {
@@ -550,7 +546,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
550 if (pmc->multiaddr == IGMP_ALL_HOSTS) 546 if (pmc->multiaddr == IGMP_ALL_HOSTS)
551 continue; 547 continue;
552 if (ipv4_is_local_multicast(pmc->multiaddr) && 548 if (ipv4_is_local_multicast(pmc->multiaddr) &&
553 !sysctl_igmp_llm_reports) 549 !net->ipv4.sysctl_igmp_llm_reports)
554 continue; 550 continue;
555 spin_lock_bh(&pmc->lock); 551 spin_lock_bh(&pmc->lock);
556 if (pmc->sfcount[MCAST_EXCLUDE]) 552 if (pmc->sfcount[MCAST_EXCLUDE])
@@ -686,7 +682,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
686 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) 682 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
687 return igmpv3_send_report(in_dev, pmc); 683 return igmpv3_send_report(in_dev, pmc);
688 684
689 if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) 685 if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
690 return 0; 686 return 0;
691 687
692 if (type == IGMP_HOST_LEAVE_MESSAGE) 688 if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -765,9 +761,10 @@ static void igmp_ifc_timer_expire(unsigned long data)
765 761
766static void igmp_ifc_event(struct in_device *in_dev) 762static void igmp_ifc_event(struct in_device *in_dev)
767{ 763{
764 struct net *net = dev_net(in_dev->dev);
768 if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) 765 if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
769 return; 766 return;
770 in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; 767 in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
771 igmp_ifc_start_timer(in_dev, 1); 768 igmp_ifc_start_timer(in_dev, 1);
772} 769}
773 770
@@ -857,12 +854,13 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
857static bool igmp_heard_report(struct in_device *in_dev, __be32 group) 854static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
858{ 855{
859 struct ip_mc_list *im; 856 struct ip_mc_list *im;
857 struct net *net = dev_net(in_dev->dev);
860 858
861 /* Timers are only set for non-local groups */ 859 /* Timers are only set for non-local groups */
862 860
863 if (group == IGMP_ALL_HOSTS) 861 if (group == IGMP_ALL_HOSTS)
864 return false; 862 return false;
865 if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) 863 if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
866 return false; 864 return false;
867 865
868 rcu_read_lock(); 866 rcu_read_lock();
@@ -886,6 +884,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
886 __be32 group = ih->group; 884 __be32 group = ih->group;
887 int max_delay; 885 int max_delay;
888 int mark = 0; 886 int mark = 0;
887 struct net *net = dev_net(in_dev->dev);
889 888
890 889
891 if (len == 8) { 890 if (len == 8) {
@@ -971,7 +970,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
971 if (im->multiaddr == IGMP_ALL_HOSTS) 970 if (im->multiaddr == IGMP_ALL_HOSTS)
972 continue; 971 continue;
973 if (ipv4_is_local_multicast(im->multiaddr) && 972 if (ipv4_is_local_multicast(im->multiaddr) &&
974 !sysctl_igmp_llm_reports) 973 !net->ipv4.sysctl_igmp_llm_reports)
975 continue; 974 continue;
976 spin_lock_bh(&im->lock); 975 spin_lock_bh(&im->lock);
977 if (im->tm_running) 976 if (im->tm_running)
@@ -1087,6 +1086,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
1087static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) 1086static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
1088{ 1087{
1089 struct ip_mc_list *pmc; 1088 struct ip_mc_list *pmc;
1089 struct net *net = dev_net(in_dev->dev);
1090 1090
1091 /* this is an "ip_mc_list" for convenience; only the fields below 1091 /* this is an "ip_mc_list" for convenience; only the fields below
1092 * are actually used. In particular, the refcnt and users are not 1092 * are actually used. In particular, the refcnt and users are not
@@ -1101,7 +1101,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
1101 pmc->interface = im->interface; 1101 pmc->interface = im->interface;
1102 in_dev_hold(in_dev); 1102 in_dev_hold(in_dev);
1103 pmc->multiaddr = im->multiaddr; 1103 pmc->multiaddr = im->multiaddr;
1104 pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 1104 pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
1105 pmc->sfmode = im->sfmode; 1105 pmc->sfmode = im->sfmode;
1106 if (pmc->sfmode == MCAST_INCLUDE) { 1106 if (pmc->sfmode == MCAST_INCLUDE) {
1107 struct ip_sf_list *psf; 1107 struct ip_sf_list *psf;
@@ -1186,6 +1186,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
1186{ 1186{
1187 struct in_device *in_dev = im->interface; 1187 struct in_device *in_dev = im->interface;
1188#ifdef CONFIG_IP_MULTICAST 1188#ifdef CONFIG_IP_MULTICAST
1189 struct net *net = dev_net(in_dev->dev);
1189 int reporter; 1190 int reporter;
1190#endif 1191#endif
1191 1192
@@ -1197,7 +1198,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
1197#ifdef CONFIG_IP_MULTICAST 1198#ifdef CONFIG_IP_MULTICAST
1198 if (im->multiaddr == IGMP_ALL_HOSTS) 1199 if (im->multiaddr == IGMP_ALL_HOSTS)
1199 return; 1200 return;
1200 if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) 1201 if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
1201 return; 1202 return;
1202 1203
1203 reporter = im->reporter; 1204 reporter = im->reporter;
@@ -1222,6 +1223,9 @@ static void igmp_group_dropped(struct ip_mc_list *im)
1222static void igmp_group_added(struct ip_mc_list *im) 1223static void igmp_group_added(struct ip_mc_list *im)
1223{ 1224{
1224 struct in_device *in_dev = im->interface; 1225 struct in_device *in_dev = im->interface;
1226#ifdef CONFIG_IP_MULTICAST
1227 struct net *net = dev_net(in_dev->dev);
1228#endif
1225 1229
1226 if (im->loaded == 0) { 1230 if (im->loaded == 0) {
1227 im->loaded = 1; 1231 im->loaded = 1;
@@ -1231,7 +1235,7 @@ static void igmp_group_added(struct ip_mc_list *im)
1231#ifdef CONFIG_IP_MULTICAST 1235#ifdef CONFIG_IP_MULTICAST
1232 if (im->multiaddr == IGMP_ALL_HOSTS) 1236 if (im->multiaddr == IGMP_ALL_HOSTS)
1233 return; 1237 return;
1234 if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) 1238 if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
1235 return; 1239 return;
1236 1240
1237 if (in_dev->dead) 1241 if (in_dev->dead)
@@ -1244,7 +1248,7 @@ static void igmp_group_added(struct ip_mc_list *im)
1244 } 1248 }
1245 /* else, v3 */ 1249 /* else, v3 */
1246 1250
1247 im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 1251 im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
1248 igmp_ifc_event(in_dev); 1252 igmp_ifc_event(in_dev);
1249#endif 1253#endif
1250} 1254}
@@ -1313,6 +1317,9 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
1313void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) 1317void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1314{ 1318{
1315 struct ip_mc_list *im; 1319 struct ip_mc_list *im;
1320#ifdef CONFIG_IP_MULTICAST
1321 struct net *net = dev_net(in_dev->dev);
1322#endif
1316 1323
1317 ASSERT_RTNL(); 1324 ASSERT_RTNL();
1318 1325
@@ -1339,7 +1346,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1339 spin_lock_init(&im->lock); 1346 spin_lock_init(&im->lock);
1340#ifdef CONFIG_IP_MULTICAST 1347#ifdef CONFIG_IP_MULTICAST
1341 setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); 1348 setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
1342 im->unsolicit_count = sysctl_igmp_qrv; 1349 im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
1343#endif 1350#endif
1344 1351
1345 im->next_rcu = in_dev->mc_list; 1352 im->next_rcu = in_dev->mc_list;
@@ -1532,6 +1539,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
1532#ifdef CONFIG_IP_MULTICAST 1539#ifdef CONFIG_IP_MULTICAST
1533 struct ip_mc_list *im; 1540 struct ip_mc_list *im;
1534 int type; 1541 int type;
1542 struct net *net = dev_net(in_dev->dev);
1535 1543
1536 ASSERT_RTNL(); 1544 ASSERT_RTNL();
1537 1545
@@ -1539,7 +1547,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
1539 if (im->multiaddr == IGMP_ALL_HOSTS) 1547 if (im->multiaddr == IGMP_ALL_HOSTS)
1540 continue; 1548 continue;
1541 if (ipv4_is_local_multicast(im->multiaddr) && 1549 if (ipv4_is_local_multicast(im->multiaddr) &&
1542 !sysctl_igmp_llm_reports) 1550 !net->ipv4.sysctl_igmp_llm_reports)
1543 continue; 1551 continue;
1544 1552
1545 /* a failover is happening and switches 1553 /* a failover is happening and switches
@@ -1638,6 +1646,9 @@ void ip_mc_down(struct in_device *in_dev)
1638 1646
1639void ip_mc_init_dev(struct in_device *in_dev) 1647void ip_mc_init_dev(struct in_device *in_dev)
1640{ 1648{
1649#ifdef CONFIG_IP_MULTICAST
1650 struct net *net = dev_net(in_dev->dev);
1651#endif
1641 ASSERT_RTNL(); 1652 ASSERT_RTNL();
1642 1653
1643#ifdef CONFIG_IP_MULTICAST 1654#ifdef CONFIG_IP_MULTICAST
@@ -1645,7 +1656,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
1645 (unsigned long)in_dev); 1656 (unsigned long)in_dev);
1646 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 1657 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
1647 (unsigned long)in_dev); 1658 (unsigned long)in_dev);
1648 in_dev->mr_qrv = sysctl_igmp_qrv; 1659 in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
1649#endif 1660#endif
1650 1661
1651 spin_lock_init(&in_dev->mc_tomb_lock); 1662 spin_lock_init(&in_dev->mc_tomb_lock);
@@ -1656,11 +1667,14 @@ void ip_mc_init_dev(struct in_device *in_dev)
1656void ip_mc_up(struct in_device *in_dev) 1667void ip_mc_up(struct in_device *in_dev)
1657{ 1668{
1658 struct ip_mc_list *pmc; 1669 struct ip_mc_list *pmc;
1670#ifdef CONFIG_IP_MULTICAST
1671 struct net *net = dev_net(in_dev->dev);
1672#endif
1659 1673
1660 ASSERT_RTNL(); 1674 ASSERT_RTNL();
1661 1675
1662#ifdef CONFIG_IP_MULTICAST 1676#ifdef CONFIG_IP_MULTICAST
1663 in_dev->mr_qrv = sysctl_igmp_qrv; 1677 in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
1664#endif 1678#endif
1665 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); 1679 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
1666 1680
@@ -1726,11 +1740,6 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1726/* 1740/*
1727 * Join a socket to a group 1741 * Join a socket to a group
1728 */ 1742 */
1729int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
1730int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
1731#ifdef CONFIG_IP_MULTICAST
1732int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE;
1733#endif
1734 1743
1735static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, 1744static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
1736 __be32 *psfsrc) 1745 __be32 *psfsrc)
@@ -1755,6 +1764,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
1755 if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) { 1764 if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
1756#ifdef CONFIG_IP_MULTICAST 1765#ifdef CONFIG_IP_MULTICAST
1757 struct in_device *in_dev = pmc->interface; 1766 struct in_device *in_dev = pmc->interface;
1767 struct net *net = dev_net(in_dev->dev);
1758#endif 1768#endif
1759 1769
1760 /* no more filters for this source */ 1770 /* no more filters for this source */
@@ -1765,7 +1775,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
1765#ifdef CONFIG_IP_MULTICAST 1775#ifdef CONFIG_IP_MULTICAST
1766 if (psf->sf_oldin && 1776 if (psf->sf_oldin &&
1767 !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { 1777 !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
1768 psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 1778 psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
1769 psf->sf_next = pmc->tomb; 1779 psf->sf_next = pmc->tomb;
1770 pmc->tomb = psf; 1780 pmc->tomb = psf;
1771 rv = 1; 1781 rv = 1;
@@ -1823,12 +1833,13 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
1823 pmc->sfcount[MCAST_INCLUDE]) { 1833 pmc->sfcount[MCAST_INCLUDE]) {
1824#ifdef CONFIG_IP_MULTICAST 1834#ifdef CONFIG_IP_MULTICAST
1825 struct ip_sf_list *psf; 1835 struct ip_sf_list *psf;
1836 struct net *net = dev_net(in_dev->dev);
1826#endif 1837#endif
1827 1838
1828 /* filter mode change */ 1839 /* filter mode change */
1829 pmc->sfmode = MCAST_INCLUDE; 1840 pmc->sfmode = MCAST_INCLUDE;
1830#ifdef CONFIG_IP_MULTICAST 1841#ifdef CONFIG_IP_MULTICAST
1831 pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 1842 pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
1832 in_dev->mr_ifc_count = pmc->crcount; 1843 in_dev->mr_ifc_count = pmc->crcount;
1833 for (psf = pmc->sources; psf; psf = psf->sf_next) 1844 for (psf = pmc->sources; psf; psf = psf->sf_next)
1834 psf->sf_crcount = 0; 1845 psf->sf_crcount = 0;
@@ -1995,6 +2006,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
1995 } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { 2006 } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
1996#ifdef CONFIG_IP_MULTICAST 2007#ifdef CONFIG_IP_MULTICAST
1997 struct ip_sf_list *psf; 2008 struct ip_sf_list *psf;
2009 struct net *net = dev_net(pmc->interface->dev);
1998 in_dev = pmc->interface; 2010 in_dev = pmc->interface;
1999#endif 2011#endif
2000 2012
@@ -2006,7 +2018,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
2006#ifdef CONFIG_IP_MULTICAST 2018#ifdef CONFIG_IP_MULTICAST
2007 /* else no filters; keep old mode for reports */ 2019 /* else no filters; keep old mode for reports */
2008 2020
2009 pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 2021 pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2010 in_dev->mr_ifc_count = pmc->crcount; 2022 in_dev->mr_ifc_count = pmc->crcount;
2011 for (psf = pmc->sources; psf; psf = psf->sf_next) 2023 for (psf = pmc->sources; psf; psf = psf->sf_next)
2012 psf->sf_crcount = 0; 2024 psf->sf_crcount = 0;
@@ -2073,7 +2085,7 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
2073 count++; 2085 count++;
2074 } 2086 }
2075 err = -ENOBUFS; 2087 err = -ENOBUFS;
2076 if (count >= sysctl_igmp_max_memberships) 2088 if (count >= net->ipv4.sysctl_igmp_max_memberships)
2077 goto done; 2089 goto done;
2078 iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); 2090 iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
2079 if (!iml) 2091 if (!iml)
@@ -2245,7 +2257,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2245 } 2257 }
2246 /* else, add a new source to the filter */ 2258 /* else, add a new source to the filter */
2247 2259
2248 if (psl && psl->sl_count >= sysctl_igmp_max_msf) { 2260 if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
2249 err = -ENOBUFS; 2261 err = -ENOBUFS;
2250 goto done; 2262 goto done;
2251 } 2263 }
@@ -2918,6 +2930,12 @@ static int __net_init igmp_net_init(struct net *net)
2918 goto out_sock; 2930 goto out_sock;
2919 } 2931 }
2920 2932
2933 /* Sysctl initialization */
2934 net->ipv4.sysctl_igmp_max_memberships = 20;
2935 net->ipv4.sysctl_igmp_max_msf = 10;
2936 /* IGMP reports for link-local multicast groups are enabled by default */
2937 net->ipv4.sysctl_igmp_llm_reports = 1;
2938 net->ipv4.sysctl_igmp_qrv = 2;
2921 return 0; 2939 return 0;
2922 2940
2923out_sock: 2941out_sock:
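
The igmp.c hunks convert the global sysctl_igmp_max_memberships, sysctl_igmp_max_msf, sysctl_igmp_llm_reports and sysctl_igmp_qrv knobs into per-network-namespace fields under net->ipv4, with defaults set in igmp_net_init(). The sketch below merely mirrors that initialization with a hypothetical config struct; the field names and values follow the diff, nothing else is implied about the kernel's layout.

    #include <stdio.h>

    /* Per-namespace knobs replacing the old file-scope globals (illustrative). */
    struct ipv4_ns_cfg {
        int igmp_max_memberships;
        int igmp_max_msf;
        int igmp_llm_reports;
        int igmp_qrv;
    };

    static void ns_init(struct ipv4_ns_cfg *cfg)
    {
        cfg->igmp_max_memberships = 20;  /* former IP_MAX_MEMBERSHIPS */
        cfg->igmp_max_msf = 10;          /* former IP_MAX_MSF */
        cfg->igmp_llm_reports = 1;       /* link-local reports on by default */
        cfg->igmp_qrv = 2;               /* query robustness variable */
    }

    int main(void)
    {
        struct ipv4_ns_cfg cfg;

        ns_init(&cfg);
        printf("%d %d %d %d\n", cfg.igmp_max_memberships, cfg.igmp_max_msf,
               cfg.igmp_llm_reports, cfg.igmp_qrv);
        return 0;
    }
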
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 64148914803a..bc5196ea1bdf 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,6 +24,7 @@
24#include <net/tcp_states.h> 24#include <net/tcp_states.h>
25#include <net/xfrm.h> 25#include <net/xfrm.h>
26#include <net/tcp.h> 26#include <net/tcp.h>
27#include <net/sock_reuseport.h>
27 28
28#ifdef INET_CSK_DEBUG 29#ifdef INET_CSK_DEBUG
29const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; 30const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
67 if ((!reuse || !sk2->sk_reuse || 68 if ((!reuse || !sk2->sk_reuse ||
68 sk2->sk_state == TCP_LISTEN) && 69 sk2->sk_state == TCP_LISTEN) &&
69 (!reuseport || !sk2->sk_reuseport || 70 (!reuseport || !sk2->sk_reuseport ||
70 (sk2->sk_state != TCP_TIME_WAIT && 71 rcu_access_pointer(sk->sk_reuseport_cb) ||
72 (sk2->sk_state != TCP_TIME_WAIT &&
71 !uid_eq(uid, sock_i_uid(sk2))))) { 73 !uid_eq(uid, sock_i_uid(sk2))))) {
72 74
73 if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || 75 if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -89,161 +91,154 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
89 91
90/* Obtain a reference to a local port for the given sock, 92/* Obtain a reference to a local port for the given sock,
91 * if snum is zero it means select any available local port. 93 * if snum is zero it means select any available local port.
94 * We try to allocate an odd port (and leave even ports for connect())
92 */ 95 */
93int inet_csk_get_port(struct sock *sk, unsigned short snum) 96int inet_csk_get_port(struct sock *sk, unsigned short snum)
94{ 97{
95 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 98 bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
99 struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
100 int ret = 1, attempts = 5, port = snum;
101 int smallest_size = -1, smallest_port;
96 struct inet_bind_hashbucket *head; 102 struct inet_bind_hashbucket *head;
97 struct inet_bind_bucket *tb;
98 int ret, attempts = 5;
99 struct net *net = sock_net(sk); 103 struct net *net = sock_net(sk);
100 int smallest_size = -1, smallest_rover; 104 int i, low, high, attempt_half;
105 struct inet_bind_bucket *tb;
101 kuid_t uid = sock_i_uid(sk); 106 kuid_t uid = sock_i_uid(sk);
102 int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; 107 u32 remaining, offset;
103 108
104 local_bh_disable(); 109 if (port) {
105 if (!snum) { 110have_port:
106 int remaining, rover, low, high; 111 head = &hinfo->bhash[inet_bhashfn(net, port,
112 hinfo->bhash_size)];
113 spin_lock_bh(&head->lock);
114 inet_bind_bucket_for_each(tb, &head->chain)
115 if (net_eq(ib_net(tb), net) && tb->port == port)
116 goto tb_found;
107 117
118 goto tb_not_found;
119 }
108again: 120again:
109 inet_get_local_port_range(net, &low, &high); 121 attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
110 if (attempt_half) { 122other_half_scan:
111 int half = low + ((high - low) >> 1); 123 inet_get_local_port_range(net, &low, &high);
112 124 high++; /* [32768, 60999] -> [32768, 61000[ */
113 if (attempt_half == 1) 125 if (high - low < 4)
114 high = half; 126 attempt_half = 0;
115 else 127 if (attempt_half) {
116 low = half; 128 int half = low + (((high - low) >> 2) << 1);
117 } 129
118 remaining = (high - low) + 1; 130 if (attempt_half == 1)
119 smallest_rover = rover = prandom_u32() % remaining + low; 131 high = half;
120 132 else
121 smallest_size = -1; 133 low = half;
122 do { 134 }
123 if (inet_is_local_reserved_port(net, rover)) 135 remaining = high - low;
124 goto next_nolock; 136 if (likely(remaining > 1))
125 head = &hashinfo->bhash[inet_bhashfn(net, rover, 137 remaining &= ~1U;
126 hashinfo->bhash_size)]; 138
127 spin_lock(&head->lock); 139 offset = prandom_u32() % remaining;
128 inet_bind_bucket_for_each(tb, &head->chain) 140 /* __inet_hash_connect() favors ports having @low parity
129 if (net_eq(ib_net(tb), net) && tb->port == rover) { 141 * We do the opposite to not pollute connect() users.
130 if (((tb->fastreuse > 0 && 142 */
131 sk->sk_reuse && 143 offset |= 1U;
132 sk->sk_state != TCP_LISTEN) || 144 smallest_size = -1;
133 (tb->fastreuseport > 0 && 145 smallest_port = low; /* avoid compiler warning */
134 sk->sk_reuseport && 146
135 uid_eq(tb->fastuid, uid))) && 147other_parity_scan:
136 (tb->num_owners < smallest_size || smallest_size == -1)) { 148 port = low + offset;
137 smallest_size = tb->num_owners; 149 for (i = 0; i < remaining; i += 2, port += 2) {
138 smallest_rover = rover; 150 if (unlikely(port >= high))
139 } 151 port -= remaining;
140 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { 152 if (inet_is_local_reserved_port(net, port))
141 snum = rover; 153 continue;
142 goto tb_found; 154 head = &hinfo->bhash[inet_bhashfn(net, port,
143 } 155 hinfo->bhash_size)];
144 goto next; 156 spin_lock_bh(&head->lock);
157 inet_bind_bucket_for_each(tb, &head->chain)
158 if (net_eq(ib_net(tb), net) && tb->port == port) {
159 if (((tb->fastreuse > 0 && reuse) ||
160 (tb->fastreuseport > 0 &&
161 sk->sk_reuseport &&
162 !rcu_access_pointer(sk->sk_reuseport_cb) &&
163 uid_eq(tb->fastuid, uid))) &&
164 (tb->num_owners < smallest_size || smallest_size == -1)) {
165 smallest_size = tb->num_owners;
166 smallest_port = port;
145 } 167 }
146 break; 168 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))
147 next: 169 goto tb_found;
148 spin_unlock(&head->lock); 170 goto next_port;
149 next_nolock:
150 if (++rover > high)
151 rover = low;
152 } while (--remaining > 0);
153
154 /* Exhausted local port range during search? It is not
155 * possible for us to be holding one of the bind hash
156 * locks if this test triggers, because if 'remaining'
157 * drops to zero, we broke out of the do/while loop at
158 * the top level, not from the 'break;' statement.
159 */
160 ret = 1;
161 if (remaining <= 0) {
162 if (smallest_size != -1) {
163 snum = smallest_rover;
164 goto have_snum;
165 }
166 if (attempt_half == 1) {
167 /* OK we now try the upper half of the range */
168 attempt_half = 2;
169 goto again;
170 } 171 }
171 goto fail; 172 goto tb_not_found;
172 } 173next_port:
173 /* OK, here is the one we will use. HEAD is 174 spin_unlock_bh(&head->lock);
174 * non-NULL and we hold it's mutex. 175 cond_resched();
175 */ 176 }
176 snum = rover; 177
177 } else { 178 if (smallest_size != -1) {
178have_snum: 179 port = smallest_port;
179 head = &hashinfo->bhash[inet_bhashfn(net, snum, 180 goto have_port;
180 hashinfo->bhash_size)];
181 spin_lock(&head->lock);
182 inet_bind_bucket_for_each(tb, &head->chain)
183 if (net_eq(ib_net(tb), net) && tb->port == snum)
184 goto tb_found;
185 } 181 }
186 tb = NULL; 182 offset--;
187 goto tb_not_found; 183 if (!(offset & 1))
184 goto other_parity_scan;
185
186 if (attempt_half == 1) {
187 /* OK we now try the upper half of the range */
188 attempt_half = 2;
189 goto other_half_scan;
190 }
191 return ret;
192
193tb_not_found:
194 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
195 net, head, port);
196 if (!tb)
197 goto fail_unlock;
188tb_found: 198tb_found:
189 if (!hlist_empty(&tb->owners)) { 199 if (!hlist_empty(&tb->owners)) {
190 if (sk->sk_reuse == SK_FORCE_REUSE) 200 if (sk->sk_reuse == SK_FORCE_REUSE)
191 goto success; 201 goto success;
192 202
193 if (((tb->fastreuse > 0 && 203 if (((tb->fastreuse > 0 && reuse) ||
194 sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
195 (tb->fastreuseport > 0 && 204 (tb->fastreuseport > 0 &&
205 !rcu_access_pointer(sk->sk_reuseport_cb) &&
196 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && 206 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
197 smallest_size == -1) { 207 smallest_size == -1)
198 goto success; 208 goto success;
199 } else { 209 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
200 ret = 1; 210 if ((reuse ||
201 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { 211 (tb->fastreuseport > 0 &&
202 if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || 212 sk->sk_reuseport &&
203 (tb->fastreuseport > 0 && 213 !rcu_access_pointer(sk->sk_reuseport_cb) &&
204 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && 214 uid_eq(tb->fastuid, uid))) &&
205 smallest_size != -1 && --attempts >= 0) { 215 smallest_size != -1 && --attempts >= 0) {
206 spin_unlock(&head->lock); 216 spin_unlock_bh(&head->lock);
207 goto again; 217 goto again;
208 }
209
210 goto fail_unlock;
211 } 218 }
219 goto fail_unlock;
212 } 220 }
213 } 221 if (!reuse)
214tb_not_found:
215 ret = 1;
216 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
217 net, head, snum)) == NULL)
218 goto fail_unlock;
219 if (hlist_empty(&tb->owners)) {
220 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
221 tb->fastreuse = 1;
222 else
223 tb->fastreuse = 0; 222 tb->fastreuse = 0;
223 if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
224 tb->fastreuseport = 0;
225 } else {
226 tb->fastreuse = reuse;
224 if (sk->sk_reuseport) { 227 if (sk->sk_reuseport) {
225 tb->fastreuseport = 1; 228 tb->fastreuseport = 1;
226 tb->fastuid = uid; 229 tb->fastuid = uid;
227 } else 230 } else {
228 tb->fastreuseport = 0;
229 } else {
230 if (tb->fastreuse &&
231 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
232 tb->fastreuse = 0;
233 if (tb->fastreuseport &&
234 (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
235 tb->fastreuseport = 0; 231 tb->fastreuseport = 0;
232 }
236 } 233 }
237success: 234success:
238 if (!inet_csk(sk)->icsk_bind_hash) 235 if (!inet_csk(sk)->icsk_bind_hash)
239 inet_bind_hash(sk, tb, snum); 236 inet_bind_hash(sk, tb, port);
240 WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 237 WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
241 ret = 0; 238 ret = 0;
242 239
243fail_unlock: 240fail_unlock:
244 spin_unlock(&head->lock); 241 spin_unlock_bh(&head->lock);
245fail:
246 local_bh_enable();
247 return ret; 242 return ret;
248} 243}
249EXPORT_SYMBOL_GPL(inet_csk_get_port); 244EXPORT_SYMBOL_GPL(inet_csk_get_port);
@@ -482,10 +477,6 @@ EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
482#define AF_INET_FAMILY(fam) true 477#define AF_INET_FAMILY(fam) true
483#endif 478#endif
484 479
485/* Only thing we need from tcp.h */
486extern int sysctl_tcp_synack_retries;
487
488
489/* Decide when to expire the request and when to resend SYN-ACK */ 480/* Decide when to expire the request and when to resend SYN-ACK */
490static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 481static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
491 const int max_retries, 482 const int max_retries,
@@ -557,6 +548,7 @@ static void reqsk_timer_handler(unsigned long data)
557{ 548{
558 struct request_sock *req = (struct request_sock *)data; 549 struct request_sock *req = (struct request_sock *)data;
559 struct sock *sk_listener = req->rsk_listener; 550 struct sock *sk_listener = req->rsk_listener;
551 struct net *net = sock_net(sk_listener);
560 struct inet_connection_sock *icsk = inet_csk(sk_listener); 552 struct inet_connection_sock *icsk = inet_csk(sk_listener);
561 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 553 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
562 int qlen, expire = 0, resend = 0; 554 int qlen, expire = 0, resend = 0;
@@ -566,7 +558,7 @@ static void reqsk_timer_handler(unsigned long data)
566 if (sk_state_load(sk_listener) != TCP_LISTEN) 558 if (sk_state_load(sk_listener) != TCP_LISTEN)
567 goto drop; 559 goto drop;
568 560
569 max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 561 max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
570 thresh = max_retries; 562 thresh = max_retries;
571 /* Normally all the openreqs are young and become mature 563 /* Normally all the openreqs are young and become mature
572 * (i.e. converted to established socket) for first timeout. 564 * (i.e. converted to established socket) for first timeout.
@@ -737,6 +729,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
737{ 729{
738 struct inet_connection_sock *icsk = inet_csk(sk); 730 struct inet_connection_sock *icsk = inet_csk(sk);
739 struct inet_sock *inet = inet_sk(sk); 731 struct inet_sock *inet = inet_sk(sk);
732 int err = -EADDRINUSE;
740 733
741 reqsk_queue_alloc(&icsk->icsk_accept_queue); 734 reqsk_queue_alloc(&icsk->icsk_accept_queue);
742 735
@@ -754,13 +747,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
754 inet->inet_sport = htons(inet->inet_num); 747 inet->inet_sport = htons(inet->inet_num);
755 748
756 sk_dst_reset(sk); 749 sk_dst_reset(sk);
757 sk->sk_prot->hash(sk); 750 err = sk->sk_prot->hash(sk);
758 751
759 return 0; 752 if (likely(!err))
753 return 0;
760 } 754 }
761 755
762 sk->sk_state = TCP_CLOSE; 756 sk->sk_state = TCP_CLOSE;
763 return -EADDRINUSE; 757 return err;
764} 758}
765EXPORT_SYMBOL_GPL(inet_csk_listen_start); 759EXPORT_SYMBOL_GPL(inet_csk_listen_start);
766 760
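
The inet_csk_get_port() rework above folds the bind-bucket bookkeeping into two cached flags: tb->fastreuse records that every current owner of the port allows SO_REUSEADDR sharing (and is not listening), and tb->fastreuseport records that every owner uses SO_REUSEPORT under one uid (tb->fastuid). When either flag holds, a later bind() to the same port can jump straight to success without walking the owner list for conflicts. A minimal user-space model of that caching rule follows; the struct and helper are illustrative stand-ins, not the kernel's types.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model of an inet bind bucket; not the kernel's layout. */
struct bucket {
	int owners;         /* sockets already bound to this port */
	int fastreuse;      /* all owners allow SO_REUSEADDR sharing */
	int fastreuseport;  /* all owners use SO_REUSEPORT, same uid */
	unsigned fastuid;
};

/* Mirrors the tb_found/tb_not_found branches: the first owner seeds the
 * flags, every later owner can only keep them set or clear them. */
static void add_owner(struct bucket *tb, bool reuse, bool reuseport, unsigned uid)
{
	if (tb->owners == 0) {
		tb->fastreuse = reuse;
		tb->fastreuseport = reuseport;
		tb->fastuid = uid;
	} else {
		if (!reuse)
			tb->fastreuse = 0;
		if (!reuseport || tb->fastuid != uid)
			tb->fastreuseport = 0;
	}
	tb->owners++;
}

int main(void)
{
	struct bucket tb = { 0 };

	add_owner(&tb, true, true, 1000);
	add_owner(&tb, true, true, 1000);
	add_owner(&tb, false, true, 1000);  /* third owner clears fastreuse only */
	printf("fastreuse=%d fastreuseport=%d\n", tb.fastreuse, tb.fastreuseport);
	return 0;
}
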
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 6029157a19ed..5fdb02f5598e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -357,18 +357,18 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
357 struct sock *sk; 357 struct sock *sk;
358 358
359 if (req->sdiag_family == AF_INET) 359 if (req->sdiag_family == AF_INET)
360 sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], 360 sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
361 req->id.idiag_dport, req->id.idiag_src[0], 361 req->id.idiag_dport, req->id.idiag_src[0],
362 req->id.idiag_sport, req->id.idiag_if); 362 req->id.idiag_sport, req->id.idiag_if);
363#if IS_ENABLED(CONFIG_IPV6) 363#if IS_ENABLED(CONFIG_IPV6)
364 else if (req->sdiag_family == AF_INET6) { 364 else if (req->sdiag_family == AF_INET6) {
365 if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && 365 if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
366 ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) 366 ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
367 sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3], 367 sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
368 req->id.idiag_dport, req->id.idiag_src[3], 368 req->id.idiag_dport, req->id.idiag_src[3],
369 req->id.idiag_sport, req->id.idiag_if); 369 req->id.idiag_sport, req->id.idiag_if);
370 else 370 else
371 sk = inet6_lookup(net, hashinfo, 371 sk = inet6_lookup(net, hashinfo, NULL, 0,
372 (struct in6_addr *)req->id.idiag_dst, 372 (struct in6_addr *)req->id.idiag_dst,
373 req->id.idiag_dport, 373 req->id.idiag_dport,
374 (struct in6_addr *)req->id.idiag_src, 374 (struct in6_addr *)req->id.idiag_src,
@@ -879,6 +879,7 @@ next_normal:
879 } 879 }
880 880
881 spin_unlock_bh(lock); 881 spin_unlock_bh(lock);
882 cond_resched();
882 } 883 }
883 884
884done: 885done:
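
The extra NULL/0 arguments threaded through inet_lookup() and inet6_lookup() above are the skb and header offset that __inet_lookup_listener() (next hunk) now hands to reuseport_select_sock(), so an SO_REUSEPORT listener group can pick one member per flow, by default from a hash of the 4-tuple (the diff computes it with inet_ehashfn()). A toy model of that hash-based pick; the modulo here merely stands in for the kernel's hash scaling, and any attached selection program is ignored.

#include <stdint.h>
#include <stdio.h>

/* Toy model of a reuseport group: several listening sockets bound to the
 * same addr:port, one of which is picked per incoming flow. */
struct reuseport_group {
	int num_socks;
	int sock_ids[8];   /* stand-ins for struct sock pointers */
};

/* Hash-based pick; 'flow_hash' plays the role of the ehash value computed
 * from saddr/sport/daddr/dport in the lookup path. */
static int select_sock(const struct reuseport_group *grp, uint32_t flow_hash)
{
	return grp->sock_ids[flow_hash % grp->num_socks];
}

int main(void)
{
	struct reuseport_group grp = { 3, { 101, 102, 103 } };

	printf("flow 0xdeadbeef -> sock %d\n", select_sock(&grp, 0xdeadbeefu));
	printf("flow 0x12345678 -> sock %d\n", select_sock(&grp, 0x12345678u));
	return 0;
}
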
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ccc5980797fc..0d9e9d7bb029 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -20,10 +20,12 @@
20#include <linux/wait.h> 20#include <linux/wait.h>
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22 22
23#include <net/addrconf.h>
23#include <net/inet_connection_sock.h> 24#include <net/inet_connection_sock.h>
24#include <net/inet_hashtables.h> 25#include <net/inet_hashtables.h>
25#include <net/secure_seq.h> 26#include <net/secure_seq.h>
26#include <net/ip.h> 27#include <net/ip.h>
28#include <net/sock_reuseport.h>
27 29
28static u32 inet_ehashfn(const struct net *net, const __be32 laddr, 30static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
29 const __u16 lport, const __be32 faddr, 31 const __u16 lport, const __be32 faddr,
@@ -205,6 +207,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
205 207
206struct sock *__inet_lookup_listener(struct net *net, 208struct sock *__inet_lookup_listener(struct net *net,
207 struct inet_hashinfo *hashinfo, 209 struct inet_hashinfo *hashinfo,
210 struct sk_buff *skb, int doff,
208 const __be32 saddr, __be16 sport, 211 const __be32 saddr, __be16 sport,
209 const __be32 daddr, const unsigned short hnum, 212 const __be32 daddr, const unsigned short hnum,
210 const int dif) 213 const int dif)
@@ -214,6 +217,7 @@ struct sock *__inet_lookup_listener(struct net *net,
214 unsigned int hash = inet_lhashfn(net, hnum); 217 unsigned int hash = inet_lhashfn(net, hnum);
215 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; 218 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
216 int score, hiscore, matches = 0, reuseport = 0; 219 int score, hiscore, matches = 0, reuseport = 0;
220 bool select_ok = true;
217 u32 phash = 0; 221 u32 phash = 0;
218 222
219 rcu_read_lock(); 223 rcu_read_lock();
@@ -229,6 +233,15 @@ begin:
229 if (reuseport) { 233 if (reuseport) {
230 phash = inet_ehashfn(net, daddr, hnum, 234 phash = inet_ehashfn(net, daddr, hnum,
231 saddr, sport); 235 saddr, sport);
236 if (select_ok) {
237 struct sock *sk2;
238 sk2 = reuseport_select_sock(sk, phash,
239 skb, doff);
240 if (sk2) {
241 result = sk2;
242 goto found;
243 }
244 }
232 matches = 1; 245 matches = 1;
233 } 246 }
234 } else if (score == hiscore && reuseport) { 247 } else if (score == hiscore && reuseport) {
@@ -246,11 +259,13 @@ begin:
246 if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) 259 if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
247 goto begin; 260 goto begin;
248 if (result) { 261 if (result) {
262found:
249 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) 263 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
250 result = NULL; 264 result = NULL;
251 else if (unlikely(compute_score(result, net, hnum, daddr, 265 else if (unlikely(compute_score(result, net, hnum, daddr,
252 dif) < hiscore)) { 266 dif) < hiscore)) {
253 sock_put(result); 267 sock_put(result);
268 select_ok = false;
254 goto begin; 269 goto begin;
255 } 270 }
256 } 271 }
@@ -449,32 +464,76 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
449} 464}
450EXPORT_SYMBOL_GPL(inet_ehash_nolisten); 465EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
451 466
452void __inet_hash(struct sock *sk, struct sock *osk) 467static int inet_reuseport_add_sock(struct sock *sk,
468 struct inet_listen_hashbucket *ilb,
469 int (*saddr_same)(const struct sock *sk1,
470 const struct sock *sk2,
471 bool match_wildcard))
472{
473 struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
474 struct sock *sk2;
475 struct hlist_nulls_node *node;
476 kuid_t uid = sock_i_uid(sk);
477
478 sk_nulls_for_each_rcu(sk2, node, &ilb->head) {
479 if (sk2 != sk &&
480 sk2->sk_family == sk->sk_family &&
481 ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
482 sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
483 inet_csk(sk2)->icsk_bind_hash == tb &&
484 sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
485 saddr_same(sk, sk2, false))
486 return reuseport_add_sock(sk, sk2);
487 }
488
489 /* Initial allocation may have already happened via setsockopt */
490 if (!rcu_access_pointer(sk->sk_reuseport_cb))
491 return reuseport_alloc(sk);
492 return 0;
493}
494
495int __inet_hash(struct sock *sk, struct sock *osk,
496 int (*saddr_same)(const struct sock *sk1,
497 const struct sock *sk2,
498 bool match_wildcard))
453{ 499{
454 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 500 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
455 struct inet_listen_hashbucket *ilb; 501 struct inet_listen_hashbucket *ilb;
502 int err = 0;
456 503
457 if (sk->sk_state != TCP_LISTEN) { 504 if (sk->sk_state != TCP_LISTEN) {
458 inet_ehash_nolisten(sk, osk); 505 inet_ehash_nolisten(sk, osk);
459 return; 506 return 0;
460 } 507 }
461 WARN_ON(!sk_unhashed(sk)); 508 WARN_ON(!sk_unhashed(sk));
462 ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; 509 ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
463 510
464 spin_lock(&ilb->lock); 511 spin_lock(&ilb->lock);
512 if (sk->sk_reuseport) {
513 err = inet_reuseport_add_sock(sk, ilb, saddr_same);
514 if (err)
515 goto unlock;
516 }
465 __sk_nulls_add_node_rcu(sk, &ilb->head); 517 __sk_nulls_add_node_rcu(sk, &ilb->head);
466 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 518 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
519unlock:
467 spin_unlock(&ilb->lock); 520 spin_unlock(&ilb->lock);
521
522 return err;
468} 523}
469EXPORT_SYMBOL(__inet_hash); 524EXPORT_SYMBOL(__inet_hash);
470 525
471void inet_hash(struct sock *sk) 526int inet_hash(struct sock *sk)
472{ 527{
528 int err = 0;
529
473 if (sk->sk_state != TCP_CLOSE) { 530 if (sk->sk_state != TCP_CLOSE) {
474 local_bh_disable(); 531 local_bh_disable();
475 __inet_hash(sk, NULL); 532 err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
476 local_bh_enable(); 533 local_bh_enable();
477 } 534 }
535
536 return err;
478} 537}
479EXPORT_SYMBOL_GPL(inet_hash); 538EXPORT_SYMBOL_GPL(inet_hash);
480 539
@@ -493,6 +552,8 @@ void inet_unhash(struct sock *sk)
493 lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 552 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
494 553
495 spin_lock_bh(lock); 554 spin_lock_bh(lock);
555 if (rcu_access_pointer(sk->sk_reuseport_cb))
556 reuseport_detach_sock(sk);
496 done = __sk_nulls_del_node_init_rcu(sk); 557 done = __sk_nulls_del_node_init_rcu(sk);
497 if (done) 558 if (done)
498 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 559 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
@@ -506,106 +567,106 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
506 struct sock *, __u16, struct inet_timewait_sock **)) 567 struct sock *, __u16, struct inet_timewait_sock **))
507{ 568{
508 struct inet_hashinfo *hinfo = death_row->hashinfo; 569 struct inet_hashinfo *hinfo = death_row->hashinfo;
509 const unsigned short snum = inet_sk(sk)->inet_num; 570 struct inet_timewait_sock *tw = NULL;
510 struct inet_bind_hashbucket *head; 571 struct inet_bind_hashbucket *head;
511 struct inet_bind_bucket *tb; 572 int port = inet_sk(sk)->inet_num;
512 int ret;
513 struct net *net = sock_net(sk); 573 struct net *net = sock_net(sk);
574 struct inet_bind_bucket *tb;
575 u32 remaining, offset;
576 int ret, i, low, high;
577 static u32 hint;
578
579 if (port) {
580 head = &hinfo->bhash[inet_bhashfn(net, port,
581 hinfo->bhash_size)];
582 tb = inet_csk(sk)->icsk_bind_hash;
583 spin_lock_bh(&head->lock);
584 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
585 inet_ehash_nolisten(sk, NULL);
586 spin_unlock_bh(&head->lock);
587 return 0;
588 }
589 spin_unlock(&head->lock);
590 /* No definite answer... Walk to established hash table */
591 ret = check_established(death_row, sk, port, NULL);
592 local_bh_enable();
593 return ret;
594 }
514 595
515 if (!snum) { 596 inet_get_local_port_range(net, &low, &high);
516 int i, remaining, low, high, port; 597 high++; /* [32768, 60999] -> [32768, 61000[ */
517 static u32 hint; 598 remaining = high - low;
518 u32 offset = hint + port_offset; 599 if (likely(remaining > 1))
519 struct inet_timewait_sock *tw = NULL; 600 remaining &= ~1U;
520 601
521 inet_get_local_port_range(net, &low, &high); 602 offset = (hint + port_offset) % remaining;
522 remaining = (high - low) + 1; 603 /* In first pass we try ports of @low parity.
604 * inet_csk_get_port() does the opposite choice.
605 */
606 offset &= ~1U;
607other_parity_scan:
608 port = low + offset;
609 for (i = 0; i < remaining; i += 2, port += 2) {
610 if (unlikely(port >= high))
611 port -= remaining;
612 if (inet_is_local_reserved_port(net, port))
613 continue;
614 head = &hinfo->bhash[inet_bhashfn(net, port,
615 hinfo->bhash_size)];
616 spin_lock_bh(&head->lock);
523 617
524 /* By starting with offset being an even number, 618 /* Does not bother with rcv_saddr checks, because
525 * we tend to leave about 50% of ports for other uses, 619 * the established check is already unique enough.
526 * like bind(0).
527 */ 620 */
528 offset &= ~1; 621 inet_bind_bucket_for_each(tb, &head->chain) {
529 622 if (net_eq(ib_net(tb), net) && tb->port == port) {
530 local_bh_disable(); 623 if (tb->fastreuse >= 0 ||
531 for (i = 0; i < remaining; i++) { 624 tb->fastreuseport >= 0)
532 port = low + (i + offset) % remaining;
533 if (inet_is_local_reserved_port(net, port))
534 continue;
535 head = &hinfo->bhash[inet_bhashfn(net, port,
536 hinfo->bhash_size)];
537 spin_lock(&head->lock);
538
539 /* Does not bother with rcv_saddr checks,
540 * because the established check is already
541 * unique enough.
542 */
543 inet_bind_bucket_for_each(tb, &head->chain) {
544 if (net_eq(ib_net(tb), net) &&
545 tb->port == port) {
546 if (tb->fastreuse >= 0 ||
547 tb->fastreuseport >= 0)
548 goto next_port;
549 WARN_ON(hlist_empty(&tb->owners));
550 if (!check_established(death_row, sk,
551 port, &tw))
552 goto ok;
553 goto next_port; 625 goto next_port;
554 } 626 WARN_ON(hlist_empty(&tb->owners));
627 if (!check_established(death_row, sk,
628 port, &tw))
629 goto ok;
630 goto next_port;
555 } 631 }
556
557 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
558 net, head, port);
559 if (!tb) {
560 spin_unlock(&head->lock);
561 break;
562 }
563 tb->fastreuse = -1;
564 tb->fastreuseport = -1;
565 goto ok;
566
567 next_port:
568 spin_unlock(&head->lock);
569 } 632 }
570 local_bh_enable();
571
572 return -EADDRNOTAVAIL;
573 633
574ok: 634 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
575 hint += (i + 2) & ~1; 635 net, head, port);
576 636 if (!tb) {
577 /* Head lock still held and bh's disabled */ 637 spin_unlock_bh(&head->lock);
578 inet_bind_hash(sk, tb, port); 638 return -ENOMEM;
579 if (sk_unhashed(sk)) {
580 inet_sk(sk)->inet_sport = htons(port);
581 inet_ehash_nolisten(sk, (struct sock *)tw);
582 } 639 }
583 if (tw) 640 tb->fastreuse = -1;
584 inet_twsk_bind_unhash(tw, hinfo); 641 tb->fastreuseport = -1;
585 spin_unlock(&head->lock); 642 goto ok;
643next_port:
644 spin_unlock_bh(&head->lock);
645 cond_resched();
646 }
586 647
587 if (tw) 648 offset++;
588 inet_twsk_deschedule_put(tw); 649 if ((offset & 1) && remaining > 1)
650 goto other_parity_scan;
589 651
590 ret = 0; 652 return -EADDRNOTAVAIL;
591 goto out;
592 }
593 653
594 head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; 654ok:
595 tb = inet_csk(sk)->icsk_bind_hash; 655 hint += i + 2;
596 spin_lock_bh(&head->lock); 656
597 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 657 /* Head lock still held and bh's disabled */
598 inet_ehash_nolisten(sk, NULL); 658 inet_bind_hash(sk, tb, port);
599 spin_unlock_bh(&head->lock); 659 if (sk_unhashed(sk)) {
600 return 0; 660 inet_sk(sk)->inet_sport = htons(port);
601 } else { 661 inet_ehash_nolisten(sk, (struct sock *)tw);
602 spin_unlock(&head->lock);
603 /* No definite answer... Walk to established hash table */
604 ret = check_established(death_row, sk, snum, NULL);
605out:
606 local_bh_enable();
607 return ret;
608 } 662 }
663 if (tw)
664 inet_twsk_bind_unhash(tw, hinfo);
665 spin_unlock(&head->lock);
666 if (tw)
667 inet_twsk_deschedule_put(tw);
668 local_bh_enable();
669 return 0;
609} 670}
610 671
611/* 672/*
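
The rewritten __inet_hash_connect() above walks the ephemeral range one parity at a time: the starting offset is forced even, the scan steps by two and wraps inside the range, and only if every even-offset port fails does it retry the odd offsets; per the comment in the hunk, inet_csk_get_port() makes the opposite choice, so connect() and bind(0) each lean on roughly half the ports. A stand-alone sketch of that scan; port_in_use() is a made-up placeholder for the bind-bucket and established-hash checks.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical availability test; the kernel consults the bind and
 * established hash tables here. */
static bool port_in_use(int port)
{
	return port != 49157;  /* pretend only this odd-offset port is free */
}

/* One-parity scan with wrap-around, then the other parity, as in the
 * reworked __inet_hash_connect().  Assumes low <= high. */
static int pick_port(int low, int high, unsigned offset)
{
	unsigned remaining = (unsigned)(high - low) + 1;
	int i, port;

	if (remaining > 1)
		remaining &= ~1U;                /* keep the range even-sized */
	offset %= remaining;
	offset &= ~1U;                           /* first pass: even offsets */

	for (;;) {
		port = low + (int)offset;
		for (i = 0; i < (int)remaining; i += 2, port += 2) {
			if (port > high)
				port -= (int)remaining;  /* wrap inside the range */
			if (!port_in_use(port))
				return port;
		}
		if (offset & 1)
			break;                   /* both parities exhausted */
		offset++;                        /* second pass: odd offsets */
	}
	return -1;
}

int main(void)
{
	printf("picked %d\n", pick_port(49152, 49159, 5));
	return 0;
}
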
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
deleted file mode 100644
index f17ea49b28fb..000000000000
--- a/net/ipv4/inet_lro.c
+++ /dev/null
@@ -1,374 +0,0 @@
1/*
2 * linux/net/ipv4/inet_lro.c
3 *
4 * Large Receive Offload (ipv4 / tcp)
5 *
6 * (C) Copyright IBM Corp. 2007
7 *
8 * Authors:
9 * Jan-Bernd Themann <themann@de.ibm.com>
10 * Christoph Raisch <raisch@de.ibm.com>
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2, or (at your option)
16 * any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 */
27
28
29#include <linux/module.h>
30#include <linux/if_vlan.h>
31#include <linux/inet_lro.h>
32#include <net/checksum.h>
33
34MODULE_LICENSE("GPL");
35MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
36MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
37
38#define TCP_HDR_LEN(tcph) (tcph->doff << 2)
39#define IP_HDR_LEN(iph) (iph->ihl << 2)
40#define TCP_PAYLOAD_LENGTH(iph, tcph) \
41 (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
42
43#define IPH_LEN_WO_OPTIONS 5
44#define TCPH_LEN_WO_OPTIONS 5
45#define TCPH_LEN_W_TIMESTAMP 8
46
47#define LRO_MAX_PG_HLEN 64
48
49#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }
50
51/*
52 * Basic tcp checks whether packet is suitable for LRO
53 */
54
55static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
56 int len, const struct net_lro_desc *lro_desc)
57{
58 /* check ip header: don't aggregate padded frames */
59 if (ntohs(iph->tot_len) != len)
60 return -1;
61
62 if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
63 return -1;
64
65 if (iph->ihl != IPH_LEN_WO_OPTIONS)
66 return -1;
67
68 if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
69 tcph->rst || tcph->syn || tcph->fin)
70 return -1;
71
72 if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
73 return -1;
74
75 if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
76 tcph->doff != TCPH_LEN_W_TIMESTAMP)
77 return -1;
78
79 /* check tcp options (only timestamp allowed) */
80 if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
81 __be32 *topt = (__be32 *)(tcph + 1);
82
83 if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
84 | (TCPOPT_TIMESTAMP << 8)
85 | TCPOLEN_TIMESTAMP))
86 return -1;
87
88 /* timestamp should be in right order */
89 topt++;
90 if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
91 ntohl(*topt)))
92 return -1;
93
94 /* timestamp reply should not be zero */
95 topt++;
96 if (*topt == 0)
97 return -1;
98 }
99
100 return 0;
101}
102
103static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
104{
105 struct iphdr *iph = lro_desc->iph;
106 struct tcphdr *tcph = lro_desc->tcph;
107 __be32 *p;
108 __wsum tcp_hdr_csum;
109
110 tcph->ack_seq = lro_desc->tcp_ack;
111 tcph->window = lro_desc->tcp_window;
112
113 if (lro_desc->tcp_saw_tstamp) {
114 p = (__be32 *)(tcph + 1);
115 *(p+2) = lro_desc->tcp_rcv_tsecr;
116 }
117
118 csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
119 iph->tot_len = htons(lro_desc->ip_tot_len);
120
121 tcph->check = 0;
122 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
123 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
124 tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
125 lro_desc->ip_tot_len -
126 IP_HDR_LEN(iph), IPPROTO_TCP,
127 lro_desc->data_csum);
128}
129
130static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
131{
132 __wsum tcp_csum;
133 __wsum tcp_hdr_csum;
134 __wsum tcp_ps_hdr_csum;
135
136 tcp_csum = ~csum_unfold(tcph->check);
137 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
138
139 tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
140 len + TCP_HDR_LEN(tcph),
141 IPPROTO_TCP, 0);
142
143 return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
144 tcp_ps_hdr_csum);
145}
146
147static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
148 struct iphdr *iph, struct tcphdr *tcph)
149{
150 int nr_frags;
151 __be32 *ptr;
152 u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
153
154 nr_frags = skb_shinfo(skb)->nr_frags;
155 lro_desc->parent = skb;
156 lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
157 lro_desc->iph = iph;
158 lro_desc->tcph = tcph;
159 lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
160 lro_desc->tcp_ack = tcph->ack_seq;
161 lro_desc->tcp_window = tcph->window;
162
163 lro_desc->pkt_aggr_cnt = 1;
164 lro_desc->ip_tot_len = ntohs(iph->tot_len);
165
166 if (tcph->doff == 8) {
167 ptr = (__be32 *)(tcph+1);
168 lro_desc->tcp_saw_tstamp = 1;
169 lro_desc->tcp_rcv_tsval = *(ptr+1);
170 lro_desc->tcp_rcv_tsecr = *(ptr+2);
171 }
172
173 lro_desc->mss = tcp_data_len;
174 lro_desc->active = 1;
175
176 lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
177 tcp_data_len);
178}
179
180static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
181{
182 memset(lro_desc, 0, sizeof(struct net_lro_desc));
183}
184
185static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
186 struct tcphdr *tcph, int tcp_data_len)
187{
188 struct sk_buff *parent = lro_desc->parent;
189 __be32 *topt;
190
191 lro_desc->pkt_aggr_cnt++;
192 lro_desc->ip_tot_len += tcp_data_len;
193 lro_desc->tcp_next_seq += tcp_data_len;
194 lro_desc->tcp_window = tcph->window;
195 lro_desc->tcp_ack = tcph->ack_seq;
196
197 /* don't update tcp_rcv_tsval, would not work with PAWS */
198 if (lro_desc->tcp_saw_tstamp) {
199 topt = (__be32 *) (tcph + 1);
200 lro_desc->tcp_rcv_tsecr = *(topt + 2);
201 }
202
203 lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
204 lro_tcp_data_csum(iph, tcph,
205 tcp_data_len),
206 parent->len);
207
208 parent->len += tcp_data_len;
209 parent->data_len += tcp_data_len;
210 if (tcp_data_len > lro_desc->mss)
211 lro_desc->mss = tcp_data_len;
212}
213
214static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
215 struct iphdr *iph, struct tcphdr *tcph)
216{
217 struct sk_buff *parent = lro_desc->parent;
218 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
219
220 lro_add_common(lro_desc, iph, tcph, tcp_data_len);
221
222 skb_pull(skb, (skb->len - tcp_data_len));
223 parent->truesize += skb->truesize;
224
225 if (lro_desc->last_skb)
226 lro_desc->last_skb->next = skb;
227 else
228 skb_shinfo(parent)->frag_list = skb;
229
230 lro_desc->last_skb = skb;
231}
232
233
234static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
235 struct iphdr *iph,
236 struct tcphdr *tcph)
237{
238 if ((lro_desc->iph->saddr != iph->saddr) ||
239 (lro_desc->iph->daddr != iph->daddr) ||
240 (lro_desc->tcph->source != tcph->source) ||
241 (lro_desc->tcph->dest != tcph->dest))
242 return -1;
243 return 0;
244}
245
246static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
247 struct net_lro_desc *lro_arr,
248 struct iphdr *iph,
249 struct tcphdr *tcph)
250{
251 struct net_lro_desc *lro_desc = NULL;
252 struct net_lro_desc *tmp;
253 int max_desc = lro_mgr->max_desc;
254 int i;
255
256 for (i = 0; i < max_desc; i++) {
257 tmp = &lro_arr[i];
258 if (tmp->active)
259 if (!lro_check_tcp_conn(tmp, iph, tcph)) {
260 lro_desc = tmp;
261 goto out;
262 }
263 }
264
265 for (i = 0; i < max_desc; i++) {
266 if (!lro_arr[i].active) {
267 lro_desc = &lro_arr[i];
268 goto out;
269 }
270 }
271
272 LRO_INC_STATS(lro_mgr, no_desc);
273out:
274 return lro_desc;
275}
276
277static void lro_flush(struct net_lro_mgr *lro_mgr,
278 struct net_lro_desc *lro_desc)
279{
280 if (lro_desc->pkt_aggr_cnt > 1)
281 lro_update_tcp_ip_header(lro_desc);
282
283 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
284
285 if (lro_mgr->features & LRO_F_NAPI)
286 netif_receive_skb(lro_desc->parent);
287 else
288 netif_rx(lro_desc->parent);
289
290 LRO_INC_STATS(lro_mgr, flushed);
291 lro_clear_desc(lro_desc);
292}
293
294static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
295 void *priv)
296{
297 struct net_lro_desc *lro_desc;
298 struct iphdr *iph;
299 struct tcphdr *tcph;
300 u64 flags;
301 int vlan_hdr_len = 0;
302
303 if (!lro_mgr->get_skb_header ||
304 lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
305 &flags, priv))
306 goto out;
307
308 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
309 goto out;
310
311 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
312 if (!lro_desc)
313 goto out;
314
315 if ((skb->protocol == htons(ETH_P_8021Q)) &&
316 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
317 vlan_hdr_len = VLAN_HLEN;
318
319 if (!lro_desc->active) { /* start new lro session */
320 if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
321 goto out;
322
323 skb->ip_summed = lro_mgr->ip_summed_aggr;
324 lro_init_desc(lro_desc, skb, iph, tcph);
325 LRO_INC_STATS(lro_mgr, aggregated);
326 return 0;
327 }
328
329 if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
330 goto out2;
331
332 if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
333 goto out2;
334
335 lro_add_packet(lro_desc, skb, iph, tcph);
336 LRO_INC_STATS(lro_mgr, aggregated);
337
338 if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
339 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
340 lro_flush(lro_mgr, lro_desc);
341
342 return 0;
343
344out2: /* send aggregated SKBs to stack */
345 lro_flush(lro_mgr, lro_desc);
346
347out:
348 return 1;
349}
350
351void lro_receive_skb(struct net_lro_mgr *lro_mgr,
352 struct sk_buff *skb,
353 void *priv)
354{
355 if (__lro_proc_skb(lro_mgr, skb, priv)) {
356 if (lro_mgr->features & LRO_F_NAPI)
357 netif_receive_skb(skb);
358 else
359 netif_rx(skb);
360 }
361}
362EXPORT_SYMBOL(lro_receive_skb);
363
364void lro_flush_all(struct net_lro_mgr *lro_mgr)
365{
366 int i;
367 struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
368
369 for (i = 0; i < lro_mgr->max_desc; i++) {
370 if (lro_desc[i].active)
371 lro_flush(lro_mgr, &lro_desc[i]);
372 }
373}
374EXPORT_SYMBOL(lro_flush_all);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index da0d7ce85844..af18f1e4889e 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -71,7 +71,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
71 if (unlikely(opt->optlen)) 71 if (unlikely(opt->optlen))
72 ip_forward_options(skb); 72 ip_forward_options(skb);
73 73
74 skb_sender_cpu_clear(skb);
75 return dst_output(net, sk, skb); 74 return dst_output(net, sk, skb);
76} 75}
77 76
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 187c6fcc3027..efbd47d1a531 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -54,8 +54,6 @@
54 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c 54 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
55 * as well. Or notify me, at least. --ANK 55 * as well. Or notify me, at least. --ANK
56 */ 56 */
57
58static int sysctl_ipfrag_max_dist __read_mostly = 64;
59static const char ip_frag_cache_name[] = "ip4-frags"; 57static const char ip_frag_cache_name[] = "ip4-frags";
60 58
61struct ipfrag_skb_cb 59struct ipfrag_skb_cb
@@ -150,7 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
150 qp->daddr = arg->iph->daddr; 148 qp->daddr = arg->iph->daddr;
151 qp->vif = arg->vif; 149 qp->vif = arg->vif;
152 qp->user = arg->user; 150 qp->user = arg->user;
153 qp->peer = sysctl_ipfrag_max_dist ? 151 qp->peer = q->net->max_dist ?
154 inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : 152 inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
155 NULL; 153 NULL;
156} 154}
@@ -275,7 +273,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
275static int ip_frag_too_far(struct ipq *qp) 273static int ip_frag_too_far(struct ipq *qp)
276{ 274{
277 struct inet_peer *peer = qp->peer; 275 struct inet_peer *peer = qp->peer;
278 unsigned int max = sysctl_ipfrag_max_dist; 276 unsigned int max = qp->q.net->max_dist;
279 unsigned int start, end; 277 unsigned int start, end;
280 278
281 int rc; 279 int rc;
@@ -749,6 +747,14 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
749 .mode = 0644, 747 .mode = 0644,
750 .proc_handler = proc_dointvec_jiffies, 748 .proc_handler = proc_dointvec_jiffies,
751 }, 749 },
750 {
751 .procname = "ipfrag_max_dist",
752 .data = &init_net.ipv4.frags.max_dist,
753 .maxlen = sizeof(int),
754 .mode = 0644,
755 .proc_handler = proc_dointvec_minmax,
756 .extra1 = &zero
757 },
752 { } 758 { }
753}; 759};
754 760
@@ -762,14 +768,6 @@ static struct ctl_table ip4_frags_ctl_table[] = {
762 .mode = 0644, 768 .mode = 0644,
763 .proc_handler = proc_dointvec_jiffies, 769 .proc_handler = proc_dointvec_jiffies,
764 }, 770 },
765 {
766 .procname = "ipfrag_max_dist",
767 .data = &sysctl_ipfrag_max_dist,
768 .maxlen = sizeof(int),
769 .mode = 0644,
770 .proc_handler = proc_dointvec_minmax,
771 .extra1 = &zero
772 },
773 { } 771 { }
774}; 772};
775 773
@@ -790,10 +788,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
790 table[1].data = &net->ipv4.frags.low_thresh; 788 table[1].data = &net->ipv4.frags.low_thresh;
791 table[1].extra2 = &net->ipv4.frags.high_thresh; 789 table[1].extra2 = &net->ipv4.frags.high_thresh;
792 table[2].data = &net->ipv4.frags.timeout; 790 table[2].data = &net->ipv4.frags.timeout;
793 791 table[3].data = &net->ipv4.frags.max_dist;
794 /* Don't export sysctls to unprivileged users */
795 if (net->user_ns != &init_user_ns)
796 table[0].procname = NULL;
797 } 792 }
798 793
799 hdr = register_net_sysctl(net, "net/ipv4", table); 794 hdr = register_net_sysctl(net, "net/ipv4", table);
@@ -865,6 +860,8 @@ static int __net_init ipv4_frags_init_net(struct net *net)
865 */ 860 */
866 net->ipv4.frags.timeout = IP_FRAG_TIME; 861 net->ipv4.frags.timeout = IP_FRAG_TIME;
867 862
863 net->ipv4.frags.max_dist = 64;
864
868 res = inet_frags_init_net(&net->ipv4.frags); 865 res = inet_frags_init_net(&net->ipv4.frags);
869 if (res) 866 if (res)
870 return res; 867 return res;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 41ba68de46d8..4cc84212cce1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -179,6 +179,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags)
179 return flags; 179 return flags;
180} 180}
181 181
182/* Fills in tpi and returns header length to be pulled. */
182static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, 183static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
183 bool *csum_err) 184 bool *csum_err)
184{ 185{
@@ -238,7 +239,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
238 return -EINVAL; 239 return -EINVAL;
239 } 240 }
240 } 241 }
241 return iptunnel_pull_header(skb, hdr_len, tpi->proto); 242 return hdr_len;
242} 243}
243 244
244static void ipgre_err(struct sk_buff *skb, u32 info, 245static void ipgre_err(struct sk_buff *skb, u32 info,
@@ -341,7 +342,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
341 struct tnl_ptk_info tpi; 342 struct tnl_ptk_info tpi;
342 bool csum_err = false; 343 bool csum_err = false;
343 344
344 if (parse_gre_header(skb, &tpi, &csum_err)) { 345 if (parse_gre_header(skb, &tpi, &csum_err) < 0) {
345 if (!csum_err) /* ignore csum errors. */ 346 if (!csum_err) /* ignore csum errors. */
346 return; 347 return;
347 } 348 }
@@ -397,7 +398,10 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
397 iph->saddr, iph->daddr, tpi->key); 398 iph->saddr, iph->daddr, tpi->key);
398 399
399 if (tunnel) { 400 if (tunnel) {
400 skb_pop_mac_header(skb); 401 if (tunnel->dev->type != ARPHRD_NONE)
402 skb_pop_mac_header(skb);
403 else
404 skb_reset_mac_header(skb);
401 if (tunnel->collect_md) { 405 if (tunnel->collect_md) {
402 __be16 flags; 406 __be16 flags;
403 __be64 tun_id; 407 __be64 tun_id;
@@ -419,6 +423,7 @@ static int gre_rcv(struct sk_buff *skb)
419{ 423{
420 struct tnl_ptk_info tpi; 424 struct tnl_ptk_info tpi;
421 bool csum_err = false; 425 bool csum_err = false;
426 int hdr_len;
422 427
423#ifdef CONFIG_NET_IPGRE_BROADCAST 428#ifdef CONFIG_NET_IPGRE_BROADCAST
424 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { 429 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
@@ -428,7 +433,10 @@ static int gre_rcv(struct sk_buff *skb)
428 } 433 }
429#endif 434#endif
430 435
431 if (parse_gre_header(skb, &tpi, &csum_err) < 0) 436 hdr_len = parse_gre_header(skb, &tpi, &csum_err);
437 if (hdr_len < 0)
438 goto drop;
439 if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0)
432 goto drop; 440 goto drop;
433 441
434 if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) 442 if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
@@ -440,6 +448,17 @@ drop:
440 return 0; 448 return 0;
441} 449}
442 450
451static __sum16 gre_checksum(struct sk_buff *skb)
452{
453 __wsum csum;
454
455 if (skb->ip_summed == CHECKSUM_PARTIAL)
456 csum = lco_csum(skb);
457 else
458 csum = skb_checksum(skb, 0, skb->len, 0);
459 return csum_fold(csum);
460}
461
443static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, 462static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
444 __be16 proto, __be32 key, __be32 seq) 463 __be16 proto, __be32 key, __be32 seq)
445{ 464{
@@ -467,8 +486,7 @@ static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
467 !(skb_shinfo(skb)->gso_type & 486 !(skb_shinfo(skb)->gso_type &
468 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { 487 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
469 *ptr = 0; 488 *ptr = 0;
470 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, 489 *(__sum16 *)ptr = gre_checksum(skb);
471 skb->len, 0));
472 } 490 }
473 } 491 }
474} 492}
@@ -493,8 +511,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
493static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, 511static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
494 bool csum) 512 bool csum)
495{ 513{
496 return iptunnel_handle_offloads(skb, csum, 514 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
497 csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
498} 515}
499 516
500static struct rtable *gre_get_rt(struct sk_buff *skb, 517static struct rtable *gre_get_rt(struct sk_buff *skb,
@@ -514,15 +531,17 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
514 return ip_route_output_key(net, fl); 531 return ip_route_output_key(net, fl);
515} 532}
516 533
517static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) 534static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
535 __be16 proto)
518{ 536{
519 struct ip_tunnel_info *tun_info; 537 struct ip_tunnel_info *tun_info;
520 const struct ip_tunnel_key *key; 538 const struct ip_tunnel_key *key;
539 struct rtable *rt = NULL;
521 struct flowi4 fl; 540 struct flowi4 fl;
522 struct rtable *rt;
523 int min_headroom; 541 int min_headroom;
524 int tunnel_hlen; 542 int tunnel_hlen;
525 __be16 df, flags; 543 __be16 df, flags;
544 bool use_cache;
526 int err; 545 int err;
527 546
528 tun_info = skb_tunnel_info(skb); 547 tun_info = skb_tunnel_info(skb);
@@ -531,9 +550,17 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
531 goto err_free_skb; 550 goto err_free_skb;
532 551
533 key = &tun_info->key; 552 key = &tun_info->key;
534 rt = gre_get_rt(skb, dev, &fl, key); 553 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
535 if (IS_ERR(rt)) 554 if (use_cache)
536 goto err_free_skb; 555 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
556 if (!rt) {
557 rt = gre_get_rt(skb, dev, &fl, key);
558 if (IS_ERR(rt))
559 goto err_free_skb;
560 if (use_cache)
561 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
562 fl.saddr);
563 }
537 564
538 tunnel_hlen = ip_gre_calc_hlen(key->tun_flags); 565 tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
539 566
@@ -557,7 +584,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
557 } 584 }
558 585
559 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); 586 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
560 build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), 587 build_header(skb, tunnel_hlen, flags, proto,
561 tunnel_id_to_key(tun_info->key.tun_id), 0); 588 tunnel_id_to_key(tun_info->key.tun_id), 0);
562 589
563 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 590 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -598,7 +625,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
598 const struct iphdr *tnl_params; 625 const struct iphdr *tnl_params;
599 626
600 if (tunnel->collect_md) { 627 if (tunnel->collect_md) {
601 gre_fb_xmit(skb, dev); 628 gre_fb_xmit(skb, dev, skb->protocol);
602 return NETDEV_TX_OK; 629 return NETDEV_TX_OK;
603 } 630 }
604 631
@@ -642,7 +669,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
642 struct ip_tunnel *tunnel = netdev_priv(dev); 669 struct ip_tunnel *tunnel = netdev_priv(dev);
643 670
644 if (tunnel->collect_md) { 671 if (tunnel->collect_md) {
645 gre_fb_xmit(skb, dev); 672 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
646 return NETDEV_TX_OK; 673 return NETDEV_TX_OK;
647 } 674 }
648 675
@@ -844,9 +871,16 @@ static void __gre_tunnel_init(struct net_device *dev)
844 dev->hw_features |= GRE_FEATURES; 871 dev->hw_features |= GRE_FEATURES;
845 872
846 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { 873 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
847 /* TCP offload with GRE SEQ is not supported. */ 874 /* TCP offload with GRE SEQ is not supported, nor
848 dev->features |= NETIF_F_GSO_SOFTWARE; 875 * can we support 2 levels of outer headers requiring
849 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 876 * an update.
877 */
878 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
879 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
880 dev->features |= NETIF_F_GSO_SOFTWARE;
881 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
882 }
883
850 /* Can use a lockless transmit, unless we generate 884 /* Can use a lockless transmit, unless we generate
851 * output sequences 885 * output sequences
852 */ 886 */
@@ -868,7 +902,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
868 netif_keep_dst(dev); 902 netif_keep_dst(dev);
869 dev->addr_len = 4; 903 dev->addr_len = 4;
870 904
871 if (iph->daddr) { 905 if (iph->daddr && !tunnel->collect_md) {
872#ifdef CONFIG_NET_IPGRE_BROADCAST 906#ifdef CONFIG_NET_IPGRE_BROADCAST
873 if (ipv4_is_multicast(iph->daddr)) { 907 if (ipv4_is_multicast(iph->daddr)) {
874 if (!iph->saddr) 908 if (!iph->saddr)
@@ -877,8 +911,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
877 dev->header_ops = &ipgre_header_ops; 911 dev->header_ops = &ipgre_header_ops;
878 } 912 }
879#endif 913#endif
880 } else 914 } else if (!tunnel->collect_md) {
881 dev->header_ops = &ipgre_header_ops; 915 dev->header_ops = &ipgre_header_ops;
916 }
882 917
883 return ip_tunnel_init(dev); 918 return ip_tunnel_init(dev);
884} 919}
@@ -921,6 +956,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
921 if (flags & (GRE_VERSION|GRE_ROUTING)) 956 if (flags & (GRE_VERSION|GRE_ROUTING))
922 return -EINVAL; 957 return -EINVAL;
923 958
959 if (data[IFLA_GRE_COLLECT_METADATA] &&
960 data[IFLA_GRE_ENCAP_TYPE] &&
961 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
962 return -EINVAL;
963
924 return 0; 964 return 0;
925} 965}
926 966
@@ -994,6 +1034,8 @@ static void ipgre_netlink_parms(struct net_device *dev,
994 struct ip_tunnel *t = netdev_priv(dev); 1034 struct ip_tunnel *t = netdev_priv(dev);
995 1035
996 t->collect_md = true; 1036 t->collect_md = true;
1037 if (dev->type == ARPHRD_IPGRE)
1038 dev->type = ARPHRD_NONE;
997 } 1039 }
998} 1040}
999 1041
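
gre_checksum(), added in the ip_gre.c hunks above, feeds either lco_csum() (when the inner packet still carries CHECKSUM_PARTIAL) or a full skb_checksum() into csum_fold() to produce the 16-bit value that build_header() writes into the GRE checksum field. The fold itself is plain one's-complement arithmetic; a small self-contained example follows, using renamed helpers and ordinary integers rather than the kernel's __wsum/__sum16 byte-order conventions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit one's-complement accumulator down to 16 bits and invert it,
 * mirroring what csum_fold() does with the sum gre_checksum() hands it. */
static uint16_t fold_csum(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);  /* add the carries back in ...    */
	sum = (sum & 0xffff) + (sum >> 16);  /* ... twice covers the new carry */
	return (uint16_t)~sum;
}

/* RFC 1071 style 16-bit word sum (big-endian word view of the buffer). */
static uint32_t sum_words(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)((data[i] << 8) | data[i + 1]);
	if (len & 1)
		sum += (uint32_t)(data[len - 1] << 8);  /* pad the odd byte */
	return sum;
}

int main(void)
{
	const uint8_t payload[] = { 0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46 };

	printf("checksum field: 0x%04x\n",
	       (unsigned)fold_csum(sum_words(payload, sizeof(payload))));
	return 0;
}
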
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d77eb0c3b684..e3d782746d9d 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -308,15 +308,12 @@ drop:
308 return true; 308 return true;
309} 309}
310 310
311int sysctl_ip_early_demux __read_mostly = 1;
312EXPORT_SYMBOL(sysctl_ip_early_demux);
313
314static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 311static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
315{ 312{
316 const struct iphdr *iph = ip_hdr(skb); 313 const struct iphdr *iph = ip_hdr(skb);
317 struct rtable *rt; 314 struct rtable *rt;
318 315
319 if (sysctl_ip_early_demux && 316 if (net->ipv4.sysctl_ip_early_demux &&
320 !skb_dst(skb) && 317 !skb_dst(skb) &&
321 !skb->sk && 318 !skb->sk &&
322 !ip_is_fragment(iph)) { 319 !ip_is_fragment(iph)) {
@@ -362,8 +359,31 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
362 rt = skb_rtable(skb); 359 rt = skb_rtable(skb);
363 if (rt->rt_type == RTN_MULTICAST) { 360 if (rt->rt_type == RTN_MULTICAST) {
364 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); 361 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
365 } else if (rt->rt_type == RTN_BROADCAST) 362 } else if (rt->rt_type == RTN_BROADCAST) {
366 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); 363 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
364 } else if (skb->pkt_type == PACKET_BROADCAST ||
365 skb->pkt_type == PACKET_MULTICAST) {
366 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
367
368 /* RFC 1122 3.3.6:
369 *
370 * When a host sends a datagram to a link-layer broadcast
371 * address, the IP destination address MUST be a legal IP
372 * broadcast or IP multicast address.
373 *
374 * A host SHOULD silently discard a datagram that is received
375 * via a link-layer broadcast (see Section 2.4) but does not
376 * specify an IP multicast or broadcast destination address.
377 *
378 * This doesn't explicitly say L2 *broadcast*, but broadcast is
379 * in a way a form of multicast and the most common use case for
380 * this is 802.11 protecting against cross-station spoofing (the
381 * so-called "hole-196" attack) so do it for both.
382 */
383 if (in_dev &&
384 IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
385 goto drop;
386 }
367 387
368 return dst_input(skb); 388 return dst_input(skb);
369 389
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index bd246792360b..4d158ff1def1 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -58,10 +58,9 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
58 if (opt->ts_needaddr) 58 if (opt->ts_needaddr)
59 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt); 59 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt);
60 if (opt->ts_needtime) { 60 if (opt->ts_needtime) {
61 struct timespec tv;
62 __be32 midtime; 61 __be32 midtime;
63 getnstimeofday(&tv); 62
64 midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); 63 midtime = inet_current_timestamp();
65 memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4); 64 memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
66 } 65 }
67 return; 66 return;
@@ -415,11 +414,10 @@ int ip_options_compile(struct net *net,
415 break; 414 break;
416 } 415 }
417 if (timeptr) { 416 if (timeptr) {
418 struct timespec tv; 417 __be32 midtime;
419 u32 midtime; 418
420 getnstimeofday(&tv); 419 midtime = inet_current_timestamp();
421 midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC; 420 memcpy(timeptr, &midtime, 4);
422 put_unaligned_be32(midtime, timeptr);
423 opt->is_changed = 1; 421 opt->is_changed = 1;
424 } 422 }
425 } else if ((optptr[3]&0xF) != IPOPT_TS_PRESPEC) { 423 } else if ((optptr[3]&0xF) != IPOPT_TS_PRESPEC) {
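
Both ip_options.c hunks replace the open-coded getnstimeofday() arithmetic with inet_current_timestamp(), which, matching the removed lines, yields the RFC 791 IP timestamp option value (milliseconds since UTC midnight) already in network byte order, so it can be memcpy'd straight into the option. The same computation as a self-contained sketch:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Milliseconds since midnight UT, in network byte order, as carried in the
 * IP timestamp option.  Same arithmetic as the lines removed above. */
static uint32_t ip_option_timestamp(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	return htonl((uint32_t)((ts.tv_sec % 86400) * 1000 + ts.tv_nsec / 1000000));
}

int main(void)
{
	uint32_t stamp = ip_option_timestamp();

	printf("timestamp option value: %u ms since midnight\n",
	       (unsigned)ntohl(stamp));
	return 0;
}
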
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 565bf64b2b7d..124bf0a66328 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -79,9 +79,6 @@
79#include <linux/netlink.h> 79#include <linux/netlink.h>
80#include <linux/tcp.h> 80#include <linux/tcp.h>
81 81
82int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
83EXPORT_SYMBOL(sysctl_ip_default_ttl);
84
85static int 82static int
86ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 83ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
87 unsigned int mtu, 84 unsigned int mtu,
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a50124260f5a..035ad645a8d9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -573,6 +573,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
573 int optname, char __user *optval, unsigned int optlen) 573 int optname, char __user *optval, unsigned int optlen)
574{ 574{
575 struct inet_sock *inet = inet_sk(sk); 575 struct inet_sock *inet = inet_sk(sk);
576 struct net *net = sock_net(sk);
576 int val = 0, err; 577 int val = 0, err;
577 bool needs_rtnl = setsockopt_needs_rtnl(optname); 578 bool needs_rtnl = setsockopt_needs_rtnl(optname);
578 579
@@ -912,7 +913,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
912 } 913 }
913 /* numsrc >= (1G-4) overflow in 32 bits */ 914 /* numsrc >= (1G-4) overflow in 32 bits */
914 if (msf->imsf_numsrc >= 0x3ffffffcU || 915 if (msf->imsf_numsrc >= 0x3ffffffcU ||
915 msf->imsf_numsrc > sysctl_igmp_max_msf) { 916 msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
916 kfree(msf); 917 kfree(msf);
917 err = -ENOBUFS; 918 err = -ENOBUFS;
918 break; 919 break;
@@ -1067,7 +1068,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
1067 1068
1068 /* numsrc >= (4G-140)/128 overflow in 32 bits */ 1069 /* numsrc >= (4G-140)/128 overflow in 32 bits */
1069 if (gsf->gf_numsrc >= 0x1ffffff || 1070 if (gsf->gf_numsrc >= 0x1ffffff ||
1070 gsf->gf_numsrc > sysctl_igmp_max_msf) { 1071 gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
1071 err = -ENOBUFS; 1072 err = -ENOBUFS;
1072 goto mc_msf_out; 1073 goto mc_msf_out;
1073 } 1074 }
@@ -1342,10 +1343,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1342 val = inet->tos; 1343 val = inet->tos;
1343 break; 1344 break;
1344 case IP_TTL: 1345 case IP_TTL:
1346 {
1347 struct net *net = sock_net(sk);
1345 val = (inet->uc_ttl == -1 ? 1348 val = (inet->uc_ttl == -1 ?
1346 sysctl_ip_default_ttl : 1349 net->ipv4.sysctl_ip_default_ttl :
1347 inet->uc_ttl); 1350 inet->uc_ttl);
1348 break; 1351 break;
1352 }
1349 case IP_HDRINCL: 1353 case IP_HDRINCL:
1350 val = inet->hdrincl; 1354 val = inet->hdrincl;
1351 break; 1355 break;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 336e6892a93c..a69ed94bda1b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -68,61 +68,6 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
68 IP_TNL_HASH_BITS); 68 IP_TNL_HASH_BITS);
69} 69}
70 70
71static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst, __be32 saddr)
73{
74 struct dst_entry *old_dst;
75
76 dst_clone(dst);
77 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
78 dst_release(old_dst);
79 idst->saddr = saddr;
80}
81
82static noinline void tunnel_dst_set(struct ip_tunnel *t,
83 struct dst_entry *dst, __be32 saddr)
84{
85 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
86}
87
88static void tunnel_dst_reset(struct ip_tunnel *t)
89{
90 tunnel_dst_set(t, NULL, 0);
91}
92
93void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
94{
95 int i;
96
97 for_each_possible_cpu(i)
98 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
99}
100EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
101
102static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
103 u32 cookie, __be32 *saddr)
104{
105 struct ip_tunnel_dst *idst;
106 struct dst_entry *dst;
107
108 rcu_read_lock();
109 idst = raw_cpu_ptr(t->dst_cache);
110 dst = rcu_dereference(idst->dst);
111 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
112 dst = NULL;
113 if (dst) {
114 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
115 *saddr = idst->saddr;
116 } else {
117 tunnel_dst_reset(t);
118 dst_release(dst);
119 dst = NULL;
120 }
121 }
122 rcu_read_unlock();
123 return (struct rtable *)dst;
124}
125
126static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, 71static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127 __be16 flags, __be32 key) 72 __be16 flags, __be32 key)
128{ 73{
@@ -381,11 +326,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
381 326
382 if (!IS_ERR(rt)) { 327 if (!IS_ERR(rt)) {
383 tdev = rt->dst.dev; 328 tdev = rt->dst.dev;
384 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
385 ip_rt_put(rt); 329 ip_rt_put(rt);
386 } 330 }
387 if (dev->type != ARPHRD_ETHER) 331 if (dev->type != ARPHRD_ETHER)
388 dev->flags |= IFF_POINTOPOINT; 332 dev->flags |= IFF_POINTOPOINT;
333
334 dst_cache_reset(&tunnel->dst_cache);
389 } 335 }
390 336
391 if (!tdev && tunnel->parms.link) 337 if (!tdev && tunnel->parms.link)
@@ -731,7 +677,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
731 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) 677 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
732 goto tx_error; 678 goto tx_error;
733 679
734 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; 680 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
681 NULL;
735 682
736 if (!rt) { 683 if (!rt) {
737 rt = ip_route_output_key(tunnel->net, &fl4); 684 rt = ip_route_output_key(tunnel->net, &fl4);
@@ -741,7 +688,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
741 goto tx_error; 688 goto tx_error;
742 } 689 }
743 if (connected) 690 if (connected)
744 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 691 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
692 fl4.saddr);
745 } 693 }
746 694
747 if (rt->dst.dev == dev) { 695 if (rt->dst.dev == dev) {
@@ -837,7 +785,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
837 if (set_mtu) 785 if (set_mtu)
838 dev->mtu = mtu; 786 dev->mtu = mtu;
839 } 787 }
840 ip_tunnel_dst_reset_all(t); 788 dst_cache_reset(&t->dst_cache);
841 netdev_state_change(dev); 789 netdev_state_change(dev);
842} 790}
843 791
@@ -976,7 +924,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)
976 struct ip_tunnel *tunnel = netdev_priv(dev); 924 struct ip_tunnel *tunnel = netdev_priv(dev);
977 925
978 gro_cells_destroy(&tunnel->gro_cells); 926 gro_cells_destroy(&tunnel->gro_cells);
979 free_percpu(tunnel->dst_cache); 927 dst_cache_destroy(&tunnel->dst_cache);
980 free_percpu(dev->tstats); 928 free_percpu(dev->tstats);
981 free_netdev(dev); 929 free_netdev(dev);
982} 930}
@@ -1170,15 +1118,15 @@ int ip_tunnel_init(struct net_device *dev)
1170 if (!dev->tstats) 1118 if (!dev->tstats)
1171 return -ENOMEM; 1119 return -ENOMEM;
1172 1120
1173 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); 1121 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1174 if (!tunnel->dst_cache) { 1122 if (err) {
1175 free_percpu(dev->tstats); 1123 free_percpu(dev->tstats);
1176 return -ENOMEM; 1124 return err;
1177 } 1125 }
1178 1126
1179 err = gro_cells_init(&tunnel->gro_cells, dev); 1127 err = gro_cells_init(&tunnel->gro_cells, dev);
1180 if (err) { 1128 if (err) {
1181 free_percpu(tunnel->dst_cache); 1129 dst_cache_destroy(&tunnel->dst_cache);
1182 free_percpu(dev->tstats); 1130 free_percpu(dev->tstats);
1183 return err; 1131 return err;
1184 } 1132 }
@@ -1208,7 +1156,7 @@ void ip_tunnel_uninit(struct net_device *dev)
1208 if (itn->fb_tunnel_dev != dev) 1156 if (itn->fb_tunnel_dev != dev)
1209 ip_tunnel_del(itn, netdev_priv(dev)); 1157 ip_tunnel_del(itn, netdev_priv(dev));
1210 1158
1211 ip_tunnel_dst_reset_all(tunnel); 1159 dst_cache_reset(&tunnel->dst_cache);
1212} 1160}
1213EXPORT_SYMBOL_GPL(ip_tunnel_uninit); 1161EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1214 1162
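
The ip_tunnel.c changes above retire the hand-rolled per-cpu ip_tunnel_dst cache in favour of the generic dst_cache helpers, but the transmit-path shape is unchanged: try the cache, fall back to a full route lookup on a miss, repopulate only for connected tunnels, and invalidate with dst_cache_reset() whenever the tunnel is rebound or its parameters change. The same lookup-cache pattern reduced to user space; struct route and lookup_route() are placeholders for the kernel's rtable and ip_route_output_key().

#include <stdio.h>

/* Placeholders for the kernel's rtable and ip_route_output_key(). */
struct route {
	char via[32];
};

static int lookup_route(const char *dst, struct route *r)
{
	snprintf(r->via, sizeof(r->via), "gw-for-%s", dst);  /* pretend lookup */
	return 0;
}

struct route_cache {
	int valid;
	struct route cached;
};

/* Same shape as the ip_tunnel_xmit() path: cache hit, else lookup + set. */
static int get_route(struct route_cache *c, const char *dst, int connected,
		     struct route *out)
{
	if (connected && c->valid) {
		*out = c->cached;            /* dst_cache_get_ip4() */
		return 0;
	}
	if (lookup_route(dst, out))
		return -1;
	if (connected) {
		c->cached = *out;            /* dst_cache_set_ip4() */
		c->valid = 1;
	}
	return 0;
}

/* dst_cache_reset() analogue, called when the tunnel is reconfigured. */
static void route_cache_reset(struct route_cache *c)
{
	c->valid = 0;
}

int main(void)
{
	struct route_cache cache = { 0 };
	struct route r;

	get_route(&cache, "203.0.113.7", 1, &r);
	printf("first xmit via %s, cache %s\n", r.via, cache.valid ? "warm" : "cold");
	route_cache_reset(&cache);
	printf("after reset, cache %s\n", cache.valid ? "warm" : "cold");
	return 0;
}
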
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 859d415c0b2d..6165f30c4d72 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -86,7 +86,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
86} 86}
87EXPORT_SYMBOL_GPL(iptunnel_xmit); 87EXPORT_SYMBOL_GPL(iptunnel_xmit);
88 88
89int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) 89int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
90 bool xnet)
90{ 91{
91 if (unlikely(!pskb_may_pull(skb, hdr_len))) 92 if (unlikely(!pskb_may_pull(skb, hdr_len)))
92 return -ENOMEM; 93 return -ENOMEM;
@@ -109,14 +110,12 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
109 skb->protocol = inner_proto; 110 skb->protocol = inner_proto;
110 } 111 }
111 112
112 nf_reset(skb);
113 secpath_reset(skb);
114 skb_clear_hash_if_not_l4(skb); 113 skb_clear_hash_if_not_l4(skb);
115 skb_dst_drop(skb);
116 skb->vlan_tci = 0; 114 skb->vlan_tci = 0;
117 skb_set_queue_mapping(skb, 0); 115 skb_set_queue_mapping(skb, 0);
118 skb->pkt_type = PACKET_HOST; 116 skb_scrub_packet(skb, xnet);
119 return 0; 117
118 return iptunnel_pull_offloads(skb);
120} 119}
121EXPORT_SYMBOL_GPL(iptunnel_pull_header); 120EXPORT_SYMBOL_GPL(iptunnel_pull_header);
122 121
@@ -148,7 +147,6 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
148EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); 147EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
149 148
150struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, 149struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
151 bool csum_help,
152 int gso_type_mask) 150 int gso_type_mask)
153{ 151{
154 int err; 152 int err;
@@ -166,20 +164,15 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
166 return skb; 164 return skb;
167 } 165 }
168 166
169 /* If packet is not gso and we are resolving any partial checksum, 167 if (skb->ip_summed != CHECKSUM_PARTIAL) {
170 * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
171 * on the outer header without confusing devices that implement
172 * NETIF_F_IP_CSUM with encapsulation.
173 */
174 if (csum_help)
175 skb->encapsulation = 0;
176
177 if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
178 err = skb_checksum_help(skb);
179 if (unlikely(err))
180 goto error;
181 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
182 skb->ip_summed = CHECKSUM_NONE; 168 skb->ip_summed = CHECKSUM_NONE;
169 /* We clear encapsulation here to prevent badly-written
170 * drivers potentially deciding to offload an inner checksum
171 * if we set CHECKSUM_PARTIAL on the outer header.
172 * This should go away when the drivers are all fixed.
173 */
174 skb->encapsulation = 0;
175 }
183 176
184 return skb; 177 return skb;
185error: 178error:
@@ -379,8 +372,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb,
379 if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || 372 if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) ||
380 nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || 373 nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) ||
381 nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || 374 nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
382 nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) || 375 nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
383 nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) || 376 nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
384 nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) 377 nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
385 return -ENOMEM; 378 return -ENOMEM;
386 379
@@ -406,6 +399,12 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
406 399
407void __init ip_tunnel_core_init(void) 400void __init ip_tunnel_core_init(void)
408{ 401{
402 /* If you land here, make sure whether increasing ip_tunnel_info's
403 * options_len is a reasonable choice with its usage in front ends
404 * (f.e., it's part of flow keys, etc).
405 */
406 BUILD_BUG_ON(IP_TUNNEL_OPTS_MAX != 255);
407
409 lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP); 408 lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
410 lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6); 409 lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
411} 410}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5cf10b777b7e..a917903d5e97 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -156,6 +156,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
156 struct dst_entry *dst = skb_dst(skb); 156 struct dst_entry *dst = skb_dst(skb);
157 struct net_device *tdev; /* Device to other host */ 157 struct net_device *tdev; /* Device to other host */
158 int err; 158 int err;
159 int mtu;
159 160
160 if (!dst) { 161 if (!dst) {
161 dev->stats.tx_carrier_errors++; 162 dev->stats.tx_carrier_errors++;
@@ -192,6 +193,23 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
192 tunnel->err_count = 0; 193 tunnel->err_count = 0;
193 } 194 }
194 195
196 mtu = dst_mtu(dst);
197 if (skb->len > mtu) {
198 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
199 if (skb->protocol == htons(ETH_P_IP)) {
200 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
201 htonl(mtu));
202 } else {
203 if (mtu < IPV6_MIN_MTU)
204 mtu = IPV6_MIN_MTU;
205
206 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
207 }
208
209 dst_release(dst);
210 goto tx_error;
211 }
212
195 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); 213 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
196 skb_dst_set(skb, dst); 214 skb_dst_set(skb, dst);
197 skb->dev = skb_dst(skb)->dev; 215 skb->dev = skb_dst(skb)->dev;
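
vti_xmit() gains a path-MTU check before handing the packet to the underlying device: if the skb exceeds dst_mtu(), the route's PMTU is updated and the sender gets an ICMP fragmentation-needed (IPv4) or an ICMPv6 packet-too-big clamped to the 1280-byte IPv6 minimum, after which the dst is released and the packet takes the tx_error path. A small sketch of only the error-selection step; the printfs stand in for icmp_send()/icmpv6_send():

#include <stdio.h>

#define IPV6_MIN_MTU 1280

enum proto { PKT_IPV4, PKT_IPV6 };

/* Decide which "too big" error to report, mirroring the shape of the
 * new check in vti_xmit(). */
static void report_too_big(enum proto p, int pkt_len, int mtu)
{
	if (pkt_len <= mtu)
		return;			/* fits, nothing to do */

	if (p == PKT_IPV4) {
		printf("ICMP dest-unreachable/frag-needed, mtu=%d\n", mtu);
	} else {
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;	/* never advertise below the v6 floor */
		printf("ICMPv6 packet-too-big, mtu=%d\n", mtu);
	}
}

int main(void)
{
	report_too_big(PKT_IPV4, 1600, 1400);
	report_too_big(PKT_IPV6, 1600, 1000);
	return 0;
}
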
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4044da61e747..ec51d02166de 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -195,7 +195,7 @@ static int ipip_rcv(struct sk_buff *skb)
195 if (tunnel) { 195 if (tunnel) {
196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
197 goto drop; 197 goto drop;
198 if (iptunnel_pull_header(skb, 0, tpi.proto)) 198 if (iptunnel_pull_header(skb, 0, tpi.proto, false))
199 goto drop; 199 goto drop;
200 return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); 200 return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
201 } 201 }
@@ -219,7 +219,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
219 if (unlikely(skb->protocol != htons(ETH_P_IP))) 219 if (unlikely(skb->protocol != htons(ETH_P_IP)))
220 goto tx_error; 220 goto tx_error;
221 221
222 skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); 222 skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
223 if (IS_ERR(skb)) 223 if (IS_ERR(skb))
224 goto out; 224 goto out;
225 225
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b488cac9c5ca..4133b0f513af 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -359,11 +359,12 @@ unsigned int arpt_do_table(struct sk_buff *skb,
359} 359}
360 360
361/* All zeroes == unconditional rule. */ 361/* All zeroes == unconditional rule. */
362static inline bool unconditional(const struct arpt_arp *arp) 362static inline bool unconditional(const struct arpt_entry *e)
363{ 363{
364 static const struct arpt_arp uncond; 364 static const struct arpt_arp uncond;
365 365
366 return memcmp(arp, &uncond, sizeof(uncond)) == 0; 366 return e->target_offset == sizeof(struct arpt_entry) &&
367 memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
367} 368}
368 369
369/* Figures out from what hook each rule can be called: returns 0 if 370/* Figures out from what hook each rule can be called: returns 0 if
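
The reworked unconditional() closes a hole in the rule-chain validation: an entry is treated as unconditional only when its target begins right after the fixed header (target_offset == sizeof(struct arpt_entry)) and the arp match fields are all zero, so an entry that drags extra match data along can no longer pass for a chain tail. The same sentinel-compare idea on an invented record type, as a standalone sketch:

#include <stdio.h>
#include <string.h>

/* Toy stand-ins for arpt_entry/arpt_arp; the field names are illustrative. */
struct toy_match { unsigned char bytes[16]; };

struct toy_entry {
	struct toy_match match;
	unsigned short target_offset;	/* where the target blob starts */
	unsigned short next_offset;	/* total size of this entry */
};

static int is_unconditional(const struct toy_entry *e)
{
	static const struct toy_match uncond;	/* zero-initialized sentinel */

	return e->target_offset == sizeof(struct toy_entry) &&
	       memcmp(&e->match, &uncond, sizeof(uncond)) == 0;
}

int main(void)
{
	struct toy_entry plain = { .target_offset = sizeof(struct toy_entry) };
	struct toy_entry with_match = { .target_offset = sizeof(struct toy_entry) + 8 };

	printf("plain: %d, with extra matches: %d\n",
	       is_unconditional(&plain), is_unconditional(&with_match));
	return 0;
}
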
@@ -402,11 +403,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
402 |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); 403 |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
403 404
404 /* Unconditional return/END. */ 405 /* Unconditional return/END. */
405 if ((e->target_offset == sizeof(struct arpt_entry) && 406 if ((unconditional(e) &&
406 (strcmp(t->target.u.user.name, 407 (strcmp(t->target.u.user.name,
407 XT_STANDARD_TARGET) == 0) && 408 XT_STANDARD_TARGET) == 0) &&
408 t->verdict < 0 && unconditional(&e->arp)) || 409 t->verdict < 0) || visited) {
409 visited) {
410 unsigned int oldpos, size; 410 unsigned int oldpos, size;
411 411
412 if ((strcmp(t->target.u.user.name, 412 if ((strcmp(t->target.u.user.name,
@@ -474,14 +474,12 @@ next:
474 return 1; 474 return 1;
475} 475}
476 476
477static inline int check_entry(const struct arpt_entry *e, const char *name) 477static inline int check_entry(const struct arpt_entry *e)
478{ 478{
479 const struct xt_entry_target *t; 479 const struct xt_entry_target *t;
480 480
481 if (!arp_checkentry(&e->arp)) { 481 if (!arp_checkentry(&e->arp))
482 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
483 return -EINVAL; 482 return -EINVAL;
484 }
485 483
486 if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) 484 if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
487 return -EINVAL; 485 return -EINVAL;
@@ -522,10 +520,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
522 struct xt_target *target; 520 struct xt_target *target;
523 int ret; 521 int ret;
524 522
525 ret = check_entry(e, name);
526 if (ret)
527 return ret;
528
529 e->counters.pcnt = xt_percpu_counter_alloc(); 523 e->counters.pcnt = xt_percpu_counter_alloc();
530 if (IS_ERR_VALUE(e->counters.pcnt)) 524 if (IS_ERR_VALUE(e->counters.pcnt))
531 return -ENOMEM; 525 return -ENOMEM;
@@ -557,7 +551,7 @@ static bool check_underflow(const struct arpt_entry *e)
557 const struct xt_entry_target *t; 551 const struct xt_entry_target *t;
558 unsigned int verdict; 552 unsigned int verdict;
559 553
560 if (!unconditional(&e->arp)) 554 if (!unconditional(e))
561 return false; 555 return false;
562 t = arpt_get_target_c(e); 556 t = arpt_get_target_c(e);
563 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 557 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -576,9 +570,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
576 unsigned int valid_hooks) 570 unsigned int valid_hooks)
577{ 571{
578 unsigned int h; 572 unsigned int h;
573 int err;
579 574
580 if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || 575 if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
581 (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { 576 (unsigned char *)e + sizeof(struct arpt_entry) >= limit ||
577 (unsigned char *)e + e->next_offset > limit) {
582 duprintf("Bad offset %p\n", e); 578 duprintf("Bad offset %p\n", e);
583 return -EINVAL; 579 return -EINVAL;
584 } 580 }
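
check_entry_size_and_hooks() now also rejects entries whose next_offset would run past the end of the user-supplied blob, and the cheap check_entry() sanity test moves up so it runs before any per-entry allocation. A userspace sketch of walking variable-length records with those bounds enforced; struct rec and the limits are invented for the example:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct rec {			/* toy variable-length record */
	uint16_t next_offset;	/* total size, header + payload */
	uint16_t type;
};

/* Walk records in buf[0..len), refusing anything that is misaligned,
 * truncated, or that would run past the end of the buffer. */
static int walk(const unsigned char *buf, size_t len)
{
	size_t off = 0;

	while (off < len) {
		const struct rec *r;

		if ((off % _Alignof(struct rec)) != 0 || len - off < sizeof(*r))
			return -1;		/* truncated or misaligned header */

		r = (const struct rec *)(buf + off);
		if (r->next_offset < sizeof(*r) || r->next_offset > len - off)
			return -1;		/* record overruns the blob */

		printf("record type %u, size %u\n",
		       (unsigned)r->type, (unsigned)r->next_offset);
		off += r->next_offset;
	}
	return 0;
}

int main(void)
{
	unsigned char blob[32] = { 0 };
	struct rec r = { .next_offset = 16, .type = 1 };

	memcpy(blob, &r, sizeof(r));
	r.type = 2;
	memcpy(blob + 16, &r, sizeof(r));
	return walk(blob, sizeof(blob));
}
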
@@ -590,6 +586,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
590 return -EINVAL; 586 return -EINVAL;
591 } 587 }
592 588
589 err = check_entry(e);
590 if (err)
591 return err;
592
593 /* Check hooks & underflows */ 593 /* Check hooks & underflows */
594 for (h = 0; h < NF_ARP_NUMHOOKS; h++) { 594 for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
595 if (!(valid_hooks & (1 << h))) 595 if (!(valid_hooks & (1 << h)))
@@ -598,9 +598,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
598 newinfo->hook_entry[h] = hook_entries[h]; 598 newinfo->hook_entry[h] = hook_entries[h];
599 if ((unsigned char *)e - base == underflows[h]) { 599 if ((unsigned char *)e - base == underflows[h]) {
600 if (!check_underflow(e)) { 600 if (!check_underflow(e)) {
601 pr_err("Underflows must be unconditional and " 601 pr_debug("Underflows must be unconditional and "
602 "use the STANDARD target with " 602 "use the STANDARD target with "
603 "ACCEPT/DROP\n"); 603 "ACCEPT/DROP\n");
604 return -EINVAL; 604 return -EINVAL;
605 } 605 }
606 newinfo->underflow[h] = underflows[h]; 606 newinfo->underflow[h] = underflows[h];
@@ -969,6 +969,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
969 sizeof(struct arpt_get_entries) + get.size); 969 sizeof(struct arpt_get_entries) + get.size);
970 return -EINVAL; 970 return -EINVAL;
971 } 971 }
972 get.name[sizeof(get.name) - 1] = '\0';
972 973
973 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 974 t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
974 if (!IS_ERR_OR_NULL(t)) { 975 if (!IS_ERR_OR_NULL(t)) {
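
get_entries() — and its compat counterpart below — now force a NUL into the last byte of the table name copied from userspace before passing it to xt_find_table_lock(), so a name that fills the buffer exactly can never be read as an unterminated string. The same defensive step in plain C, using a hypothetical fixed-size request rather than the xt structures:

#include <stdio.h>
#include <string.h>

#define NAME_LEN 8

struct request {
	char name[NAME_LEN];	/* copied verbatim from an untrusted source */
};

static void lookup(struct request *req)
{
	/* Terminate before treating the buffer as a C string. */
	req->name[sizeof(req->name) - 1] = '\0';
	printf("looking up table \"%s\"\n", req->name);
}

int main(void)
{
	struct request req;

	/* Simulate userspace filling all eight bytes with no terminator. */
	memcpy(req.name, "security", NAME_LEN);
	lookup(&req);	/* prints "securit" instead of reading past the buffer */
	return 0;
}
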
@@ -1233,7 +1234,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1233 1234
1234 duprintf("check_compat_entry_size_and_hooks %p\n", e); 1235 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1235 if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || 1236 if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
1236 (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { 1237 (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit ||
1238 (unsigned char *)e + e->next_offset > limit) {
1237 duprintf("Bad offset %p, limit = %p\n", e, limit); 1239 duprintf("Bad offset %p, limit = %p\n", e, limit);
1238 return -EINVAL; 1240 return -EINVAL;
1239 } 1241 }
@@ -1246,7 +1248,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1246 } 1248 }
1247 1249
1248 /* For purposes of check_entry casting the compat entry is fine */ 1250 /* For purposes of check_entry casting the compat entry is fine */
1249 ret = check_entry((struct arpt_entry *)e, name); 1251 ret = check_entry((struct arpt_entry *)e);
1250 if (ret) 1252 if (ret)
1251 return ret; 1253 return ret;
1252 1254
@@ -1662,6 +1664,7 @@ static int compat_get_entries(struct net *net,
1662 *len, sizeof(get) + get.size); 1664 *len, sizeof(get) + get.size);
1663 return -EINVAL; 1665 return -EINVAL;
1664 } 1666 }
1667 get.name[sizeof(get.name) - 1] = '\0';
1665 1668
1666 xt_compat_lock(NFPROTO_ARP); 1669 xt_compat_lock(NFPROTO_ARP);
1667 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 1670 t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
@@ -1780,9 +1783,29 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1780 return ret; 1783 return ret;
1781} 1784}
1782 1785
1783struct xt_table *arpt_register_table(struct net *net, 1786static void __arpt_unregister_table(struct xt_table *table)
1784 const struct xt_table *table, 1787{
1785 const struct arpt_replace *repl) 1788 struct xt_table_info *private;
1789 void *loc_cpu_entry;
1790 struct module *table_owner = table->me;
1791 struct arpt_entry *iter;
1792
1793 private = xt_unregister_table(table);
1794
1795 /* Decrease module usage counts and free resources */
1796 loc_cpu_entry = private->entries;
1797 xt_entry_foreach(iter, loc_cpu_entry, private->size)
1798 cleanup_entry(iter);
1799 if (private->number > private->initial_entries)
1800 module_put(table_owner);
1801 xt_free_table_info(private);
1802}
1803
1804int arpt_register_table(struct net *net,
1805 const struct xt_table *table,
1806 const struct arpt_replace *repl,
1807 const struct nf_hook_ops *ops,
1808 struct xt_table **res)
1786{ 1809{
1787 int ret; 1810 int ret;
1788 struct xt_table_info *newinfo; 1811 struct xt_table_info *newinfo;
@@ -1791,10 +1814,8 @@ struct xt_table *arpt_register_table(struct net *net,
1791 struct xt_table *new_table; 1814 struct xt_table *new_table;
1792 1815
1793 newinfo = xt_alloc_table_info(repl->size); 1816 newinfo = xt_alloc_table_info(repl->size);
1794 if (!newinfo) { 1817 if (!newinfo)
1795 ret = -ENOMEM; 1818 return -ENOMEM;
1796 goto out;
1797 }
1798 1819
1799 loc_cpu_entry = newinfo->entries; 1820 loc_cpu_entry = newinfo->entries;
1800 memcpy(loc_cpu_entry, repl->entries, repl->size); 1821 memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -1809,30 +1830,28 @@ struct xt_table *arpt_register_table(struct net *net,
1809 ret = PTR_ERR(new_table); 1830 ret = PTR_ERR(new_table);
1810 goto out_free; 1831 goto out_free;
1811 } 1832 }
1812 return new_table; 1833
1834 /* set res now, will see skbs right after nf_register_net_hooks */
1835 WRITE_ONCE(*res, new_table);
1836
1837 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
1838 if (ret != 0) {
1839 __arpt_unregister_table(new_table);
1840 *res = NULL;
1841 }
1842
1843 return ret;
1813 1844
1814out_free: 1845out_free:
1815 xt_free_table_info(newinfo); 1846 xt_free_table_info(newinfo);
1816out: 1847 return ret;
1817 return ERR_PTR(ret);
1818} 1848}
1819 1849
1820void arpt_unregister_table(struct xt_table *table) 1850void arpt_unregister_table(struct net *net, struct xt_table *table,
1851 const struct nf_hook_ops *ops)
1821{ 1852{
1822 struct xt_table_info *private; 1853 nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
1823 void *loc_cpu_entry; 1854 __arpt_unregister_table(table);
1824 struct module *table_owner = table->me;
1825 struct arpt_entry *iter;
1826
1827 private = xt_unregister_table(table);
1828
1829 /* Decrease module usage counts and free resources */
1830 loc_cpu_entry = private->entries;
1831 xt_entry_foreach(iter, loc_cpu_entry, private->size)
1832 cleanup_entry(iter);
1833 if (private->number > private->initial_entries)
1834 module_put(table_owner);
1835 xt_free_table_info(private);
1836} 1855}
1837 1856
1838/* The built-in targets: standard (NULL) and error. */ 1857/* The built-in targets: standard (NULL) and error. */
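
arpt_register_table() changes shape: it returns an error code, publishes the new table through a caller-supplied pointer with WRITE_ONCE() before the hooks are registered (so packets arriving the moment the hooks go live already see a valid table), and unwinds through the new __arpt_unregister_table() and a NULLed *res if hook registration fails. A sketch of that publish-then-enable ordering with everything kernel-specific stripped out; the names are invented and the GCC __atomic builtin merely stands in for WRITE_ONCE():

#include <stdio.h>

struct table { const char *name; };

static struct table the_table = { "filter" };
static struct table *visible_table;	/* what the packet path would read */

static int enable_hooks(int should_fail)
{
	return should_fail ? -1 : 0;
}

/* Publish the table before enabling hooks; unwind if enabling fails. */
static int register_table(int hooks_fail)
{
	__atomic_store_n(&visible_table, &the_table, __ATOMIC_RELEASE);

	if (enable_hooks(hooks_fail)) {
		__atomic_store_n(&visible_table, (struct table *)0, __ATOMIC_RELEASE);
		return -1;
	}
	return 0;
}

int main(void)
{
	if (register_table(0) == 0)
		printf("success: visible as \"%s\"\n", visible_table->name);

	visible_table = 0;
	if (register_table(1) != 0)
		printf("failure: nothing left published (%p)\n",
		       (void *)visible_table);
	return 0;
}

The real helper additionally drops module references and frees the table info; the sketch keeps only the ordering and the rollback.
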
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 1897ee160920..8f8713b4388f 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -17,12 +17,15 @@ MODULE_DESCRIPTION("arptables filter table");
17#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ 17#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
18 (1 << NF_ARP_FORWARD)) 18 (1 << NF_ARP_FORWARD))
19 19
20static int __net_init arptable_filter_table_init(struct net *net);
21
20static const struct xt_table packet_filter = { 22static const struct xt_table packet_filter = {
21 .name = "filter", 23 .name = "filter",
22 .valid_hooks = FILTER_VALID_HOOKS, 24 .valid_hooks = FILTER_VALID_HOOKS,
23 .me = THIS_MODULE, 25 .me = THIS_MODULE,
24 .af = NFPROTO_ARP, 26 .af = NFPROTO_ARP,
25 .priority = NF_IP_PRI_FILTER, 27 .priority = NF_IP_PRI_FILTER,
28 .table_init = arptable_filter_table_init,
26}; 29};
27 30
28/* The work comes in here from netfilter.c */ 31/* The work comes in here from netfilter.c */
@@ -35,26 +38,32 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,
35 38
36static struct nf_hook_ops *arpfilter_ops __read_mostly; 39static struct nf_hook_ops *arpfilter_ops __read_mostly;
37 40
38static int __net_init arptable_filter_net_init(struct net *net) 41static int __net_init arptable_filter_table_init(struct net *net)
39{ 42{
40 struct arpt_replace *repl; 43 struct arpt_replace *repl;
41 44 int err;
45
46 if (net->ipv4.arptable_filter)
47 return 0;
48
42 repl = arpt_alloc_initial_table(&packet_filter); 49 repl = arpt_alloc_initial_table(&packet_filter);
43 if (repl == NULL) 50 if (repl == NULL)
44 return -ENOMEM; 51 return -ENOMEM;
45 net->ipv4.arptable_filter = 52 err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops,
46 arpt_register_table(net, &packet_filter, repl); 53 &net->ipv4.arptable_filter);
47 kfree(repl); 54 kfree(repl);
48 return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter); 55 return err;
49} 56}
50 57
51static void __net_exit arptable_filter_net_exit(struct net *net) 58static void __net_exit arptable_filter_net_exit(struct net *net)
52{ 59{
53 arpt_unregister_table(net->ipv4.arptable_filter); 60 if (!net->ipv4.arptable_filter)
61 return;
62 arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops);
63 net->ipv4.arptable_filter = NULL;
54} 64}
55 65
56static struct pernet_operations arptable_filter_net_ops = { 66static struct pernet_operations arptable_filter_net_ops = {
57 .init = arptable_filter_net_init,
58 .exit = arptable_filter_net_exit, 67 .exit = arptable_filter_net_exit,
59}; 68};
60 69
@@ -62,26 +71,29 @@ static int __init arptable_filter_init(void)
62{ 71{
63 int ret; 72 int ret;
64 73
74 arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
75 if (IS_ERR(arpfilter_ops))
76 return PTR_ERR(arpfilter_ops);
77
65 ret = register_pernet_subsys(&arptable_filter_net_ops); 78 ret = register_pernet_subsys(&arptable_filter_net_ops);
66 if (ret < 0) 79 if (ret < 0) {
80 kfree(arpfilter_ops);
67 return ret; 81 return ret;
82 }
68 83
69 arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); 84 ret = arptable_filter_table_init(&init_net);
70 if (IS_ERR(arpfilter_ops)) { 85 if (ret) {
71 ret = PTR_ERR(arpfilter_ops); 86 unregister_pernet_subsys(&arptable_filter_net_ops);
72 goto cleanup_table; 87 kfree(arpfilter_ops);
73 } 88 }
74 return ret;
75 89
76cleanup_table:
77 unregister_pernet_subsys(&arptable_filter_net_ops);
78 return ret; 90 return ret;
79} 91}
80 92
81static void __exit arptable_filter_fini(void) 93static void __exit arptable_filter_fini(void)
82{ 94{
83 xt_hook_unlink(&packet_filter, arpfilter_ops);
84 unregister_pernet_subsys(&arptable_filter_net_ops); 95 unregister_pernet_subsys(&arptable_filter_net_ops);
96 kfree(arpfilter_ops);
85} 97}
86 98
87module_init(arptable_filter_init); 99module_init(arptable_filter_init);
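
arptable_filter now registers only a pernet exit handler: the table is created on demand through the new .table_init callback (plus once up front for init_net in module init), table_init is idempotent, and the exit path simply returns for namespaces that never instantiated the table. The create-on-first-use pattern in miniature, with invented types:

#include <stdio.h>
#include <stdlib.h>

struct table { int nrules; };

struct netns {
	struct table *filter;	/* NULL until the table is first needed */
};

/* Idempotent: returns 0 immediately if this namespace already has the table. */
static int table_init(struct netns *ns)
{
	if (ns->filter)
		return 0;
	ns->filter = calloc(1, sizeof(*ns->filter));
	return ns->filter ? 0 : -1;
}

/* The exit path must cope with namespaces that never created the table. */
static void table_exit(struct netns *ns)
{
	if (!ns->filter)
		return;
	free(ns->filter);
	ns->filter = NULL;
}

int main(void)
{
	struct netns a = { 0 }, b = { 0 };

	table_init(&a);		/* only "a" ever uses the table */
	table_init(&a);		/* second call is a no-op */
	printf("a has table: %d, b has table: %d\n", !!a.filter, !!b.filter);

	table_exit(&a);
	table_exit(&b);		/* safe even though b never created one */
	return 0;
}
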
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b99affad6ba1..631c100a1338 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset)
168 168
169/* All zeroes == unconditional rule. */ 169/* All zeroes == unconditional rule. */
170/* Mildly perf critical (only if packet tracing is on) */ 170/* Mildly perf critical (only if packet tracing is on) */
171static inline bool unconditional(const struct ipt_ip *ip) 171static inline bool unconditional(const struct ipt_entry *e)
172{ 172{
173 static const struct ipt_ip uncond; 173 static const struct ipt_ip uncond;
174 174
175 return memcmp(ip, &uncond, sizeof(uncond)) == 0; 175 return e->target_offset == sizeof(struct ipt_entry) &&
176 memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
176#undef FWINV 177#undef FWINV
177} 178}
178 179
@@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
229 } else if (s == e) { 230 } else if (s == e) {
230 (*rulenum)++; 231 (*rulenum)++;
231 232
232 if (s->target_offset == sizeof(struct ipt_entry) && 233 if (unconditional(s) &&
233 strcmp(t->target.u.kernel.target->name, 234 strcmp(t->target.u.kernel.target->name,
234 XT_STANDARD_TARGET) == 0 && 235 XT_STANDARD_TARGET) == 0 &&
235 t->verdict < 0 && 236 t->verdict < 0) {
236 unconditional(&s->ip)) {
237 /* Tail of chains: STANDARD target (return/policy) */ 237 /* Tail of chains: STANDARD target (return/policy) */
238 *comment = *chainname == hookname 238 *comment = *chainname == hookname
239 ? comments[NF_IP_TRACE_COMMENT_POLICY] 239 ? comments[NF_IP_TRACE_COMMENT_POLICY]
@@ -476,11 +476,10 @@ mark_source_chains(const struct xt_table_info *newinfo,
476 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); 476 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
477 477
478 /* Unconditional return/END. */ 478 /* Unconditional return/END. */
479 if ((e->target_offset == sizeof(struct ipt_entry) && 479 if ((unconditional(e) &&
480 (strcmp(t->target.u.user.name, 480 (strcmp(t->target.u.user.name,
481 XT_STANDARD_TARGET) == 0) && 481 XT_STANDARD_TARGET) == 0) &&
482 t->verdict < 0 && unconditional(&e->ip)) || 482 t->verdict < 0) || visited) {
483 visited) {
484 unsigned int oldpos, size; 483 unsigned int oldpos, size;
485 484
486 if ((strcmp(t->target.u.user.name, 485 if ((strcmp(t->target.u.user.name,
@@ -569,14 +568,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
569} 568}
570 569
571static int 570static int
572check_entry(const struct ipt_entry *e, const char *name) 571check_entry(const struct ipt_entry *e)
573{ 572{
574 const struct xt_entry_target *t; 573 const struct xt_entry_target *t;
575 574
576 if (!ip_checkentry(&e->ip)) { 575 if (!ip_checkentry(&e->ip))
577 duprintf("ip check failed %p %s.\n", e, name);
578 return -EINVAL; 576 return -EINVAL;
579 }
580 577
581 if (e->target_offset + sizeof(struct xt_entry_target) > 578 if (e->target_offset + sizeof(struct xt_entry_target) >
582 e->next_offset) 579 e->next_offset)
@@ -666,10 +663,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
666 struct xt_mtchk_param mtpar; 663 struct xt_mtchk_param mtpar;
667 struct xt_entry_match *ematch; 664 struct xt_entry_match *ematch;
668 665
669 ret = check_entry(e, name);
670 if (ret)
671 return ret;
672
673 e->counters.pcnt = xt_percpu_counter_alloc(); 666 e->counters.pcnt = xt_percpu_counter_alloc();
674 if (IS_ERR_VALUE(e->counters.pcnt)) 667 if (IS_ERR_VALUE(e->counters.pcnt))
675 return -ENOMEM; 668 return -ENOMEM;
@@ -721,7 +714,7 @@ static bool check_underflow(const struct ipt_entry *e)
721 const struct xt_entry_target *t; 714 const struct xt_entry_target *t;
722 unsigned int verdict; 715 unsigned int verdict;
723 716
724 if (!unconditional(&e->ip)) 717 if (!unconditional(e))
725 return false; 718 return false;
726 t = ipt_get_target_c(e); 719 t = ipt_get_target_c(e);
727 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 720 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -741,9 +734,11 @@ check_entry_size_and_hooks(struct ipt_entry *e,
741 unsigned int valid_hooks) 734 unsigned int valid_hooks)
742{ 735{
743 unsigned int h; 736 unsigned int h;
737 int err;
744 738
745 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || 739 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
746 (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { 740 (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
741 (unsigned char *)e + e->next_offset > limit) {
747 duprintf("Bad offset %p\n", e); 742 duprintf("Bad offset %p\n", e);
748 return -EINVAL; 743 return -EINVAL;
749 } 744 }
@@ -755,6 +750,10 @@ check_entry_size_and_hooks(struct ipt_entry *e,
755 return -EINVAL; 750 return -EINVAL;
756 } 751 }
757 752
753 err = check_entry(e);
754 if (err)
755 return err;
756
758 /* Check hooks & underflows */ 757 /* Check hooks & underflows */
759 for (h = 0; h < NF_INET_NUMHOOKS; h++) { 758 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
760 if (!(valid_hooks & (1 << h))) 759 if (!(valid_hooks & (1 << h)))
@@ -763,9 +762,9 @@ check_entry_size_and_hooks(struct ipt_entry *e,
763 newinfo->hook_entry[h] = hook_entries[h]; 762 newinfo->hook_entry[h] = hook_entries[h];
764 if ((unsigned char *)e - base == underflows[h]) { 763 if ((unsigned char *)e - base == underflows[h]) {
765 if (!check_underflow(e)) { 764 if (!check_underflow(e)) {
766 pr_err("Underflows must be unconditional and " 765 pr_debug("Underflows must be unconditional and "
767 "use the STANDARD target with " 766 "use the STANDARD target with "
768 "ACCEPT/DROP\n"); 767 "ACCEPT/DROP\n");
769 return -EINVAL; 768 return -EINVAL;
770 } 769 }
771 newinfo->underflow[h] = underflows[h]; 770 newinfo->underflow[h] = underflows[h];
@@ -1157,6 +1156,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1157 *len, sizeof(get) + get.size); 1156 *len, sizeof(get) + get.size);
1158 return -EINVAL; 1157 return -EINVAL;
1159 } 1158 }
1159 get.name[sizeof(get.name) - 1] = '\0';
1160 1160
1161 t = xt_find_table_lock(net, AF_INET, get.name); 1161 t = xt_find_table_lock(net, AF_INET, get.name);
1162 if (!IS_ERR_OR_NULL(t)) { 1162 if (!IS_ERR_OR_NULL(t)) {
@@ -1493,7 +1493,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1493 1493
1494 duprintf("check_compat_entry_size_and_hooks %p\n", e); 1494 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1495 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || 1495 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
1496 (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { 1496 (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
1497 (unsigned char *)e + e->next_offset > limit) {
1497 duprintf("Bad offset %p, limit = %p\n", e, limit); 1498 duprintf("Bad offset %p, limit = %p\n", e, limit);
1498 return -EINVAL; 1499 return -EINVAL;
1499 } 1500 }
@@ -1506,7 +1507,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1506 } 1507 }
1507 1508
1508 /* For purposes of check_entry casting the compat entry is fine */ 1509 /* For purposes of check_entry casting the compat entry is fine */
1509 ret = check_entry((struct ipt_entry *)e, name); 1510 ret = check_entry((struct ipt_entry *)e);
1510 if (ret) 1511 if (ret)
1511 return ret; 1512 return ret;
1512 1513
@@ -1935,6 +1936,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1935 *len, sizeof(get) + get.size); 1936 *len, sizeof(get) + get.size);
1936 return -EINVAL; 1937 return -EINVAL;
1937 } 1938 }
1939 get.name[sizeof(get.name) - 1] = '\0';
1938 1940
1939 xt_compat_lock(AF_INET); 1941 xt_compat_lock(AF_INET);
1940 t = xt_find_table_lock(net, AF_INET, get.name); 1942 t = xt_find_table_lock(net, AF_INET, get.name);
@@ -2062,9 +2064,27 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2062 return ret; 2064 return ret;
2063} 2065}
2064 2066
2065struct xt_table *ipt_register_table(struct net *net, 2067static void __ipt_unregister_table(struct net *net, struct xt_table *table)
2066 const struct xt_table *table, 2068{
2067 const struct ipt_replace *repl) 2069 struct xt_table_info *private;
2070 void *loc_cpu_entry;
2071 struct module *table_owner = table->me;
2072 struct ipt_entry *iter;
2073
2074 private = xt_unregister_table(table);
2075
2076 /* Decrease module usage counts and free resources */
2077 loc_cpu_entry = private->entries;
2078 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2079 cleanup_entry(iter, net);
2080 if (private->number > private->initial_entries)
2081 module_put(table_owner);
2082 xt_free_table_info(private);
2083}
2084
2085int ipt_register_table(struct net *net, const struct xt_table *table,
2086 const struct ipt_replace *repl,
2087 const struct nf_hook_ops *ops, struct xt_table **res)
2068{ 2088{
2069 int ret; 2089 int ret;
2070 struct xt_table_info *newinfo; 2090 struct xt_table_info *newinfo;
@@ -2073,10 +2093,8 @@ struct xt_table *ipt_register_table(struct net *net,
2073 struct xt_table *new_table; 2093 struct xt_table *new_table;
2074 2094
2075 newinfo = xt_alloc_table_info(repl->size); 2095 newinfo = xt_alloc_table_info(repl->size);
2076 if (!newinfo) { 2096 if (!newinfo)
2077 ret = -ENOMEM; 2097 return -ENOMEM;
2078 goto out;
2079 }
2080 2098
2081 loc_cpu_entry = newinfo->entries; 2099 loc_cpu_entry = newinfo->entries;
2082 memcpy(loc_cpu_entry, repl->entries, repl->size); 2100 memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -2091,30 +2109,27 @@ struct xt_table *ipt_register_table(struct net *net,
2091 goto out_free; 2109 goto out_free;
2092 } 2110 }
2093 2111
2094 return new_table; 2112 /* set res now, will see skbs right after nf_register_net_hooks */
2113 WRITE_ONCE(*res, new_table);
2114
2115 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
2116 if (ret != 0) {
2117 __ipt_unregister_table(net, new_table);
2118 *res = NULL;
2119 }
2120
2121 return ret;
2095 2122
2096out_free: 2123out_free:
2097 xt_free_table_info(newinfo); 2124 xt_free_table_info(newinfo);
2098out: 2125 return ret;
2099 return ERR_PTR(ret);
2100} 2126}
2101 2127
2102void ipt_unregister_table(struct net *net, struct xt_table *table) 2128void ipt_unregister_table(struct net *net, struct xt_table *table,
2129 const struct nf_hook_ops *ops)
2103{ 2130{
2104 struct xt_table_info *private; 2131 nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
2105 void *loc_cpu_entry; 2132 __ipt_unregister_table(net, table);
2106 struct module *table_owner = table->me;
2107 struct ipt_entry *iter;
2108
2109 private = xt_unregister_table(table);
2110
2111 /* Decrease module usage counts and free resources */
2112 loc_cpu_entry = private->entries;
2113 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2114 cleanup_entry(iter, net);
2115 if (private->number > private->initial_entries)
2116 module_put(table_owner);
2117 xt_free_table_info(private);
2118} 2133}
2119 2134
2120/* Returns 1 if the type and code is matched by the range, 0 otherwise */ 2135/* Returns 1 if the type and code is matched by the range, 0 otherwise */
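
ip_tables gets the same unconditional() and next_offset hardening as arp_tables, and the underflow check — base-chain underflows must be unconditional STANDARD rules with an ACCEPT or DROP verdict — now complains with pr_debug() instead of pr_err(), since a malformed ruleset is a userspace bug rather than a kernel event. For orientation, standard verdicts are stored negatively in the ruleset blob, the encoding visible in iptable_filter's -NF_ACCEPT - 1 / -NF_DROP - 1 initializer further down; a tiny decoder:

#include <stdio.h>

/* Netfilter verdict values (include/uapi/linux/netfilter.h). */
#define NF_DROP   0
#define NF_ACCEPT 1

/* In a ruleset blob, a STANDARD target's verdict field is either a positive
 * jump offset or a negative builtin verdict encoded as -(v) - 1. */
static const char *decode(int verdict)
{
	if (verdict >= 0)
		return "jump to another rule";
	switch (-verdict - 1) {
	case NF_ACCEPT: return "ACCEPT";
	case NF_DROP:   return "DROP";
	default:        return "other builtin (e.g. RETURN)";
	}
}

int main(void)
{
	int samples[] = { -NF_ACCEPT - 1, -NF_DROP - 1, -5, 112 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("%4d -> %s\n", samples[i], decode(samples[i]));
	return 0;
}
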
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 5fdc556514ba..db5b87509446 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -18,7 +18,8 @@
18#include <net/netfilter/nf_conntrack_synproxy.h> 18#include <net/netfilter/nf_conntrack_synproxy.h>
19 19
20static struct iphdr * 20static struct iphdr *
21synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) 21synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
22 __be32 daddr)
22{ 23{
23 struct iphdr *iph; 24 struct iphdr *iph;
24 25
@@ -29,7 +30,7 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
29 iph->tos = 0; 30 iph->tos = 0;
30 iph->id = 0; 31 iph->id = 0;
31 iph->frag_off = htons(IP_DF); 32 iph->frag_off = htons(IP_DF);
32 iph->ttl = sysctl_ip_default_ttl; 33 iph->ttl = net->ipv4.sysctl_ip_default_ttl;
33 iph->protocol = IPPROTO_TCP; 34 iph->protocol = IPPROTO_TCP;
34 iph->check = 0; 35 iph->check = 0;
35 iph->saddr = saddr; 36 iph->saddr = saddr;
@@ -39,14 +40,12 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
39} 40}
40 41
41static void 42static void
42synproxy_send_tcp(const struct synproxy_net *snet, 43synproxy_send_tcp(struct net *net,
43 const struct sk_buff *skb, struct sk_buff *nskb, 44 const struct sk_buff *skb, struct sk_buff *nskb,
44 struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, 45 struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
45 struct iphdr *niph, struct tcphdr *nth, 46 struct iphdr *niph, struct tcphdr *nth,
46 unsigned int tcp_hdr_size) 47 unsigned int tcp_hdr_size)
47{ 48{
48 struct net *net = nf_ct_net(snet->tmpl);
49
50 nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); 49 nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
51 nskb->ip_summed = CHECKSUM_PARTIAL; 50 nskb->ip_summed = CHECKSUM_PARTIAL;
52 nskb->csum_start = (unsigned char *)nth - nskb->head; 51 nskb->csum_start = (unsigned char *)nth - nskb->head;
@@ -71,7 +70,7 @@ free_nskb:
71} 70}
72 71
73static void 72static void
74synproxy_send_client_synack(const struct synproxy_net *snet, 73synproxy_send_client_synack(struct net *net,
75 const struct sk_buff *skb, const struct tcphdr *th, 74 const struct sk_buff *skb, const struct tcphdr *th,
76 const struct synproxy_options *opts) 75 const struct synproxy_options *opts)
77{ 76{
@@ -90,7 +89,7 @@ synproxy_send_client_synack(const struct synproxy_net *snet,
90 return; 89 return;
91 skb_reserve(nskb, MAX_TCP_HEADER); 90 skb_reserve(nskb, MAX_TCP_HEADER);
92 91
93 niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); 92 niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
94 93
95 skb_reset_transport_header(nskb); 94 skb_reset_transport_header(nskb);
96 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); 95 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -108,15 +107,16 @@ synproxy_send_client_synack(const struct synproxy_net *snet,
108 107
109 synproxy_build_options(nth, opts); 108 synproxy_build_options(nth, opts);
110 109
111 synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, 110 synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
112 niph, nth, tcp_hdr_size); 111 niph, nth, tcp_hdr_size);
113} 112}
114 113
115static void 114static void
116synproxy_send_server_syn(const struct synproxy_net *snet, 115synproxy_send_server_syn(struct net *net,
117 const struct sk_buff *skb, const struct tcphdr *th, 116 const struct sk_buff *skb, const struct tcphdr *th,
118 const struct synproxy_options *opts, u32 recv_seq) 117 const struct synproxy_options *opts, u32 recv_seq)
119{ 118{
119 struct synproxy_net *snet = synproxy_pernet(net);
120 struct sk_buff *nskb; 120 struct sk_buff *nskb;
121 struct iphdr *iph, *niph; 121 struct iphdr *iph, *niph;
122 struct tcphdr *nth; 122 struct tcphdr *nth;
@@ -131,7 +131,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet,
131 return; 131 return;
132 skb_reserve(nskb, MAX_TCP_HEADER); 132 skb_reserve(nskb, MAX_TCP_HEADER);
133 133
134 niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); 134 niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
135 135
136 skb_reset_transport_header(nskb); 136 skb_reset_transport_header(nskb);
137 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); 137 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -152,12 +152,12 @@ synproxy_send_server_syn(const struct synproxy_net *snet,
152 152
153 synproxy_build_options(nth, opts); 153 synproxy_build_options(nth, opts);
154 154
155 synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, 155 synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
156 niph, nth, tcp_hdr_size); 156 niph, nth, tcp_hdr_size);
157} 157}
158 158
159static void 159static void
160synproxy_send_server_ack(const struct synproxy_net *snet, 160synproxy_send_server_ack(struct net *net,
161 const struct ip_ct_tcp *state, 161 const struct ip_ct_tcp *state,
162 const struct sk_buff *skb, const struct tcphdr *th, 162 const struct sk_buff *skb, const struct tcphdr *th,
163 const struct synproxy_options *opts) 163 const struct synproxy_options *opts)
@@ -176,7 +176,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet,
176 return; 176 return;
177 skb_reserve(nskb, MAX_TCP_HEADER); 177 skb_reserve(nskb, MAX_TCP_HEADER);
178 178
179 niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); 179 niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
180 180
181 skb_reset_transport_header(nskb); 181 skb_reset_transport_header(nskb);
182 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); 182 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -192,11 +192,11 @@ synproxy_send_server_ack(const struct synproxy_net *snet,
192 192
193 synproxy_build_options(nth, opts); 193 synproxy_build_options(nth, opts);
194 194
195 synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); 195 synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
196} 196}
197 197
198static void 198static void
199synproxy_send_client_ack(const struct synproxy_net *snet, 199synproxy_send_client_ack(struct net *net,
200 const struct sk_buff *skb, const struct tcphdr *th, 200 const struct sk_buff *skb, const struct tcphdr *th,
201 const struct synproxy_options *opts) 201 const struct synproxy_options *opts)
202{ 202{
@@ -214,7 +214,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
214 return; 214 return;
215 skb_reserve(nskb, MAX_TCP_HEADER); 215 skb_reserve(nskb, MAX_TCP_HEADER);
216 216
217 niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); 217 niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
218 218
219 skb_reset_transport_header(nskb); 219 skb_reset_transport_header(nskb);
220 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); 220 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -230,15 +230,16 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
230 230
231 synproxy_build_options(nth, opts); 231 synproxy_build_options(nth, opts);
232 232
233 synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, 233 synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
234 niph, nth, tcp_hdr_size); 234 niph, nth, tcp_hdr_size);
235} 235}
236 236
237static bool 237static bool
238synproxy_recv_client_ack(const struct synproxy_net *snet, 238synproxy_recv_client_ack(struct net *net,
239 const struct sk_buff *skb, const struct tcphdr *th, 239 const struct sk_buff *skb, const struct tcphdr *th,
240 struct synproxy_options *opts, u32 recv_seq) 240 struct synproxy_options *opts, u32 recv_seq)
241{ 241{
242 struct synproxy_net *snet = synproxy_pernet(net);
242 int mss; 243 int mss;
243 244
244 mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); 245 mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
@@ -254,7 +255,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet,
254 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) 255 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
255 synproxy_check_timestamp_cookie(opts); 256 synproxy_check_timestamp_cookie(opts);
256 257
257 synproxy_send_server_syn(snet, skb, th, opts, recv_seq); 258 synproxy_send_server_syn(net, skb, th, opts, recv_seq);
258 return true; 259 return true;
259} 260}
260 261
@@ -262,7 +263,8 @@ static unsigned int
262synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) 263synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
263{ 264{
264 const struct xt_synproxy_info *info = par->targinfo; 265 const struct xt_synproxy_info *info = par->targinfo;
265 struct synproxy_net *snet = synproxy_pernet(par->net); 266 struct net *net = par->net;
267 struct synproxy_net *snet = synproxy_pernet(net);
266 struct synproxy_options opts = {}; 268 struct synproxy_options opts = {};
267 struct tcphdr *th, _th; 269 struct tcphdr *th, _th;
268 270
@@ -291,12 +293,12 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
291 XT_SYNPROXY_OPT_SACK_PERM | 293 XT_SYNPROXY_OPT_SACK_PERM |
292 XT_SYNPROXY_OPT_ECN); 294 XT_SYNPROXY_OPT_ECN);
293 295
294 synproxy_send_client_synack(snet, skb, th, &opts); 296 synproxy_send_client_synack(net, skb, th, &opts);
295 return NF_DROP; 297 return NF_DROP;
296 298
297 } else if (th->ack && !(th->fin || th->rst || th->syn)) { 299 } else if (th->ack && !(th->fin || th->rst || th->syn)) {
298 /* ACK from client */ 300 /* ACK from client */
299 synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); 301 synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq));
300 return NF_DROP; 302 return NF_DROP;
301 } 303 }
302 304
@@ -307,7 +309,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
307 struct sk_buff *skb, 309 struct sk_buff *skb,
308 const struct nf_hook_state *nhs) 310 const struct nf_hook_state *nhs)
309{ 311{
310 struct synproxy_net *snet = synproxy_pernet(nhs->net); 312 struct net *net = nhs->net;
313 struct synproxy_net *snet = synproxy_pernet(net);
311 enum ip_conntrack_info ctinfo; 314 enum ip_conntrack_info ctinfo;
312 struct nf_conn *ct; 315 struct nf_conn *ct;
313 struct nf_conn_synproxy *synproxy; 316 struct nf_conn_synproxy *synproxy;
@@ -364,7 +367,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
364 * therefore we need to add 1 to make the SYN sequence 367 * therefore we need to add 1 to make the SYN sequence
365 * number match the one of first SYN. 368 * number match the one of first SYN.
366 */ 369 */
367 if (synproxy_recv_client_ack(snet, skb, th, &opts, 370 if (synproxy_recv_client_ack(net, skb, th, &opts,
368 ntohl(th->seq) + 1)) 371 ntohl(th->seq) + 1))
369 this_cpu_inc(snet->stats->cookie_retrans); 372 this_cpu_inc(snet->stats->cookie_retrans);
370 373
@@ -390,12 +393,12 @@ static unsigned int ipv4_synproxy_hook(void *priv,
390 XT_SYNPROXY_OPT_SACK_PERM); 393 XT_SYNPROXY_OPT_SACK_PERM);
391 394
392 swap(opts.tsval, opts.tsecr); 395 swap(opts.tsval, opts.tsecr);
393 synproxy_send_server_ack(snet, state, skb, th, &opts); 396 synproxy_send_server_ack(net, state, skb, th, &opts);
394 397
395 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); 398 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
396 399
397 swap(opts.tsval, opts.tsecr); 400 swap(opts.tsval, opts.tsecr);
398 synproxy_send_client_ack(snet, skb, th, &opts); 401 synproxy_send_client_ack(net, skb, th, &opts);
399 402
400 consume_skb(skb); 403 consume_skb(skb);
401 return NF_STOLEN; 404 return NF_STOLEN;
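
The SYNPROXY changes are plumbing: every helper now takes struct net explicitly instead of recovering it from the template conntrack via nf_ct_net(snet->tmpl), which in turn lets synproxy_build_ip() use the per-namespace net->ipv4.sysctl_ip_default_ttl rather than the global sysctl. The refactor pattern in miniature; the two struct types below are stand-ins, not the kernel's:

#include <stdio.h>

struct net { int default_ttl; };		/* per-namespace knobs */
struct pernet_state { struct net *owner; };	/* roughly what snet is */

/* Before: recover the namespace from the per-namespace state it hangs off. */
static int build_ttl_old(const struct pernet_state *snet)
{
	return snet->owner->default_ttl;
}

/* After: the caller, which already knows the namespace, passes it down. */
static int build_ttl_new(const struct net *net)
{
	return net->default_ttl;
}

int main(void)
{
	struct net ns = { .default_ttl = 64 };
	struct pernet_state snet = { .owner = &ns };

	printf("old: %d, new: %d\n", build_ttl_old(&snet), build_ttl_new(&ns));
	return 0;
}
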
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 397ef2dd133e..7667f223d7f8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -23,6 +23,7 @@ MODULE_DESCRIPTION("iptables filter table");
23#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ 23#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
24 (1 << NF_INET_FORWARD) | \ 24 (1 << NF_INET_FORWARD) | \
25 (1 << NF_INET_LOCAL_OUT)) 25 (1 << NF_INET_LOCAL_OUT))
26static int __net_init iptable_filter_table_init(struct net *net);
26 27
27static const struct xt_table packet_filter = { 28static const struct xt_table packet_filter = {
28 .name = "filter", 29 .name = "filter",
@@ -30,6 +31,7 @@ static const struct xt_table packet_filter = {
30 .me = THIS_MODULE, 31 .me = THIS_MODULE,
31 .af = NFPROTO_IPV4, 32 .af = NFPROTO_IPV4,
32 .priority = NF_IP_PRI_FILTER, 33 .priority = NF_IP_PRI_FILTER,
34 .table_init = iptable_filter_table_init,
33}; 35};
34 36
35static unsigned int 37static unsigned int
@@ -48,12 +50,16 @@ iptable_filter_hook(void *priv, struct sk_buff *skb,
48static struct nf_hook_ops *filter_ops __read_mostly; 50static struct nf_hook_ops *filter_ops __read_mostly;
49 51
50/* Default to forward because I got too much mail already. */ 52/* Default to forward because I got too much mail already. */
51static bool forward = true; 53static bool forward __read_mostly = true;
52module_param(forward, bool, 0000); 54module_param(forward, bool, 0000);
53 55
54static int __net_init iptable_filter_net_init(struct net *net) 56static int __net_init iptable_filter_table_init(struct net *net)
55{ 57{
56 struct ipt_replace *repl; 58 struct ipt_replace *repl;
59 int err;
60
61 if (net->ipv4.iptable_filter)
62 return 0;
57 63
58 repl = ipt_alloc_initial_table(&packet_filter); 64 repl = ipt_alloc_initial_table(&packet_filter);
59 if (repl == NULL) 65 if (repl == NULL)
@@ -62,15 +68,26 @@ static int __net_init iptable_filter_net_init(struct net *net)
62 ((struct ipt_standard *)repl->entries)[1].target.verdict = 68 ((struct ipt_standard *)repl->entries)[1].target.verdict =
63 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; 69 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
64 70
65 net->ipv4.iptable_filter = 71 err = ipt_register_table(net, &packet_filter, repl, filter_ops,
66 ipt_register_table(net, &packet_filter, repl); 72 &net->ipv4.iptable_filter);
67 kfree(repl); 73 kfree(repl);
68 return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter); 74 return err;
75}
76
77static int __net_init iptable_filter_net_init(struct net *net)
78{
79 if (net == &init_net || !forward)
80 return iptable_filter_table_init(net);
81
82 return 0;
69} 83}
70 84
71static void __net_exit iptable_filter_net_exit(struct net *net) 85static void __net_exit iptable_filter_net_exit(struct net *net)
72{ 86{
73 ipt_unregister_table(net, net->ipv4.iptable_filter); 87 if (!net->ipv4.iptable_filter)
88 return;
89 ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
90 net->ipv4.iptable_filter = NULL;
74} 91}
75 92
76static struct pernet_operations iptable_filter_net_ops = { 93static struct pernet_operations iptable_filter_net_ops = {
@@ -82,24 +99,21 @@ static int __init iptable_filter_init(void)
82{ 99{
83 int ret; 100 int ret;
84 101
102 filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
103 if (IS_ERR(filter_ops))
104 return PTR_ERR(filter_ops);
105
85 ret = register_pernet_subsys(&iptable_filter_net_ops); 106 ret = register_pernet_subsys(&iptable_filter_net_ops);
86 if (ret < 0) 107 if (ret < 0)
87 return ret; 108 kfree(filter_ops);
88
89 /* Register hooks */
90 filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
91 if (IS_ERR(filter_ops)) {
92 ret = PTR_ERR(filter_ops);
93 unregister_pernet_subsys(&iptable_filter_net_ops);
94 }
95 109
96 return ret; 110 return ret;
97} 111}
98 112
99static void __exit iptable_filter_fini(void) 113static void __exit iptable_filter_fini(void)
100{ 114{
101 xt_hook_unlink(&packet_filter, filter_ops);
102 unregister_pernet_subsys(&iptable_filter_net_ops); 115 unregister_pernet_subsys(&iptable_filter_net_ops);
116 kfree(filter_ops);
103} 117}
104 118
105module_init(iptable_filter_init); 119module_init(iptable_filter_init);
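
iptable_filter keeps a pernet init handler, but it now creates the table eagerly only for init_net, or for every namespace when the forward module parameter is false — presumably because a non-default FORWARD policy has to take effect the moment the namespace exists — and otherwise defers to the lazy .table_init path. A sketch of that decision; only the forward name mirrors the real module option, everything else is invented:

#include <stdio.h>
#include <stdbool.h>

static bool forward = true;	/* mirrors the module_param() default */

struct netns { int is_init_net; int has_filter; };

static int filter_table_init(struct netns *ns)
{
	if (ns->has_filter)
		return 0;
	ns->has_filter = 1;	/* stand-in for building and registering the table */
	return 0;
}

/* Pernet init: create eagerly only where the non-default policy must apply. */
static int filter_net_init(struct netns *ns)
{
	if (ns->is_init_net || !forward)
		return filter_table_init(ns);
	return 0;	/* defer until the table is actually requested */
}

int main(void)
{
	struct netns init_net = { .is_init_net = 1 }, other = { 0 };

	filter_net_init(&init_net);
	filter_net_init(&other);
	printf("init_net has filter: %d, new netns has filter: %d\n",
	       init_net.has_filter, other.has_filter);
	return 0;
}
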
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index ba5d392a13c4..57fc97cdac70 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables mangle table");
28 (1 << NF_INET_LOCAL_OUT) | \ 28 (1 << NF_INET_LOCAL_OUT) | \
29 (1 << NF_INET_POST_ROUTING)) 29 (1 << NF_INET_POST_ROUTING))
30 30
31static int __net_init iptable_mangle_table_init(struct net *net);
32
31static const struct xt_table packet_mangler = { 33static const struct xt_table packet_mangler = {
32 .name = "mangle", 34 .name = "mangle",
33 .valid_hooks = MANGLE_VALID_HOOKS, 35 .valid_hooks = MANGLE_VALID_HOOKS,
34 .me = THIS_MODULE, 36 .me = THIS_MODULE,
35 .af = NFPROTO_IPV4, 37 .af = NFPROTO_IPV4,
36 .priority = NF_IP_PRI_MANGLE, 38 .priority = NF_IP_PRI_MANGLE,
39 .table_init = iptable_mangle_table_init,
37}; 40};
38 41
39static unsigned int 42static unsigned int
@@ -92,27 +95,32 @@ iptable_mangle_hook(void *priv,
92} 95}
93 96
94static struct nf_hook_ops *mangle_ops __read_mostly; 97static struct nf_hook_ops *mangle_ops __read_mostly;
95 98static int __net_init iptable_mangle_table_init(struct net *net)
96static int __net_init iptable_mangle_net_init(struct net *net)
97{ 99{
98 struct ipt_replace *repl; 100 struct ipt_replace *repl;
101 int ret;
102
103 if (net->ipv4.iptable_mangle)
104 return 0;
99 105
100 repl = ipt_alloc_initial_table(&packet_mangler); 106 repl = ipt_alloc_initial_table(&packet_mangler);
101 if (repl == NULL) 107 if (repl == NULL)
102 return -ENOMEM; 108 return -ENOMEM;
103 net->ipv4.iptable_mangle = 109 ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops,
104 ipt_register_table(net, &packet_mangler, repl); 110 &net->ipv4.iptable_mangle);
105 kfree(repl); 111 kfree(repl);
106 return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle); 112 return ret;
107} 113}
108 114
109static void __net_exit iptable_mangle_net_exit(struct net *net) 115static void __net_exit iptable_mangle_net_exit(struct net *net)
110{ 116{
111 ipt_unregister_table(net, net->ipv4.iptable_mangle); 117 if (!net->ipv4.iptable_mangle)
118 return;
119 ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops);
120 net->ipv4.iptable_mangle = NULL;
112} 121}
113 122
114static struct pernet_operations iptable_mangle_net_ops = { 123static struct pernet_operations iptable_mangle_net_ops = {
115 .init = iptable_mangle_net_init,
116 .exit = iptable_mangle_net_exit, 124 .exit = iptable_mangle_net_exit,
117}; 125};
118 126
@@ -120,15 +128,22 @@ static int __init iptable_mangle_init(void)
120{ 128{
121 int ret; 129 int ret;
122 130
131 mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
132 if (IS_ERR(mangle_ops)) {
133 ret = PTR_ERR(mangle_ops);
134 return ret;
135 }
136
123 ret = register_pernet_subsys(&iptable_mangle_net_ops); 137 ret = register_pernet_subsys(&iptable_mangle_net_ops);
124 if (ret < 0) 138 if (ret < 0) {
139 kfree(mangle_ops);
125 return ret; 140 return ret;
141 }
126 142
127 /* Register hooks */ 143 ret = iptable_mangle_table_init(&init_net);
128 mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); 144 if (ret) {
129 if (IS_ERR(mangle_ops)) {
130 ret = PTR_ERR(mangle_ops);
131 unregister_pernet_subsys(&iptable_mangle_net_ops); 145 unregister_pernet_subsys(&iptable_mangle_net_ops);
146 kfree(mangle_ops);
132 } 147 }
133 148
134 return ret; 149 return ret;
@@ -136,8 +151,8 @@ static int __init iptable_mangle_init(void)
136 151
137static void __exit iptable_mangle_fini(void) 152static void __exit iptable_mangle_fini(void)
138{ 153{
139 xt_hook_unlink(&packet_mangler, mangle_ops);
140 unregister_pernet_subsys(&iptable_mangle_net_ops); 154 unregister_pernet_subsys(&iptable_mangle_net_ops);
155 kfree(mangle_ops);
141} 156}
142 157
143module_init(iptable_mangle_init); 158module_init(iptable_mangle_init);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ae2cd2752046..138a24bc76ad 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -18,6 +18,8 @@
18#include <net/netfilter/nf_nat_core.h> 18#include <net/netfilter/nf_nat_core.h>
19#include <net/netfilter/nf_nat_l3proto.h> 19#include <net/netfilter/nf_nat_l3proto.h>
20 20
21static int __net_init iptable_nat_table_init(struct net *net);
22
21static const struct xt_table nf_nat_ipv4_table = { 23static const struct xt_table nf_nat_ipv4_table = {
22 .name = "nat", 24 .name = "nat",
23 .valid_hooks = (1 << NF_INET_PRE_ROUTING) | 25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -26,6 +28,7 @@ static const struct xt_table nf_nat_ipv4_table = {
26 (1 << NF_INET_LOCAL_IN), 28 (1 << NF_INET_LOCAL_IN),
27 .me = THIS_MODULE, 29 .me = THIS_MODULE,
28 .af = NFPROTO_IPV4, 30 .af = NFPROTO_IPV4,
31 .table_init = iptable_nat_table_init,
29}; 32};
30 33
31static unsigned int iptable_nat_do_chain(void *priv, 34static unsigned int iptable_nat_do_chain(void *priv,
@@ -95,50 +98,50 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
95 }, 98 },
96}; 99};
97 100
98static int __net_init iptable_nat_net_init(struct net *net) 101static int __net_init iptable_nat_table_init(struct net *net)
99{ 102{
100 struct ipt_replace *repl; 103 struct ipt_replace *repl;
104 int ret;
105
106 if (net->ipv4.nat_table)
107 return 0;
101 108
102 repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); 109 repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
103 if (repl == NULL) 110 if (repl == NULL)
104 return -ENOMEM; 111 return -ENOMEM;
105 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); 112 ret = ipt_register_table(net, &nf_nat_ipv4_table, repl,
113 nf_nat_ipv4_ops, &net->ipv4.nat_table);
106 kfree(repl); 114 kfree(repl);
107 return PTR_ERR_OR_ZERO(net->ipv4.nat_table); 115 return ret;
108} 116}
109 117
110static void __net_exit iptable_nat_net_exit(struct net *net) 118static void __net_exit iptable_nat_net_exit(struct net *net)
111{ 119{
112 ipt_unregister_table(net, net->ipv4.nat_table); 120 if (!net->ipv4.nat_table)
121 return;
122 ipt_unregister_table(net, net->ipv4.nat_table, nf_nat_ipv4_ops);
123 net->ipv4.nat_table = NULL;
113} 124}
114 125
115static struct pernet_operations iptable_nat_net_ops = { 126static struct pernet_operations iptable_nat_net_ops = {
116 .init = iptable_nat_net_init,
117 .exit = iptable_nat_net_exit, 127 .exit = iptable_nat_net_exit,
118}; 128};
119 129
120static int __init iptable_nat_init(void) 130static int __init iptable_nat_init(void)
121{ 131{
122 int err; 132 int ret = register_pernet_subsys(&iptable_nat_net_ops);
123 133
124 err = register_pernet_subsys(&iptable_nat_net_ops); 134 if (ret)
125 if (err < 0) 135 return ret;
126 goto err1;
127 136
128 err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); 137 ret = iptable_nat_table_init(&init_net);
129 if (err < 0) 138 if (ret)
130 goto err2; 139 unregister_pernet_subsys(&iptable_nat_net_ops);
131 return 0; 140 return ret;
132
133err2:
134 unregister_pernet_subsys(&iptable_nat_net_ops);
135err1:
136 return err;
137} 141}
138 142
139static void __exit iptable_nat_exit(void) 143static void __exit iptable_nat_exit(void)
140{ 144{
141 nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
142 unregister_pernet_subsys(&iptable_nat_net_ops); 145 unregister_pernet_subsys(&iptable_nat_net_ops);
143} 146}
144 147
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 1ba02811acb0..2642ecd2645c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -10,12 +10,15 @@
10 10
11#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 11#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
12 12
13static int __net_init iptable_raw_table_init(struct net *net);
14
13static const struct xt_table packet_raw = { 15static const struct xt_table packet_raw = {
14 .name = "raw", 16 .name = "raw",
15 .valid_hooks = RAW_VALID_HOOKS, 17 .valid_hooks = RAW_VALID_HOOKS,
16 .me = THIS_MODULE, 18 .me = THIS_MODULE,
17 .af = NFPROTO_IPV4, 19 .af = NFPROTO_IPV4,
18 .priority = NF_IP_PRI_RAW, 20 .priority = NF_IP_PRI_RAW,
21 .table_init = iptable_raw_table_init,
19}; 22};
20 23
21/* The work comes in here from netfilter.c. */ 24/* The work comes in here from netfilter.c. */
@@ -34,26 +37,32 @@ iptable_raw_hook(void *priv, struct sk_buff *skb,
34 37
35static struct nf_hook_ops *rawtable_ops __read_mostly; 38static struct nf_hook_ops *rawtable_ops __read_mostly;
36 39
37static int __net_init iptable_raw_net_init(struct net *net) 40static int __net_init iptable_raw_table_init(struct net *net)
38{ 41{
39 struct ipt_replace *repl; 42 struct ipt_replace *repl;
43 int ret;
44
45 if (net->ipv4.iptable_raw)
46 return 0;
40 47
41 repl = ipt_alloc_initial_table(&packet_raw); 48 repl = ipt_alloc_initial_table(&packet_raw);
42 if (repl == NULL) 49 if (repl == NULL)
43 return -ENOMEM; 50 return -ENOMEM;
44 net->ipv4.iptable_raw = 51 ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops,
45 ipt_register_table(net, &packet_raw, repl); 52 &net->ipv4.iptable_raw);
46 kfree(repl); 53 kfree(repl);
47 return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw); 54 return ret;
48} 55}
49 56
50static void __net_exit iptable_raw_net_exit(struct net *net) 57static void __net_exit iptable_raw_net_exit(struct net *net)
51{ 58{
52 ipt_unregister_table(net, net->ipv4.iptable_raw); 59 if (!net->ipv4.iptable_raw)
60 return;
61 ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops);
62 net->ipv4.iptable_raw = NULL;
53} 63}
54 64
55static struct pernet_operations iptable_raw_net_ops = { 65static struct pernet_operations iptable_raw_net_ops = {
56 .init = iptable_raw_net_init,
57 .exit = iptable_raw_net_exit, 66 .exit = iptable_raw_net_exit,
58}; 67};
59 68
@@ -61,15 +70,20 @@ static int __init iptable_raw_init(void)
61{ 70{
62 int ret; 71 int ret;
63 72
73 rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook);
74 if (IS_ERR(rawtable_ops))
75 return PTR_ERR(rawtable_ops);
76
64 ret = register_pernet_subsys(&iptable_raw_net_ops); 77 ret = register_pernet_subsys(&iptable_raw_net_ops);
65 if (ret < 0) 78 if (ret < 0) {
79 kfree(rawtable_ops);
66 return ret; 80 return ret;
81 }
67 82
68 /* Register hooks */ 83 ret = iptable_raw_table_init(&init_net);
69 rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); 84 if (ret) {
70 if (IS_ERR(rawtable_ops)) {
71 ret = PTR_ERR(rawtable_ops);
72 unregister_pernet_subsys(&iptable_raw_net_ops); 85 unregister_pernet_subsys(&iptable_raw_net_ops);
86 kfree(rawtable_ops);
73 } 87 }
74 88
75 return ret; 89 return ret;
@@ -77,8 +91,8 @@ static int __init iptable_raw_init(void)
77 91
78static void __exit iptable_raw_fini(void) 92static void __exit iptable_raw_fini(void)
79{ 93{
80 xt_hook_unlink(&packet_raw, rawtable_ops);
81 unregister_pernet_subsys(&iptable_raw_net_ops); 94 unregister_pernet_subsys(&iptable_raw_net_ops);
95 kfree(rawtable_ops);
82} 96}
83 97
84module_init(iptable_raw_init); 98module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index c2e23d5e9cd4..ff226596e4b5 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
28 (1 << NF_INET_FORWARD) | \ 28 (1 << NF_INET_FORWARD) | \
29 (1 << NF_INET_LOCAL_OUT) 29 (1 << NF_INET_LOCAL_OUT)
30 30
31static int __net_init iptable_security_table_init(struct net *net);
32
31static const struct xt_table security_table = { 33static const struct xt_table security_table = {
32 .name = "security", 34 .name = "security",
33 .valid_hooks = SECURITY_VALID_HOOKS, 35 .valid_hooks = SECURITY_VALID_HOOKS,
34 .me = THIS_MODULE, 36 .me = THIS_MODULE,
35 .af = NFPROTO_IPV4, 37 .af = NFPROTO_IPV4,
36 .priority = NF_IP_PRI_SECURITY, 38 .priority = NF_IP_PRI_SECURITY,
39 .table_init = iptable_security_table_init,
37}; 40};
38 41
39static unsigned int 42static unsigned int
@@ -51,26 +54,33 @@ iptable_security_hook(void *priv, struct sk_buff *skb,
51 54
52static struct nf_hook_ops *sectbl_ops __read_mostly; 55static struct nf_hook_ops *sectbl_ops __read_mostly;
53 56
54static int __net_init iptable_security_net_init(struct net *net) 57static int __net_init iptable_security_table_init(struct net *net)
55{ 58{
56 struct ipt_replace *repl; 59 struct ipt_replace *repl;
60 int ret;
61
62 if (net->ipv4.iptable_security)
63 return 0;
57 64
58 repl = ipt_alloc_initial_table(&security_table); 65 repl = ipt_alloc_initial_table(&security_table);
59 if (repl == NULL) 66 if (repl == NULL)
60 return -ENOMEM; 67 return -ENOMEM;
61 net->ipv4.iptable_security = 68 ret = ipt_register_table(net, &security_table, repl, sectbl_ops,
62 ipt_register_table(net, &security_table, repl); 69 &net->ipv4.iptable_security);
63 kfree(repl); 70 kfree(repl);
64 return PTR_ERR_OR_ZERO(net->ipv4.iptable_security); 71 return ret;
65} 72}
66 73
67static void __net_exit iptable_security_net_exit(struct net *net) 74static void __net_exit iptable_security_net_exit(struct net *net)
68{ 75{
69 ipt_unregister_table(net, net->ipv4.iptable_security); 76 if (!net->ipv4.iptable_security)
77 return;
78
79 ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops);
80 net->ipv4.iptable_security = NULL;
70} 81}
71 82
72static struct pernet_operations iptable_security_net_ops = { 83static struct pernet_operations iptable_security_net_ops = {
73 .init = iptable_security_net_init,
74 .exit = iptable_security_net_exit, 84 .exit = iptable_security_net_exit,
75}; 85};
76 86
@@ -78,27 +88,29 @@ static int __init iptable_security_init(void)
78{ 88{
79 int ret; 89 int ret;
80 90
91 sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
92 if (IS_ERR(sectbl_ops))
93 return PTR_ERR(sectbl_ops);
94
81 ret = register_pernet_subsys(&iptable_security_net_ops); 95 ret = register_pernet_subsys(&iptable_security_net_ops);
82 if (ret < 0) 96 if (ret < 0) {
97 kfree(sectbl_ops);
83 return ret; 98 return ret;
84
85 sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
86 if (IS_ERR(sectbl_ops)) {
87 ret = PTR_ERR(sectbl_ops);
88 goto cleanup_table;
89 } 99 }
90 100
91 return ret; 101 ret = iptable_security_table_init(&init_net);
102 if (ret) {
103 unregister_pernet_subsys(&iptable_security_net_ops);
104 kfree(sectbl_ops);
105 }
92 106
93cleanup_table:
94 unregister_pernet_subsys(&iptable_security_net_ops);
95 return ret; 107 return ret;
96} 108}
97 109
98static void __exit iptable_security_fini(void) 110static void __exit iptable_security_fini(void)
99{ 111{
100 xt_hook_unlink(&security_table, sectbl_ops);
101 unregister_pernet_subsys(&iptable_security_net_ops); 112 unregister_pernet_subsys(&iptable_security_net_ops);
113 kfree(sectbl_ops);
102} 114}
103 115
104module_init(iptable_security_init); 116module_init(iptable_security_init);
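
Both the iptable_raw and iptable_security conversions above follow the same pattern: allocate the hook ops once with xt_hook_ops_alloc(), register pernet_operations that now carry only an .exit handler, then call the new *_table_init() for init_net, unwinding in reverse order whenever a step fails. The standalone C sketch below (userspace, hypothetical helper names, not kernel code) illustrates only that allocate/register/init ordering and its error unwinding.

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for xt_hook_ops_alloc(), register_pernet_subsys() and the
 * per-table init; all names here are hypothetical. */
static void *alloc_ops(void)          { return malloc(16); }
static int   register_subsys(void)    { return 0; }
static void  unregister_subsys(void)  { }
static int   table_init(void)         { return 0; }

static int module_init_sketch(void)
{
	void *ops;
	int ret;

	ops = alloc_ops();              /* step 1: allocate hook ops     */
	if (!ops)
		return -1;

	ret = register_subsys();        /* step 2: register pernet ops   */
	if (ret < 0) {
		free(ops);              /* undo step 1                   */
		return ret;
	}

	ret = table_init();             /* step 3: set up the init netns */
	if (ret) {
		unregister_subsys();    /* undo step 2                   */
		free(ops);              /* undo step 1                   */
	}
	return ret;
}

int main(void)
{
	return module_init_sketch() ? EXIT_FAILURE : EXIT_SUCCESS;
}
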
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index a04dee536b8e..d88da36b383c 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -31,10 +31,8 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
31 err = ip_defrag(net, skb, user); 31 err = ip_defrag(net, skb, user);
32 local_bh_enable(); 32 local_bh_enable();
33 33
34 if (!err) { 34 if (!err)
35 ip_send_check(ip_hdr(skb));
36 skb->ignore_df = 1; 35 skb->ignore_df = 1;
37 }
38 36
39 return err; 37 return err;
40} 38}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 61c7cc22ea68..f8aad03d674b 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -127,29 +127,15 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
127 u8 proto, void *data, __sum16 *check, 127 u8 proto, void *data, __sum16 *check,
128 int datalen, int oldlen) 128 int datalen, int oldlen)
129{ 129{
130 const struct iphdr *iph = ip_hdr(skb);
131 struct rtable *rt = skb_rtable(skb);
132
133 if (skb->ip_summed != CHECKSUM_PARTIAL) { 130 if (skb->ip_summed != CHECKSUM_PARTIAL) {
134 if (!(rt->rt_flags & RTCF_LOCAL) && 131 const struct iphdr *iph = ip_hdr(skb);
135 (!skb->dev || skb->dev->features & 132
136 (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { 133 skb->ip_summed = CHECKSUM_PARTIAL;
137 skb->ip_summed = CHECKSUM_PARTIAL; 134 skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
138 skb->csum_start = skb_headroom(skb) + 135 ip_hdrlen(skb);
139 skb_network_offset(skb) + 136 skb->csum_offset = (void *)check - data;
140 ip_hdrlen(skb); 137 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
141 skb->csum_offset = (void *)check - data; 138 proto, 0);
142 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
143 datalen, proto, 0);
144 } else {
145 *check = 0;
146 *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
147 datalen, proto,
148 csum_partial(data, datalen,
149 0));
150 if (proto == IPPROTO_UDP && !*check)
151 *check = CSUM_MANGLED_0;
152 }
153 } else 139 } else
154 inet_proto_csum_replace2(check, skb, 140 inet_proto_csum_replace2(check, skb,
155 htons(oldlen), htons(datalen), true); 141 htons(oldlen), htons(datalen), true);
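
The rewritten nf_nat_ipv4_csum_recalc() drops the software-checksum branch: it now always marks the skb CHECKSUM_PARTIAL and seeds *check with the inverted pseudo-header sum via csum_tcpudp_magic(), leaving the payload sum to the usual checksum-completion path. As a reminder of what that pseudo-header sum covers, here is a plain userspace RFC 1071 one's-complement sum over a made-up UDP pseudo-header and payload; it is a sketch for illustration only, not the kernel helper.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* One's-complement sum over a byte buffer (RFC 1071). */
static uint32_t csum_add(uint32_t sum, const uint8_t *p, size_t len)
{
	while (len > 1) {
		sum += (uint32_t)(p[0] << 8 | p[1]);
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)(p[0] << 8);
	return sum;
}

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* Hypothetical addresses and a dummy payload. */
	uint32_t saddr = ntohl(inet_addr("192.0.2.1"));
	uint32_t daddr = ntohl(inet_addr("198.51.100.2"));
	uint8_t payload[] = "example";
	uint16_t len = sizeof(payload);
	uint8_t pseudo[12];
	uint32_t sum;

	/* Pseudo-header: saddr, daddr, zero byte, protocol, L4 length. */
	pseudo[0] = saddr >> 24; pseudo[1] = saddr >> 16;
	pseudo[2] = saddr >> 8;  pseudo[3] = saddr;
	pseudo[4] = daddr >> 24; pseudo[5] = daddr >> 16;
	pseudo[6] = daddr >> 8;  pseudo[7] = daddr;
	pseudo[8] = 0;           pseudo[9] = 17;          /* IPPROTO_UDP */
	pseudo[10] = len >> 8;   pseudo[11] = len & 0xff;

	sum = csum_add(0, pseudo, sizeof(pseudo));
	sum = csum_add(sum, payload, len);
	printf("checksum over pseudo-header + payload: 0x%04x\n", csum_fold(sum));
	return 0;
}
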
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index c6eb42100e9a..ea91058b5f6f 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this,
108 unsigned long event, 108 unsigned long event,
109 void *ptr) 109 void *ptr)
110{ 110{
111 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; 111 struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
112 struct netdev_notifier_info info; 112 struct netdev_notifier_info info;
113 113
114 netdev_notifier_info_init(&info, dev); 114 /* The masq_dev_notifier will catch the case of the device going
115 * down. So if the inetdev is dead and being destroyed we have
116 * no work to do. Otherwise this is an individual address removal
117 * and we have to perform the flush.
118 */
119 if (idev->dead)
120 return NOTIFY_DONE;
121
122 netdev_notifier_info_init(&info, idev->dev);
115 return masq_device_event(this, event, &info); 123 return masq_device_event(this, event, &info);
116} 124}
117 125
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index b72ffc58e255..51ced81b616c 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -25,7 +25,12 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
25 25
26 memset(&range, 0, sizeof(range)); 26 memset(&range, 0, sizeof(range));
27 range.flags = priv->flags; 27 range.flags = priv->flags;
28 28 if (priv->sreg_proto_min) {
29 range.min_proto.all =
30 *(__be16 *)&regs->data[priv->sreg_proto_min];
31 range.max_proto.all =
32 *(__be16 *)&regs->data[priv->sreg_proto_max];
33 }
29 regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, 34 regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook,
30 &range, pkt->out); 35 &range, pkt->out);
31} 36}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index d3a27165f9cc..cf9700b1a106 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -145,10 +145,12 @@ fail:
145} 145}
146EXPORT_SYMBOL_GPL(ping_get_port); 146EXPORT_SYMBOL_GPL(ping_get_port);
147 147
148void ping_hash(struct sock *sk) 148int ping_hash(struct sock *sk)
149{ 149{
150 pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); 150 pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
151 BUG(); /* "Please do not press this button again." */ 151 BUG(); /* "Please do not press this button again." */
152
153 return 0;
152} 154}
153 155
154void ping_unhash(struct sock *sk) 156void ping_unhash(struct sock *sk)
@@ -1140,13 +1142,6 @@ static int ping_v4_seq_show(struct seq_file *seq, void *v)
1140 return 0; 1142 return 0;
1141} 1143}
1142 1144
1143static const struct seq_operations ping_v4_seq_ops = {
1144 .show = ping_v4_seq_show,
1145 .start = ping_v4_seq_start,
1146 .next = ping_seq_next,
1147 .stop = ping_seq_stop,
1148};
1149
1150static int ping_seq_open(struct inode *inode, struct file *file) 1145static int ping_seq_open(struct inode *inode, struct file *file)
1151{ 1146{
1152 struct ping_seq_afinfo *afinfo = PDE_DATA(inode); 1147 struct ping_seq_afinfo *afinfo = PDE_DATA(inode);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3abd9d7a3adf..9f665b63a927 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -390,7 +390,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
390 390
391 seq_printf(seq, "\nIp: %d %d", 391 seq_printf(seq, "\nIp: %d %d",
392 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, 392 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
393 sysctl_ip_default_ttl); 393 net->ipv4.sysctl_ip_default_ttl);
394 394
395 BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); 395 BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
396 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) 396 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7113bae4e6a0..8d22de74080c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ static struct raw_hashinfo raw_v4_hashinfo = {
93 .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), 93 .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
94}; 94};
95 95
96void raw_hash_sk(struct sock *sk) 96int raw_hash_sk(struct sock *sk)
97{ 97{
98 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; 98 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
99 struct hlist_head *head; 99 struct hlist_head *head;
@@ -104,6 +104,8 @@ void raw_hash_sk(struct sock *sk)
104 sk_add_node(sk, head); 104 sk_add_node(sk, head);
105 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 105 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
106 write_unlock_bh(&h->lock); 106 write_unlock_bh(&h->lock);
107
108 return 0;
107} 109}
108EXPORT_SYMBOL_GPL(raw_hash_sk); 110EXPORT_SYMBOL_GPL(raw_hash_sk);
109 111
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 02c62299d717..60398a9370e7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1438,9 +1438,9 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1438#endif 1438#endif
1439} 1439}
1440 1440
1441static struct rtable *rt_dst_alloc(struct net_device *dev, 1441struct rtable *rt_dst_alloc(struct net_device *dev,
1442 unsigned int flags, u16 type, 1442 unsigned int flags, u16 type,
1443 bool nopolicy, bool noxfrm, bool will_cache) 1443 bool nopolicy, bool noxfrm, bool will_cache)
1444{ 1444{
1445 struct rtable *rt; 1445 struct rtable *rt;
1446 1446
@@ -1468,6 +1468,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,
1468 1468
1469 return rt; 1469 return rt;
1470} 1470}
1471EXPORT_SYMBOL(rt_dst_alloc);
1471 1472
1472/* called in rcu_read_lock() section */ 1473/* called in rcu_read_lock() section */
1473static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1474static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2045,6 +2046,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2045 */ 2046 */
2046 if (fi && res->prefixlen < 4) 2047 if (fi && res->prefixlen < 4)
2047 fi = NULL; 2048 fi = NULL;
2049 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2050 (orig_oif != dev_out->ifindex)) {
2051 /* For local routes that require a particular output interface
2052 * we do not want to cache the result. Caching the result
2053 * causes incorrect behaviour when there are multiple source
2054 * addresses on the interface, the end result being that if the
2055 * intended recipient is waiting on that interface for the
2056 * packet he won't receive it because it will be delivered on
2057 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2058 * be set to the loopback interface as well.
2059 */
2060 fi = NULL;
2048 } 2061 }
2049 2062
2050 fnhe = NULL; 2063 fnhe = NULL;
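
The new __mkroute_output branch avoids caching local routes tied to a specific output interface precisely because a cached entry would make replies arrive over loopback, with IP_PKTINFO's ipi_ifindex pointing at the wrong device. For reference, a minimal userspace receiver that actually inspects ipi_ifindex looks roughly like the following (UDP socket on a hypothetical port 9999, error handling mostly trimmed).

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/uio.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int on = 1;
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port   = htons(9999),          /* hypothetical port */
		.sin_addr   = { htonl(INADDR_ANY) },
	};
	char data[512], cbuf[256];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;

	setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &on, sizeof(on));
	bind(fd, (struct sockaddr *)&addr, sizeof(addr));

	if (recvmsg(fd, &msg, 0) < 0)
		return 1;

	/* Walk the ancillary data for the IP_PKTINFO record. */
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_IP &&
		    cmsg->cmsg_type == IP_PKTINFO) {
			struct in_pktinfo *pi = (struct in_pktinfo *)CMSG_DATA(cmsg);

			printf("received on ifindex %d, dst %s\n",
			       pi->ipi_ifindex, inet_ntoa(pi->ipi_addr));
		}
	}
	close(fd);
	return 0;
}
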
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 643a86c49020..4c04f09338e3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,8 +19,6 @@
19#include <net/tcp.h> 19#include <net/tcp.h>
20#include <net/route.h> 20#include <net/route.h>
21 21
22extern int sysctl_tcp_syncookies;
23
24static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; 22static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
25 23
26#define COOKIEBITS 24 /* Upper bits store count */ 24#define COOKIEBITS 24 /* Upper bits store count */
@@ -50,8 +48,7 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
50#define TSBITS 6 48#define TSBITS 6
51#define TSMASK (((__u32)1 << TSBITS) - 1) 49#define TSMASK (((__u32)1 << TSBITS) - 1)
52 50
53static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], 51static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv4_cookie_scratch);
54 ipv4_cookie_scratch);
55 52
56static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, 53static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
57 u32 count, int c) 54 u32 count, int c)
@@ -307,7 +304,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
307 __u8 rcv_wscale; 304 __u8 rcv_wscale;
308 struct flowi4 fl4; 305 struct flowi4 fl4;
309 306
310 if (!sysctl_tcp_syncookies || !th->ack || th->rst) 307 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
311 goto out; 308 goto out;
312 309
313 if (tcp_synq_no_recent_overflow(sk)) 310 if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4d367b4139a3..1e1fe6086dd9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -283,31 +283,6 @@ static struct ctl_table ipv4_table[] = {
283 .proc_handler = proc_dointvec 283 .proc_handler = proc_dointvec
284 }, 284 },
285 { 285 {
286 .procname = "ip_default_ttl",
287 .data = &sysctl_ip_default_ttl,
288 .maxlen = sizeof(int),
289 .mode = 0644,
290 .proc_handler = proc_dointvec_minmax,
291 .extra1 = &ip_ttl_min,
292 .extra2 = &ip_ttl_max,
293 },
294 {
295 .procname = "tcp_syn_retries",
296 .data = &sysctl_tcp_syn_retries,
297 .maxlen = sizeof(int),
298 .mode = 0644,
299 .proc_handler = proc_dointvec_minmax,
300 .extra1 = &tcp_syn_retries_min,
301 .extra2 = &tcp_syn_retries_max
302 },
303 {
304 .procname = "tcp_synack_retries",
305 .data = &sysctl_tcp_synack_retries,
306 .maxlen = sizeof(int),
307 .mode = 0644,
308 .proc_handler = proc_dointvec
309 },
310 {
311 .procname = "tcp_max_orphans", 286 .procname = "tcp_max_orphans",
312 .data = &sysctl_tcp_max_orphans, 287 .data = &sysctl_tcp_max_orphans,
313 .maxlen = sizeof(int), 288 .maxlen = sizeof(int),
@@ -322,51 +297,6 @@ static struct ctl_table ipv4_table[] = {
322 .proc_handler = proc_dointvec 297 .proc_handler = proc_dointvec
323 }, 298 },
324 { 299 {
325 .procname = "ip_early_demux",
326 .data = &sysctl_ip_early_demux,
327 .maxlen = sizeof(int),
328 .mode = 0644,
329 .proc_handler = proc_dointvec
330 },
331 {
332 .procname = "ip_dynaddr",
333 .data = &sysctl_ip_dynaddr,
334 .maxlen = sizeof(int),
335 .mode = 0644,
336 .proc_handler = proc_dointvec
337 },
338 {
339 .procname = "tcp_retries1",
340 .data = &sysctl_tcp_retries1,
341 .maxlen = sizeof(int),
342 .mode = 0644,
343 .proc_handler = proc_dointvec_minmax,
344 .extra2 = &tcp_retr1_max
345 },
346 {
347 .procname = "tcp_retries2",
348 .data = &sysctl_tcp_retries2,
349 .maxlen = sizeof(int),
350 .mode = 0644,
351 .proc_handler = proc_dointvec
352 },
353 {
354 .procname = "tcp_fin_timeout",
355 .data = &sysctl_tcp_fin_timeout,
356 .maxlen = sizeof(int),
357 .mode = 0644,
358 .proc_handler = proc_dointvec_jiffies,
359 },
360#ifdef CONFIG_SYN_COOKIES
361 {
362 .procname = "tcp_syncookies",
363 .data = &sysctl_tcp_syncookies,
364 .maxlen = sizeof(int),
365 .mode = 0644,
366 .proc_handler = proc_dointvec
367 },
368#endif
369 {
370 .procname = "tcp_fastopen", 300 .procname = "tcp_fastopen",
371 .data = &sysctl_tcp_fastopen, 301 .data = &sysctl_tcp_fastopen,
372 .maxlen = sizeof(int), 302 .maxlen = sizeof(int),
@@ -415,30 +345,6 @@ static struct ctl_table ipv4_table[] = {
415 .proc_handler = proc_dointvec 345 .proc_handler = proc_dointvec
416 }, 346 },
417 { 347 {
418 .procname = "igmp_max_memberships",
419 .data = &sysctl_igmp_max_memberships,
420 .maxlen = sizeof(int),
421 .mode = 0644,
422 .proc_handler = proc_dointvec
423 },
424 {
425 .procname = "igmp_max_msf",
426 .data = &sysctl_igmp_max_msf,
427 .maxlen = sizeof(int),
428 .mode = 0644,
429 .proc_handler = proc_dointvec
430 },
431#ifdef CONFIG_IP_MULTICAST
432 {
433 .procname = "igmp_qrv",
434 .data = &sysctl_igmp_qrv,
435 .maxlen = sizeof(int),
436 .mode = 0644,
437 .proc_handler = proc_dointvec_minmax,
438 .extra1 = &one
439 },
440#endif
441 {
442 .procname = "inet_peer_threshold", 348 .procname = "inet_peer_threshold",
443 .data = &inet_peer_threshold, 349 .data = &inet_peer_threshold,
444 .maxlen = sizeof(int), 350 .maxlen = sizeof(int),
@@ -460,13 +366,6 @@ static struct ctl_table ipv4_table[] = {
460 .proc_handler = proc_dointvec_jiffies, 366 .proc_handler = proc_dointvec_jiffies,
461 }, 367 },
462 { 368 {
463 .procname = "tcp_orphan_retries",
464 .data = &sysctl_tcp_orphan_retries,
465 .maxlen = sizeof(int),
466 .mode = 0644,
467 .proc_handler = proc_dointvec
468 },
469 {
470 .procname = "tcp_fack", 369 .procname = "tcp_fack",
471 .data = &sysctl_tcp_fack, 370 .data = &sysctl_tcp_fack,
472 .maxlen = sizeof(int), 371 .maxlen = sizeof(int),
@@ -481,13 +380,6 @@ static struct ctl_table ipv4_table[] = {
481 .proc_handler = proc_dointvec, 380 .proc_handler = proc_dointvec,
482 }, 381 },
483 { 382 {
484 .procname = "tcp_reordering",
485 .data = &sysctl_tcp_reordering,
486 .maxlen = sizeof(int),
487 .mode = 0644,
488 .proc_handler = proc_dointvec
489 },
490 {
491 .procname = "tcp_max_reordering", 383 .procname = "tcp_max_reordering",
492 .data = &sysctl_tcp_max_reordering, 384 .data = &sysctl_tcp_max_reordering,
493 .maxlen = sizeof(int), 385 .maxlen = sizeof(int),
@@ -517,13 +409,6 @@ static struct ctl_table ipv4_table[] = {
517 .extra1 = &one, 409 .extra1 = &one,
518 }, 410 },
519 { 411 {
520 .procname = "tcp_notsent_lowat",
521 .data = &sysctl_tcp_notsent_lowat,
522 .maxlen = sizeof(sysctl_tcp_notsent_lowat),
523 .mode = 0644,
524 .proc_handler = proc_dointvec,
525 },
526 {
527 .procname = "tcp_rmem", 412 .procname = "tcp_rmem",
528 .data = &sysctl_tcp_rmem, 413 .data = &sysctl_tcp_rmem,
529 .maxlen = sizeof(sysctl_tcp_rmem), 414 .maxlen = sizeof(sysctl_tcp_rmem),
@@ -845,6 +730,29 @@ static struct ctl_table ipv4_net_table[] = {
845 .proc_handler = proc_dointvec 730 .proc_handler = proc_dointvec
846 }, 731 },
847 { 732 {
733 .procname = "ip_dynaddr",
734 .data = &init_net.ipv4.sysctl_ip_dynaddr,
735 .maxlen = sizeof(int),
736 .mode = 0644,
737 .proc_handler = proc_dointvec
738 },
739 {
740 .procname = "ip_early_demux",
741 .data = &init_net.ipv4.sysctl_ip_early_demux,
742 .maxlen = sizeof(int),
743 .mode = 0644,
744 .proc_handler = proc_dointvec
745 },
746 {
747 .procname = "ip_default_ttl",
748 .data = &init_net.ipv4.sysctl_ip_default_ttl,
749 .maxlen = sizeof(int),
750 .mode = 0644,
751 .proc_handler = proc_dointvec_minmax,
752 .extra1 = &ip_ttl_min,
753 .extra2 = &ip_ttl_max,
754 },
755 {
848 .procname = "ip_local_port_range", 756 .procname = "ip_local_port_range",
849 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), 757 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
850 .data = &init_net.ipv4.ip_local_ports.range, 758 .data = &init_net.ipv4.ip_local_ports.range,
@@ -934,12 +842,36 @@ static struct ctl_table ipv4_net_table[] = {
934 }, 842 },
935 { 843 {
936 .procname = "igmp_link_local_mcast_reports", 844 .procname = "igmp_link_local_mcast_reports",
937 .data = &sysctl_igmp_llm_reports, 845 .data = &init_net.ipv4.sysctl_igmp_llm_reports,
846 .maxlen = sizeof(int),
847 .mode = 0644,
848 .proc_handler = proc_dointvec
849 },
850 {
851 .procname = "igmp_max_memberships",
852 .data = &init_net.ipv4.sysctl_igmp_max_memberships,
938 .maxlen = sizeof(int), 853 .maxlen = sizeof(int),
939 .mode = 0644, 854 .mode = 0644,
940 .proc_handler = proc_dointvec 855 .proc_handler = proc_dointvec
941 }, 856 },
942 { 857 {
858 .procname = "igmp_max_msf",
859 .data = &init_net.ipv4.sysctl_igmp_max_msf,
860 .maxlen = sizeof(int),
861 .mode = 0644,
862 .proc_handler = proc_dointvec
863 },
864#ifdef CONFIG_IP_MULTICAST
865 {
866 .procname = "igmp_qrv",
867 .data = &init_net.ipv4.sysctl_igmp_qrv,
868 .maxlen = sizeof(int),
869 .mode = 0644,
870 .proc_handler = proc_dointvec_minmax,
871 .extra1 = &one
872 },
873#endif
874 {
943 .procname = "tcp_keepalive_time", 875 .procname = "tcp_keepalive_time",
944 .data = &init_net.ipv4.sysctl_tcp_keepalive_time, 876 .data = &init_net.ipv4.sysctl_tcp_keepalive_time,
945 .maxlen = sizeof(int), 877 .maxlen = sizeof(int),
@@ -960,6 +892,74 @@ static struct ctl_table ipv4_net_table[] = {
960 .mode = 0644, 892 .mode = 0644,
961 .proc_handler = proc_dointvec_jiffies, 893 .proc_handler = proc_dointvec_jiffies,
962 }, 894 },
895 {
896 .procname = "tcp_syn_retries",
897 .data = &init_net.ipv4.sysctl_tcp_syn_retries,
898 .maxlen = sizeof(int),
899 .mode = 0644,
900 .proc_handler = proc_dointvec_minmax,
901 .extra1 = &tcp_syn_retries_min,
902 .extra2 = &tcp_syn_retries_max
903 },
904 {
905 .procname = "tcp_synack_retries",
906 .data = &init_net.ipv4.sysctl_tcp_synack_retries,
907 .maxlen = sizeof(int),
908 .mode = 0644,
909 .proc_handler = proc_dointvec
910 },
911#ifdef CONFIG_SYN_COOKIES
912 {
913 .procname = "tcp_syncookies",
914 .data = &init_net.ipv4.sysctl_tcp_syncookies,
915 .maxlen = sizeof(int),
916 .mode = 0644,
917 .proc_handler = proc_dointvec
918 },
919#endif
920 {
921 .procname = "tcp_reordering",
922 .data = &init_net.ipv4.sysctl_tcp_reordering,
923 .maxlen = sizeof(int),
924 .mode = 0644,
925 .proc_handler = proc_dointvec
926 },
927 {
928 .procname = "tcp_retries1",
929 .data = &init_net.ipv4.sysctl_tcp_retries1,
930 .maxlen = sizeof(int),
931 .mode = 0644,
932 .proc_handler = proc_dointvec_minmax,
933 .extra2 = &tcp_retr1_max
934 },
935 {
936 .procname = "tcp_retries2",
937 .data = &init_net.ipv4.sysctl_tcp_retries2,
938 .maxlen = sizeof(int),
939 .mode = 0644,
940 .proc_handler = proc_dointvec
941 },
942 {
943 .procname = "tcp_orphan_retries",
944 .data = &init_net.ipv4.sysctl_tcp_orphan_retries,
945 .maxlen = sizeof(int),
946 .mode = 0644,
947 .proc_handler = proc_dointvec
948 },
949 {
950 .procname = "tcp_fin_timeout",
951 .data = &init_net.ipv4.sysctl_tcp_fin_timeout,
952 .maxlen = sizeof(int),
953 .mode = 0644,
954 .proc_handler = proc_dointvec_jiffies,
955 },
956 {
957 .procname = "tcp_notsent_lowat",
958 .data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
959 .maxlen = sizeof(unsigned int),
960 .mode = 0644,
961 .proc_handler = proc_dointvec,
962 },
963 { } 963 { }
964}; 964};
965 965
@@ -988,6 +988,10 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
988 if (!net->ipv4.sysctl_local_reserved_ports) 988 if (!net->ipv4.sysctl_local_reserved_ports)
989 goto err_ports; 989 goto err_ports;
990 990
991 net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
992 net->ipv4.sysctl_ip_dynaddr = 0;
993 net->ipv4.sysctl_ip_early_demux = 1;
994
991 return 0; 995 return 0;
992 996
993err_ports: 997err_ports:
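
With ip_default_ttl, ip_dynaddr, ip_early_demux and the relocated TCP knobs now living in ipv4_net_table, every network namespace gets its own copy, seeded in ipv4_sysctl_init_net() (IPDEFTTL, 0 and 1 for the three IP entries). Userspace paths are unchanged; reads simply become namespace-local, as the small sketch below shows (it assumes the standard /proc/sys/net/ipv4 paths).

#include <stdio.h>

static long read_sysctl(const char *path)
{
	FILE *f = fopen(path, "r");
	long val = -1;

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	/* Values are per network namespace after this series. */
	printf("ip_default_ttl  = %ld\n",
	       read_sysctl("/proc/sys/net/ipv4/ip_default_ttl"));
	printf("tcp_fin_timeout = %ld\n",
	       read_sysctl("/proc/sys/net/ipv4/tcp_fin_timeout"));
	return 0;
}
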
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 483ffdf5aa4d..08b8b960a8ed 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -247,6 +247,7 @@
247 247
248#define pr_fmt(fmt) "TCP: " fmt 248#define pr_fmt(fmt) "TCP: " fmt
249 249
250#include <crypto/hash.h>
250#include <linux/kernel.h> 251#include <linux/kernel.h>
251#include <linux/module.h> 252#include <linux/module.h>
252#include <linux/types.h> 253#include <linux/types.h>
@@ -266,7 +267,6 @@
266#include <linux/swap.h> 267#include <linux/swap.h>
267#include <linux/cache.h> 268#include <linux/cache.h>
268#include <linux/err.h> 269#include <linux/err.h>
269#include <linux/crypto.h>
270#include <linux/time.h> 270#include <linux/time.h>
271#include <linux/slab.h> 271#include <linux/slab.h>
272 272
@@ -282,8 +282,6 @@
282#include <asm/unaligned.h> 282#include <asm/unaligned.h>
283#include <net/busy_poll.h> 283#include <net/busy_poll.h>
284 284
285int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
286
287int sysctl_tcp_min_tso_segs __read_mostly = 2; 285int sysctl_tcp_min_tso_segs __read_mostly = 2;
288 286
289int sysctl_tcp_autocorking __read_mostly = 1; 287int sysctl_tcp_autocorking __read_mostly = 1;
@@ -406,7 +404,7 @@ void tcp_init_sock(struct sock *sk)
406 tp->mss_cache = TCP_MSS_DEFAULT; 404 tp->mss_cache = TCP_MSS_DEFAULT;
407 u64_stats_init(&tp->syncp); 405 u64_stats_init(&tp->syncp);
408 406
409 tp->reordering = sysctl_tcp_reordering; 407 tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
410 tcp_enable_early_retrans(tp); 408 tcp_enable_early_retrans(tp);
411 tcp_assign_congestion_control(sk); 409 tcp_assign_congestion_control(sk);
412 410
@@ -558,20 +556,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
558 return -EINVAL; 556 return -EINVAL;
559 557
560 slow = lock_sock_fast(sk); 558 slow = lock_sock_fast(sk);
561 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) 559 answ = tcp_inq(sk);
562 answ = 0;
563 else if (sock_flag(sk, SOCK_URGINLINE) ||
564 !tp->urg_data ||
565 before(tp->urg_seq, tp->copied_seq) ||
566 !before(tp->urg_seq, tp->rcv_nxt)) {
567
568 answ = tp->rcv_nxt - tp->copied_seq;
569
570 /* Subtract 1, if FIN was received */
571 if (answ && sock_flag(sk, SOCK_DONE))
572 answ--;
573 } else
574 answ = tp->urg_seq - tp->copied_seq;
575 unlock_sock_fast(sk, slow); 560 unlock_sock_fast(sk, slow);
576 break; 561 break;
577 case SIOCATMARK: 562 case SIOCATMARK:
@@ -1466,8 +1451,10 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1466 1451
1467 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { 1452 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
1468 offset = seq - TCP_SKB_CB(skb)->seq; 1453 offset = seq - TCP_SKB_CB(skb)->seq;
1469 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 1454 if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
1455 pr_err_once("%s: found a SYN, please report !\n", __func__);
1470 offset--; 1456 offset--;
1457 }
1471 if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { 1458 if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
1472 *off = offset; 1459 *off = offset;
1473 return skb; 1460 return skb;
@@ -1657,8 +1644,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
1657 break; 1644 break;
1658 1645
1659 offset = *seq - TCP_SKB_CB(skb)->seq; 1646 offset = *seq - TCP_SKB_CB(skb)->seq;
1660 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 1647 if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
1648 pr_err_once("%s: found a SYN, please report !\n", __func__);
1661 offset--; 1649 offset--;
1650 }
1662 if (offset < skb->len) 1651 if (offset < skb->len)
1663 goto found_ok_skb; 1652 goto found_ok_skb;
1664 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 1653 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -2326,6 +2315,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2326{ 2315{
2327 struct tcp_sock *tp = tcp_sk(sk); 2316 struct tcp_sock *tp = tcp_sk(sk);
2328 struct inet_connection_sock *icsk = inet_csk(sk); 2317 struct inet_connection_sock *icsk = inet_csk(sk);
2318 struct net *net = sock_net(sk);
2329 int val; 2319 int val;
2330 int err = 0; 2320 int err = 0;
2331 2321
@@ -2522,7 +2512,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2522 case TCP_LINGER2: 2512 case TCP_LINGER2:
2523 if (val < 0) 2513 if (val < 0)
2524 tp->linger2 = -1; 2514 tp->linger2 = -1;
2525 else if (val > sysctl_tcp_fin_timeout / HZ) 2515 else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
2526 tp->linger2 = 0; 2516 tp->linger2 = 0;
2527 else 2517 else
2528 tp->linger2 = val * HZ; 2518 tp->linger2 = val * HZ;
@@ -2639,6 +2629,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2639 const struct inet_connection_sock *icsk = inet_csk(sk); 2629 const struct inet_connection_sock *icsk = inet_csk(sk);
2640 u32 now = tcp_time_stamp; 2630 u32 now = tcp_time_stamp;
2641 unsigned int start; 2631 unsigned int start;
2632 int notsent_bytes;
2642 u64 rate64; 2633 u64 rate64;
2643 u32 rate; 2634 u32 rate;
2644 2635
@@ -2719,6 +2710,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2719 } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); 2710 } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
2720 info->tcpi_segs_out = tp->segs_out; 2711 info->tcpi_segs_out = tp->segs_out;
2721 info->tcpi_segs_in = tp->segs_in; 2712 info->tcpi_segs_in = tp->segs_in;
2713
2714 notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
2715 info->tcpi_notsent_bytes = max(0, notsent_bytes);
2716
2717 info->tcpi_min_rtt = tcp_min_rtt(tp);
2718 info->tcpi_data_segs_in = tp->data_segs_in;
2719 info->tcpi_data_segs_out = tp->data_segs_out;
2722} 2720}
2723EXPORT_SYMBOL_GPL(tcp_get_info); 2721EXPORT_SYMBOL_GPL(tcp_get_info);
2724 2722
@@ -2727,6 +2725,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2727{ 2725{
2728 struct inet_connection_sock *icsk = inet_csk(sk); 2726 struct inet_connection_sock *icsk = inet_csk(sk);
2729 struct tcp_sock *tp = tcp_sk(sk); 2727 struct tcp_sock *tp = tcp_sk(sk);
2728 struct net *net = sock_net(sk);
2730 int val, len; 2729 int val, len;
2731 2730
2732 if (get_user(len, optlen)) 2731 if (get_user(len, optlen))
@@ -2761,12 +2760,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2761 val = keepalive_probes(tp); 2760 val = keepalive_probes(tp);
2762 break; 2761 break;
2763 case TCP_SYNCNT: 2762 case TCP_SYNCNT:
2764 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 2763 val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
2765 break; 2764 break;
2766 case TCP_LINGER2: 2765 case TCP_LINGER2:
2767 val = tp->linger2; 2766 val = tp->linger2;
2768 if (val >= 0) 2767 if (val >= 0)
2769 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2768 val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
2770 break; 2769 break;
2771 case TCP_DEFER_ACCEPT: 2770 case TCP_DEFER_ACCEPT:
2772 val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, 2771 val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@ -2943,17 +2942,26 @@ static bool tcp_md5sig_pool_populated = false;
2943 2942
2944static void __tcp_alloc_md5sig_pool(void) 2943static void __tcp_alloc_md5sig_pool(void)
2945{ 2944{
2945 struct crypto_ahash *hash;
2946 int cpu; 2946 int cpu;
2947 2947
2948 hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
2949 if (IS_ERR(hash))
2950 return;
2951
2948 for_each_possible_cpu(cpu) { 2952 for_each_possible_cpu(cpu) {
2949 if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { 2953 struct ahash_request *req;
2950 struct crypto_hash *hash;
2951 2954
2952 hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); 2955 if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
2953 if (IS_ERR(hash)) 2956 continue;
2954 return; 2957
2955 per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; 2958 req = ahash_request_alloc(hash, GFP_KERNEL);
2956 } 2959 if (!req)
2960 return;
2961
2962 ahash_request_set_callback(req, 0, NULL, NULL);
2963
2964 per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
2957 } 2965 }
2958 /* before setting tcp_md5sig_pool_populated, we must commit all writes 2966 /* before setting tcp_md5sig_pool_populated, we must commit all writes
2959 * to memory. See smp_rmb() in tcp_get_md5sig_pool() 2967 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
@@ -3003,7 +3011,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
3003{ 3011{
3004 struct scatterlist sg; 3012 struct scatterlist sg;
3005 struct tcphdr hdr; 3013 struct tcphdr hdr;
3006 int err;
3007 3014
3008 /* We are not allowed to change tcphdr, make a local copy */ 3015 /* We are not allowed to change tcphdr, make a local copy */
3009 memcpy(&hdr, th, sizeof(hdr)); 3016 memcpy(&hdr, th, sizeof(hdr));
@@ -3011,8 +3018,8 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
3011 3018
3012 /* options aren't included in the hash */ 3019 /* options aren't included in the hash */
3013 sg_init_one(&sg, &hdr, sizeof(hdr)); 3020 sg_init_one(&sg, &hdr, sizeof(hdr));
3014 err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr)); 3021 ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr));
3015 return err; 3022 return crypto_ahash_update(hp->md5_req);
3016} 3023}
3017EXPORT_SYMBOL(tcp_md5_hash_header); 3024EXPORT_SYMBOL(tcp_md5_hash_header);
3018 3025
@@ -3021,7 +3028,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3021{ 3028{
3022 struct scatterlist sg; 3029 struct scatterlist sg;
3023 const struct tcphdr *tp = tcp_hdr(skb); 3030 const struct tcphdr *tp = tcp_hdr(skb);
3024 struct hash_desc *desc = &hp->md5_desc; 3031 struct ahash_request *req = hp->md5_req;
3025 unsigned int i; 3032 unsigned int i;
3026 const unsigned int head_data_len = skb_headlen(skb) > header_len ? 3033 const unsigned int head_data_len = skb_headlen(skb) > header_len ?
3027 skb_headlen(skb) - header_len : 0; 3034 skb_headlen(skb) - header_len : 0;
@@ -3031,7 +3038,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3031 sg_init_table(&sg, 1); 3038 sg_init_table(&sg, 1);
3032 3039
3033 sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len); 3040 sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
3034 if (crypto_hash_update(desc, &sg, head_data_len)) 3041 ahash_request_set_crypt(req, &sg, NULL, head_data_len);
3042 if (crypto_ahash_update(req))
3035 return 1; 3043 return 1;
3036 3044
3037 for (i = 0; i < shi->nr_frags; ++i) { 3045 for (i = 0; i < shi->nr_frags; ++i) {
@@ -3041,7 +3049,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3041 3049
3042 sg_set_page(&sg, page, skb_frag_size(f), 3050 sg_set_page(&sg, page, skb_frag_size(f),
3043 offset_in_page(offset)); 3051 offset_in_page(offset));
3044 if (crypto_hash_update(desc, &sg, skb_frag_size(f))) 3052 ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
3053 if (crypto_ahash_update(req))
3045 return 1; 3054 return 1;
3046 } 3055 }
3047 3056
@@ -3058,7 +3067,8 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *ke
3058 struct scatterlist sg; 3067 struct scatterlist sg;
3059 3068
3060 sg_init_one(&sg, key->key, key->keylen); 3069 sg_init_one(&sg, key->key, key->keylen);
3061 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); 3070 ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
3071 return crypto_ahash_update(hp->md5_req);
3062} 3072}
3063EXPORT_SYMBOL(tcp_md5_hash_key); 3073EXPORT_SYMBOL(tcp_md5_hash_key);
3064 3074
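
On the tcp.c side, SIOCINQ now goes through tcp_inq(), and tcp_get_info() additionally reports tcpi_notsent_bytes (write_seq - snd_nxt, clamped at zero) plus min RTT and data-segment counters. The userspace sketch below queries SIOCINQ on a TCP socket; TCP_INFO is only mentioned in a comment because the new field needs sufficiently recent kernel headers, and the socket here is an unconnected placeholder.

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/sockios.h>   /* SIOCINQ */

/* Print how many bytes are queued for reading on a TCP socket. */
static void print_inq(int fd)
{
	int unread = 0;

	if (ioctl(fd, SIOCINQ, &unread) == 0)
		printf("unread bytes in receive queue: %d\n", unread);

	/* With new enough headers, struct tcp_info from
	 * getsockopt(fd, IPPROTO_TCP, TCP_INFO, ...) also carries
	 * tcpi_notsent_bytes, the not-yet-sent part of the write queue. */
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);   /* placeholder socket */

	print_inq(fd);   /* on an unconnected socket this just prints 0 */
	return 0;
}
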
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 55be6ac70cff..cffd8f9ed1a9 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,3 +1,4 @@
1#include <linux/crypto.h>
1#include <linux/err.h> 2#include <linux/err.h>
2#include <linux/init.h> 3#include <linux/init.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
@@ -124,6 +125,49 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
124 return false; 125 return false;
125} 126}
126 127
128
129/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
130 * queue this additional data / FIN.
131 */
132void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
133{
134 struct tcp_sock *tp = tcp_sk(sk);
135
136 if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
137 return;
138
139 skb = skb_clone(skb, GFP_ATOMIC);
140 if (!skb)
141 return;
142
143 skb_dst_drop(skb);
144 /* segs_in has been initialized to 1 in tcp_create_openreq_child().
145 * Hence, reset segs_in to 0 before calling tcp_segs_in()
146 * to avoid double counting. Also, tcp_segs_in() expects
147 * skb->len to include the tcp_hdrlen. Hence, it should
148 * be called before __skb_pull().
149 */
150 tp->segs_in = 0;
151 tcp_segs_in(tp, skb);
152 __skb_pull(skb, tcp_hdrlen(skb));
153 skb_set_owner_r(skb, sk);
154
155 TCP_SKB_CB(skb)->seq++;
156 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
157
158 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
159 __skb_queue_tail(&sk->sk_receive_queue, skb);
160 tp->syn_data_acked = 1;
161
162 /* u64_stats_update_begin(&tp->syncp) not needed here,
163 * as we certainly are not changing upper 32bit value (0)
164 */
165 tp->bytes_received = skb->len;
166
167 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
168 tcp_fin(sk);
169}
170
127static struct sock *tcp_fastopen_create_child(struct sock *sk, 171static struct sock *tcp_fastopen_create_child(struct sock *sk,
128 struct sk_buff *skb, 172 struct sk_buff *skb,
129 struct dst_entry *dst, 173 struct dst_entry *dst,
@@ -132,7 +176,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
132 struct tcp_sock *tp; 176 struct tcp_sock *tp;
133 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 177 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
134 struct sock *child; 178 struct sock *child;
135 u32 end_seq;
136 bool own_req; 179 bool own_req;
137 180
138 req->num_retrans = 0; 181 req->num_retrans = 0;
@@ -178,35 +221,11 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
178 tcp_init_metrics(child); 221 tcp_init_metrics(child);
179 tcp_init_buffer_space(child); 222 tcp_init_buffer_space(child);
180 223
181 /* Queue the data carried in the SYN packet. 224 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
182 * We used to play tricky games with skb_get(). 225
183 * With lockless listener, it is a dead end. 226 tcp_fastopen_add_skb(child, skb);
184 * Do not think about it. 227
185 * 228 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
186 * XXX (TFO) - we honor a zero-payload TFO request for now,
187 * (any reason not to?) but no need to queue the skb since
188 * there is no data. How about SYN+FIN?
189 */
190 end_seq = TCP_SKB_CB(skb)->end_seq;
191 if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
192 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
193
194 if (likely(skb2)) {
195 skb_dst_drop(skb2);
196 __skb_pull(skb2, tcp_hdrlen(skb));
197 skb_set_owner_r(skb2, child);
198 __skb_queue_tail(&child->sk_receive_queue, skb2);
199 tp->syn_data_acked = 1;
200
201 /* u64_stats_update_begin(&tp->syncp) not needed here,
202 * as we certainly are not changing upper 32bit value (0)
203 */
204 tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
205 } else {
206 end_seq = TCP_SKB_CB(skb)->seq + 1;
207 }
208 }
209 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
210 /* tcp_conn_request() is sending the SYNACK, 229 /* tcp_conn_request() is sending the SYNACK,
211 * and queues the child into listener accept queue. 230 * and queues the child into listener accept queue.
212 */ 231 */
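
tcp_fastopen_add_skb() becomes the single place that queues SYN-carried payload (and a possible FIN) on the child socket, replacing the open-coded clone in tcp_fastopen_create_child(). On the client side the feature is driven by sendto() with MSG_FASTOPEN; the sketch below is a minimal client, under the assumptions that the client bit of net.ipv4.tcp_fastopen is enabled and that 192.0.2.10:80 is a made-up server.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in srv = {
		.sin_family = AF_INET,
		.sin_port   = htons(80),            /* hypothetical server */
	};
	const char req[] = "GET / HTTP/1.0\r\n\r\n";
	ssize_t n;

	inet_pton(AF_INET, "192.0.2.10", &srv.sin_addr);

	/* Data rides on the SYN when a Fast Open cookie is cached;
	 * otherwise the kernel falls back to a normal handshake. */
	n = sendto(fd, req, strlen(req), MSG_FASTOPEN,
		   (struct sockaddr *)&srv, sizeof(srv));
	if (n < 0)
		perror("sendto(MSG_FASTOPEN)");

	close(fd);
	return 0;
}
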
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3b2c8e90a475..c124c3c12f7c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -80,9 +80,7 @@ int sysctl_tcp_timestamps __read_mostly = 1;
80int sysctl_tcp_window_scaling __read_mostly = 1; 80int sysctl_tcp_window_scaling __read_mostly = 1;
81int sysctl_tcp_sack __read_mostly = 1; 81int sysctl_tcp_sack __read_mostly = 1;
82int sysctl_tcp_fack __read_mostly = 1; 82int sysctl_tcp_fack __read_mostly = 1;
83int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
84int sysctl_tcp_max_reordering __read_mostly = 300; 83int sysctl_tcp_max_reordering __read_mostly = 300;
85EXPORT_SYMBOL(sysctl_tcp_reordering);
86int sysctl_tcp_dsack __read_mostly = 1; 84int sysctl_tcp_dsack __read_mostly = 1;
87int sysctl_tcp_app_win __read_mostly = 31; 85int sysctl_tcp_app_win __read_mostly = 31;
88int sysctl_tcp_adv_win_scale __read_mostly = 1; 86int sysctl_tcp_adv_win_scale __read_mostly = 1;
@@ -126,6 +124,10 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
126#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) 124#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
127#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) 125#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
128 126
127#define REXMIT_NONE 0 /* no loss recovery to do */
128#define REXMIT_LOST 1 /* retransmit packets marked lost */
129#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */
130
129/* Adapt the MSS value used to make delayed ack decision to the 131/* Adapt the MSS value used to make delayed ack decision to the
130 * real world. 132 * real world.
131 */ 133 */
@@ -1210,6 +1212,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
1210 sacked |= TCPCB_SACKED_ACKED; 1212 sacked |= TCPCB_SACKED_ACKED;
1211 state->flag |= FLAG_DATA_SACKED; 1213 state->flag |= FLAG_DATA_SACKED;
1212 tp->sacked_out += pcount; 1214 tp->sacked_out += pcount;
1215 tp->delivered += pcount; /* Out-of-order packets delivered */
1213 1216
1214 fack_count += pcount; 1217 fack_count += pcount;
1215 1218
@@ -1306,6 +1309,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1306 if (skb == tcp_highest_sack(sk)) 1309 if (skb == tcp_highest_sack(sk))
1307 tcp_advance_highest_sack(sk, skb); 1310 tcp_advance_highest_sack(sk, skb);
1308 1311
1312 tcp_skb_collapse_tstamp(prev, skb);
1309 tcp_unlink_write_queue(skb, sk); 1313 tcp_unlink_write_queue(skb, sk);
1310 sk_wmem_free_skb(sk, skb); 1314 sk_wmem_free_skb(sk, skb);
1311 1315
@@ -1821,8 +1825,12 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1821static void tcp_add_reno_sack(struct sock *sk) 1825static void tcp_add_reno_sack(struct sock *sk)
1822{ 1826{
1823 struct tcp_sock *tp = tcp_sk(sk); 1827 struct tcp_sock *tp = tcp_sk(sk);
1828 u32 prior_sacked = tp->sacked_out;
1829
1824 tp->sacked_out++; 1830 tp->sacked_out++;
1825 tcp_check_reno_reordering(sk, 0); 1831 tcp_check_reno_reordering(sk, 0);
1832 if (tp->sacked_out > prior_sacked)
1833 tp->delivered++; /* Some out-of-order packet is delivered */
1826 tcp_verify_left_out(tp); 1834 tcp_verify_left_out(tp);
1827} 1835}
1828 1836
@@ -1834,6 +1842,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1834 1842
1835 if (acked > 0) { 1843 if (acked > 0) {
1836 /* One ACK acked hole. The rest eat duplicate ACKs. */ 1844 /* One ACK acked hole. The rest eat duplicate ACKs. */
1845 tp->delivered += max_t(int, acked - tp->sacked_out, 1);
1837 if (acked - 1 >= tp->sacked_out) 1846 if (acked - 1 >= tp->sacked_out)
1838 tp->sacked_out = 0; 1847 tp->sacked_out = 0;
1839 else 1848 else
@@ -1873,6 +1882,7 @@ void tcp_enter_loss(struct sock *sk)
1873{ 1882{
1874 const struct inet_connection_sock *icsk = inet_csk(sk); 1883 const struct inet_connection_sock *icsk = inet_csk(sk);
1875 struct tcp_sock *tp = tcp_sk(sk); 1884 struct tcp_sock *tp = tcp_sk(sk);
1885 struct net *net = sock_net(sk);
1876 struct sk_buff *skb; 1886 struct sk_buff *skb;
1877 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; 1887 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
1878 bool is_reneg; /* is receiver reneging on SACKs? */ 1888 bool is_reneg; /* is receiver reneging on SACKs? */
@@ -1923,9 +1933,9 @@ void tcp_enter_loss(struct sock *sk)
1923 * suggests that the degree of reordering is over-estimated. 1933 * suggests that the degree of reordering is over-estimated.
1924 */ 1934 */
1925 if (icsk->icsk_ca_state <= TCP_CA_Disorder && 1935 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
1926 tp->sacked_out >= sysctl_tcp_reordering) 1936 tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
1927 tp->reordering = min_t(unsigned int, tp->reordering, 1937 tp->reordering = min_t(unsigned int, tp->reordering,
1928 sysctl_tcp_reordering); 1938 net->ipv4.sysctl_tcp_reordering);
1929 tcp_set_ca_state(sk, TCP_CA_Loss); 1939 tcp_set_ca_state(sk, TCP_CA_Loss);
1930 tp->high_seq = tp->snd_nxt; 1940 tp->high_seq = tp->snd_nxt;
1931 tcp_ecn_queue_cwr(tp); 1941 tcp_ecn_queue_cwr(tp);
@@ -2109,6 +2119,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2109{ 2119{
2110 struct tcp_sock *tp = tcp_sk(sk); 2120 struct tcp_sock *tp = tcp_sk(sk);
2111 __u32 packets_out; 2121 __u32 packets_out;
2122 int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
2112 2123
2113 /* Trick#1: The loss is proven. */ 2124 /* Trick#1: The loss is proven. */
2114 if (tp->lost_out) 2125 if (tp->lost_out)
@@ -2123,7 +2134,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2123 */ 2134 */
2124 packets_out = tp->packets_out; 2135 packets_out = tp->packets_out;
2125 if (packets_out <= tp->reordering && 2136 if (packets_out <= tp->reordering &&
2126 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && 2137 tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) &&
2127 !tcp_may_send_now(sk)) { 2138 !tcp_may_send_now(sk)) {
2128 /* We have nothing to send. This connection is limited 2139 /* We have nothing to send. This connection is limited
2129 * either by receiver window or by application. 2140 * either by receiver window or by application.
@@ -2467,14 +2478,12 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
2467 tcp_ecn_queue_cwr(tp); 2478 tcp_ecn_queue_cwr(tp);
2468} 2479}
2469 2480
2470static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, 2481static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
2471 int fast_rexmit, int flag) 2482 int flag)
2472{ 2483{
2473 struct tcp_sock *tp = tcp_sk(sk); 2484 struct tcp_sock *tp = tcp_sk(sk);
2474 int sndcnt = 0; 2485 int sndcnt = 0;
2475 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); 2486 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2476 int newly_acked_sacked = prior_unsacked -
2477 (tp->packets_out - tp->sacked_out);
2478 2487
2479 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd)) 2488 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
2480 return; 2489 return;
@@ -2492,7 +2501,8 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
2492 } else { 2501 } else {
2493 sndcnt = min(delta, newly_acked_sacked); 2502 sndcnt = min(delta, newly_acked_sacked);
2494 } 2503 }
2495 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); 2504 /* Force a fast retransmit upon entering fast recovery */
2505 sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
2496 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; 2506 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2497} 2507}
2498 2508
@@ -2537,7 +2547,7 @@ static void tcp_try_keep_open(struct sock *sk)
2537 } 2547 }
2538} 2548}
2539 2549
2540static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) 2550static void tcp_try_to_open(struct sock *sk, int flag)
2541{ 2551{
2542 struct tcp_sock *tp = tcp_sk(sk); 2552 struct tcp_sock *tp = tcp_sk(sk);
2543 2553
@@ -2551,8 +2561,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
2551 2561
2552 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2562 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2553 tcp_try_keep_open(sk); 2563 tcp_try_keep_open(sk);
2554 } else {
2555 tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
2556 } 2564 }
2557} 2565}
2558 2566
@@ -2662,7 +2670,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2662/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are 2670/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
2663 * recovered or spurious. Otherwise retransmits more on partial ACKs. 2671 * recovered or spurious. Otherwise retransmits more on partial ACKs.
2664 */ 2672 */
2665static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) 2673static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
2674 int *rexmit)
2666{ 2675{
2667 struct tcp_sock *tp = tcp_sk(sk); 2676 struct tcp_sock *tp = tcp_sk(sk);
2668 bool recovered = !before(tp->snd_una, tp->high_seq); 2677 bool recovered = !before(tp->snd_una, tp->high_seq);
@@ -2684,10 +2693,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2684 tp->frto = 0; /* Step 3.a. loss was real */ 2693 tp->frto = 0; /* Step 3.a. loss was real */
2685 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { 2694 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2686 tp->high_seq = tp->snd_nxt; 2695 tp->high_seq = tp->snd_nxt;
2687 __tcp_push_pending_frames(sk, tcp_current_mss(sk), 2696 /* Step 2.b. Try send new data (but deferred until cwnd
2688 TCP_NAGLE_OFF); 2697 * is updated in tcp_ack()). Otherwise fall back to
2689 if (after(tp->snd_nxt, tp->high_seq)) 2698 * the conventional recovery.
2690 return; /* Step 2.b */ 2699 */
2700 if (tcp_send_head(sk) &&
2701 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2702 *rexmit = REXMIT_NEW;
2703 return;
2704 }
2691 tp->frto = 0; 2705 tp->frto = 0;
2692 } 2706 }
2693 } 2707 }
@@ -2706,12 +2720,11 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2706 else if (flag & FLAG_SND_UNA_ADVANCED) 2720 else if (flag & FLAG_SND_UNA_ADVANCED)
2707 tcp_reset_reno_sack(tp); 2721 tcp_reset_reno_sack(tp);
2708 } 2722 }
2709 tcp_xmit_retransmit_queue(sk); 2723 *rexmit = REXMIT_LOST;
2710} 2724}
2711 2725
2712/* Undo during fast recovery after partial ACK. */ 2726/* Undo during fast recovery after partial ACK. */
2713static bool tcp_try_undo_partial(struct sock *sk, const int acked, 2727static bool tcp_try_undo_partial(struct sock *sk, const int acked)
2714 const int prior_unsacked, int flag)
2715{ 2728{
2716 struct tcp_sock *tp = tcp_sk(sk); 2729 struct tcp_sock *tp = tcp_sk(sk);
2717 2730
@@ -2726,10 +2739,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2726 * can undo. Otherwise we clock out new packets but do not 2739 * can undo. Otherwise we clock out new packets but do not
2727 * mark more packets lost or retransmit more. 2740 * mark more packets lost or retransmit more.
2728 */ 2741 */
2729 if (tp->retrans_out) { 2742 if (tp->retrans_out)
2730 tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
2731 return true; 2743 return true;
2732 }
2733 2744
2734 if (!tcp_any_retrans_done(sk)) 2745 if (!tcp_any_retrans_done(sk))
2735 tp->retrans_stamp = 0; 2746 tp->retrans_stamp = 0;
@@ -2748,21 +2759,21 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2748 * taking into account both packets sitting in receiver's buffer and 2759 * taking into account both packets sitting in receiver's buffer and
2749 * packets lost by network. 2760 * packets lost by network.
2750 * 2761 *
2751 * Besides that it does CWND reduction, when packet loss is detected 2762 * Besides that it updates the congestion state when packet loss or ECN
2752 * and changes state of machine. 2763 * is detected. But it does not reduce the cwnd, it is done by the
2764 * congestion control later.
2753 * 2765 *
2754 * It does _not_ decide what to send, it is made in function 2766 * It does _not_ decide what to send, it is made in function
2755 * tcp_xmit_retransmit_queue(). 2767 * tcp_xmit_retransmit_queue().
2756 */ 2768 */
2757static void tcp_fastretrans_alert(struct sock *sk, const int acked, 2769static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2758 const int prior_unsacked, 2770 bool is_dupack, int *ack_flag, int *rexmit)
2759 bool is_dupack, int flag)
2760{ 2771{
2761 struct inet_connection_sock *icsk = inet_csk(sk); 2772 struct inet_connection_sock *icsk = inet_csk(sk);
2762 struct tcp_sock *tp = tcp_sk(sk); 2773 struct tcp_sock *tp = tcp_sk(sk);
2774 int fast_rexmit = 0, flag = *ack_flag;
2763 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && 2775 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2764 (tcp_fackets_out(tp) > tp->reordering)); 2776 (tcp_fackets_out(tp) > tp->reordering));
2765 int fast_rexmit = 0;
2766 2777
2767 if (WARN_ON(!tp->packets_out && tp->sacked_out)) 2778 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2768 tp->sacked_out = 0; 2779 tp->sacked_out = 0;
@@ -2809,8 +2820,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2809 2820
2810 /* Use RACK to detect loss */ 2821 /* Use RACK to detect loss */
2811 if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS && 2822 if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
2812 tcp_rack_mark_lost(sk)) 2823 tcp_rack_mark_lost(sk)) {
2813 flag |= FLAG_LOST_RETRANS; 2824 flag |= FLAG_LOST_RETRANS;
2825 *ack_flag |= FLAG_LOST_RETRANS;
2826 }
2814 2827
2815 /* E. Process state. */ 2828 /* E. Process state. */
2816 switch (icsk->icsk_ca_state) { 2829 switch (icsk->icsk_ca_state) {
@@ -2819,7 +2832,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2819 if (tcp_is_reno(tp) && is_dupack) 2832 if (tcp_is_reno(tp) && is_dupack)
2820 tcp_add_reno_sack(sk); 2833 tcp_add_reno_sack(sk);
2821 } else { 2834 } else {
2822 if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag)) 2835 if (tcp_try_undo_partial(sk, acked))
2823 return; 2836 return;
2824 /* Partial ACK arrived. Force fast retransmit. */ 2837 /* Partial ACK arrived. Force fast retransmit. */
2825 do_lost = tcp_is_reno(tp) || 2838 do_lost = tcp_is_reno(tp) ||
@@ -2831,7 +2844,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2831 } 2844 }
2832 break; 2845 break;
2833 case TCP_CA_Loss: 2846 case TCP_CA_Loss:
2834 tcp_process_loss(sk, flag, is_dupack); 2847 tcp_process_loss(sk, flag, is_dupack, rexmit);
2835 if (icsk->icsk_ca_state != TCP_CA_Open && 2848 if (icsk->icsk_ca_state != TCP_CA_Open &&
2836 !(flag & FLAG_LOST_RETRANS)) 2849 !(flag & FLAG_LOST_RETRANS))
2837 return; 2850 return;
@@ -2848,7 +2861,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2848 tcp_try_undo_dsack(sk); 2861 tcp_try_undo_dsack(sk);
2849 2862
2850 if (!tcp_time_to_recover(sk, flag)) { 2863 if (!tcp_time_to_recover(sk, flag)) {
2851 tcp_try_to_open(sk, flag, prior_unsacked); 2864 tcp_try_to_open(sk, flag);
2852 return; 2865 return;
2853 } 2866 }
2854 2867
@@ -2870,8 +2883,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2870 2883
2871 if (do_lost) 2884 if (do_lost)
2872 tcp_update_scoreboard(sk, fast_rexmit); 2885 tcp_update_scoreboard(sk, fast_rexmit);
2873 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag); 2886 *rexmit = REXMIT_LOST;
2874 tcp_xmit_retransmit_queue(sk);
2875} 2887}
2876 2888
2877/* Kathleen Nichols' algorithm for tracking the minimum value of 2889/* Kathleen Nichols' algorithm for tracking the minimum value of
@@ -3087,7 +3099,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3087 3099
3088 shinfo = skb_shinfo(skb); 3100 shinfo = skb_shinfo(skb);
3089 if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && 3101 if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
3090 between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1)) 3102 !before(shinfo->tskey, prior_snd_una) &&
3103 before(shinfo->tskey, tcp_sk(sk)->snd_una))
3091 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); 3104 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3092} 3105}
3093 3106
@@ -3096,7 +3109,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3096 * arrived at the other end. 3109 * arrived at the other end.
3097 */ 3110 */
3098static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, 3111static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3099 u32 prior_snd_una, 3112 u32 prior_snd_una, int *acked,
3100 struct tcp_sacktag_state *sack) 3113 struct tcp_sacktag_state *sack)
3101{ 3114{
3102 const struct inet_connection_sock *icsk = inet_csk(sk); 3115 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3154,10 +3167,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3154 flag |= FLAG_ORIG_SACK_ACKED; 3167 flag |= FLAG_ORIG_SACK_ACKED;
3155 } 3168 }
3156 3169
3157 if (sacked & TCPCB_SACKED_ACKED) 3170 if (sacked & TCPCB_SACKED_ACKED) {
3158 tp->sacked_out -= acked_pcount; 3171 tp->sacked_out -= acked_pcount;
3159 else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) 3172 } else if (tcp_is_sack(tp)) {
3160 tcp_rack_advance(tp, &skb->skb_mstamp, sacked); 3173 tp->delivered += acked_pcount;
3174 if (!tcp_skb_spurious_retrans(tp, skb))
3175 tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
3176 }
3161 if (sacked & TCPCB_LOST) 3177 if (sacked & TCPCB_LOST)
3162 tp->lost_out -= acked_pcount; 3178 tp->lost_out -= acked_pcount;
3163 3179
@@ -3266,6 +3282,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3266 } 3282 }
3267 } 3283 }
3268#endif 3284#endif
3285 *acked = pkts_acked;
3269 return flag; 3286 return flag;
3270} 3287}
3271 3288
@@ -3299,21 +3316,36 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3299/* Decide wheather to run the increase function of congestion control. */ 3316/* Decide wheather to run the increase function of congestion control. */
3300static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) 3317static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3301{ 3318{
3302 if (tcp_in_cwnd_reduction(sk))
3303 return false;
3304
3305 /* If reordering is high then always grow cwnd whenever data is 3319 /* If reordering is high then always grow cwnd whenever data is
3306 * delivered regardless of its ordering. Otherwise stay conservative 3320 * delivered regardless of its ordering. Otherwise stay conservative
3307 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/ 3321 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
3308 * new SACK or ECE mark may first advance cwnd here and later reduce 3322 * new SACK or ECE mark may first advance cwnd here and later reduce
3309 * cwnd in tcp_fastretrans_alert() based on more states. 3323 * cwnd in tcp_fastretrans_alert() based on more states.
3310 */ 3324 */
3311 if (tcp_sk(sk)->reordering > sysctl_tcp_reordering) 3325 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3312 return flag & FLAG_FORWARD_PROGRESS; 3326 return flag & FLAG_FORWARD_PROGRESS;
3313 3327
3314 return flag & FLAG_DATA_ACKED; 3328 return flag & FLAG_DATA_ACKED;
3315} 3329}
3316 3330
3331/* The "ultimate" congestion control function that aims to replace the rigid
3332 * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction).
3333 * It's called toward the end of processing an ACK with precise rate
3334 * information. All transmission or retransmission are delayed afterwards.
3335 */
3336static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3337 int flag)
3338{
3339 if (tcp_in_cwnd_reduction(sk)) {
3340 /* Reduce cwnd if state mandates */
3341 tcp_cwnd_reduction(sk, acked_sacked, flag);
3342 } else if (tcp_may_raise_cwnd(sk, flag)) {
3343 /* Advance cwnd if state allows */
3344 tcp_cong_avoid(sk, ack, acked_sacked);
3345 }
3346 tcp_update_pacing_rate(sk);
3347}
3348
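The new tcp_cong_control() above consolidates the two mutually exclusive per-ACK congestion window actions — reduction while in CWR/recovery, growth otherwise — into one place that runs once per ACK with the freshly computed delivery count. The following is a minimal userspace model of that decision structure only; the struct fields and the PRR-style/Reno-style arithmetic are simplified stand-ins, not the kernel's tcp_cwnd_reduction() or tcp_cong_avoid().

/* Toy model of the per-ACK congestion action in tcp_cong_control():
 * either reduce cwnd (while in cwnd reduction) or grow it, never both.
 * All names and formulas are simplified stand-ins, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_sock {
	unsigned int cwnd;
	unsigned int ssthresh;
	bool in_cwnd_reduction;   /* CWR or Recovery state */
	bool may_raise_cwnd;      /* e.g. forward progress and not a dubious ACK */
};

static void toy_cwnd_reduction(struct toy_sock *sk, unsigned int newly_acked_sacked)
{
	unsigned int dec = (newly_acked_sacked + 1) / 2;   /* crude PRR-like step */

	if (sk->cwnd > sk->ssthresh + dec)
		sk->cwnd -= dec;
	else
		sk->cwnd = sk->ssthresh;
}

static void toy_cong_avoid(struct toy_sock *sk, unsigned int newly_acked_sacked)
{
	if (sk->cwnd < sk->ssthresh)
		sk->cwnd += newly_acked_sacked;   /* crude slow start */
	else
		sk->cwnd += 1;                    /* crude congestion avoidance */
}

static void toy_cong_control(struct toy_sock *sk, unsigned int newly_acked_sacked)
{
	if (sk->in_cwnd_reduction)
		toy_cwnd_reduction(sk, newly_acked_sacked);   /* reduce if state mandates */
	else if (sk->may_raise_cwnd)
		toy_cong_avoid(sk, newly_acked_sacked);       /* advance if state allows */
	/* the pacing rate would be refreshed here in either case */
}

int main(void)
{
	struct toy_sock sk = { .cwnd = 20, .ssthresh = 10,
			       .in_cwnd_reduction = true, .may_raise_cwnd = false };

	toy_cong_control(&sk, 3);
	printf("cwnd after reduction step: %u\n", sk.cwnd);
	return 0;
}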
3317/* Check that window update is acceptable. 3349/* Check that window update is acceptable.
3318 * The function assumes that snd_una<=ack<=snd_next. 3350 * The function assumes that snd_una<=ack<=snd_next.
3319 */ 3351 */
@@ -3509,6 +3541,27 @@ static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3509 icsk->icsk_ca_ops->in_ack_event(sk, flags); 3541 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3510} 3542}
3511 3543
3544/* Congestion control has updated the cwnd already. So if we're in
3545 * loss recovery then now we do any new sends (for FRTO) or
3546 * retransmits (for CA_Loss or CA_recovery) that make sense.
3547 */
3548static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3549{
3550 struct tcp_sock *tp = tcp_sk(sk);
3551
3552 if (rexmit == REXMIT_NONE)
3553 return;
3554
3555 if (unlikely(rexmit == 2)) {
3556 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3557 TCP_NAGLE_OFF);
3558 if (after(tp->snd_nxt, tp->high_seq))
3559 return;
3560 tp->frto = 0;
3561 }
3562 tcp_xmit_retransmit_queue(sk);
3563}
3564
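tcp_xmit_recovery() runs after the congestion window has already been updated and decides whether the ACK should trigger new transmissions (the FRTO probe case, rexmit == 2 above) or retransmissions from the queue. Below is a hedged sketch of that dispatch; only REXMIT_NONE is visible verbatim in the patch, so the other enum values here are assumptions chosen to mirror the "2 means send new data first" check.

/* Sketch of the post-ACK (re)transmit dispatch. TOY_REXMIT_LOST and
 * TOY_REXMIT_NEW are invented labels; only the REXMIT_NONE name and the
 * "== 2 pushes new data first" behaviour appear in the hunk above.
 */
#include <stdbool.h>
#include <stdio.h>

enum toy_rexmit { TOY_REXMIT_NONE = 0, TOY_REXMIT_LOST = 1, TOY_REXMIT_NEW = 2 };

static void toy_xmit_recovery(enum toy_rexmit rexmit, bool new_data_passed_high_seq)
{
	if (rexmit == TOY_REXMIT_NONE)
		return;

	if (rexmit == TOY_REXMIT_NEW) {
		puts("push pending new data (FRTO probe)");
		if (new_data_passed_high_seq)
			return;           /* probe sent, no retransmission needed yet */
		puts("no new data available, fall back to retransmitting");
	}
	puts("walk the retransmit queue");
}

int main(void)
{
	toy_xmit_recovery(TOY_REXMIT_NEW, false);
	return 0;
}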
3512/* This routine deals with incoming acks, but not outgoing ones. */ 3565/* This routine deals with incoming acks, but not outgoing ones. */
3513static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3566static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3514{ 3567{
@@ -3521,8 +3574,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3521 bool is_dupack = false; 3574 bool is_dupack = false;
3522 u32 prior_fackets; 3575 u32 prior_fackets;
3523 int prior_packets = tp->packets_out; 3576 int prior_packets = tp->packets_out;
3524 const int prior_unsacked = tp->packets_out - tp->sacked_out; 3577 u32 prior_delivered = tp->delivered;
3525 int acked = 0; /* Number of packets newly acked */ 3578 int acked = 0; /* Number of packets newly acked */
3579 int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
3526 3580
3527 sack_state.first_sackt.v64 = 0; 3581 sack_state.first_sackt.v64 = 0;
3528 3582
@@ -3611,23 +3665,16 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3611 goto no_queue; 3665 goto no_queue;
3612 3666
3613 /* See if we can take anything off of the retransmit queue. */ 3667 /* See if we can take anything off of the retransmit queue. */
3614 acked = tp->packets_out; 3668 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
3615 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
3616 &sack_state); 3669 &sack_state);
3617 acked -= tp->packets_out;
3618 3670
3619 if (tcp_ack_is_dubious(sk, flag)) { 3671 if (tcp_ack_is_dubious(sk, flag)) {
3620 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3672 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3621 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3673 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3622 is_dupack, flag);
3623 } 3674 }
3624 if (tp->tlp_high_seq) 3675 if (tp->tlp_high_seq)
3625 tcp_process_tlp_ack(sk, ack, flag); 3676 tcp_process_tlp_ack(sk, ack, flag);
3626 3677
3627 /* Advance cwnd if state allows */
3628 if (tcp_may_raise_cwnd(sk, flag))
3629 tcp_cong_avoid(sk, ack, acked);
3630
3631 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { 3678 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3632 struct dst_entry *dst = __sk_dst_get(sk); 3679 struct dst_entry *dst = __sk_dst_get(sk);
3633 if (dst) 3680 if (dst)
@@ -3636,14 +3683,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3636 3683
3637 if (icsk->icsk_pending == ICSK_TIME_RETRANS) 3684 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3638 tcp_schedule_loss_probe(sk); 3685 tcp_schedule_loss_probe(sk);
3639 tcp_update_pacing_rate(sk); 3686 tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
3687 tcp_xmit_recovery(sk, rexmit);
3640 return 1; 3688 return 1;
3641 3689
3642no_queue: 3690no_queue:
3643 /* If data was DSACKed, see if we can undo a cwnd reduction. */ 3691 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3644 if (flag & FLAG_DSACKING_ACK) 3692 if (flag & FLAG_DSACKING_ACK)
3645 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3693 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3646 is_dupack, flag);
3647 /* If this ack opens up a zero window, clear backoff. It was 3694 /* If this ack opens up a zero window, clear backoff. It was
3648 * being used to time the probes, and is probably far higher than 3695 * being used to time the probes, and is probably far higher than
3649 * it needs to be for normal retransmission. 3696 * it needs to be for normal retransmission.
@@ -3666,8 +3713,8 @@ old_ack:
3666 if (TCP_SKB_CB(skb)->sacked) { 3713 if (TCP_SKB_CB(skb)->sacked) {
3667 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, 3714 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3668 &sack_state); 3715 &sack_state);
3669 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3716 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3670 is_dupack, flag); 3717 tcp_xmit_recovery(sk, rexmit);
3671 } 3718 }
3672 3719
3673 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3720 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3998,7 +4045,7 @@ void tcp_reset(struct sock *sk)
3998 * 4045 *
3999 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. 4046 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
4000 */ 4047 */
4001static void tcp_fin(struct sock *sk) 4048void tcp_fin(struct sock *sk)
4002{ 4049{
4003 struct tcp_sock *tp = tcp_sk(sk); 4050 struct tcp_sock *tp = tcp_sk(sk);
4004 4051
@@ -5512,6 +5559,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5512 tp->syn_data_acked = tp->syn_data; 5559 tp->syn_data_acked = tp->syn_data;
5513 if (tp->syn_data_acked) 5560 if (tp->syn_data_acked)
5514 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); 5561 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
5562
5563 tcp_fastopen_add_skb(sk, synack);
5564
5515 return false; 5565 return false;
5516} 5566}
5517 5567
@@ -6118,9 +6168,10 @@ static bool tcp_syn_flood_action(const struct sock *sk,
6118 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 6168 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6119 const char *msg = "Dropping request"; 6169 const char *msg = "Dropping request";
6120 bool want_cookie = false; 6170 bool want_cookie = false;
6171 struct net *net = sock_net(sk);
6121 6172
6122#ifdef CONFIG_SYN_COOKIES 6173#ifdef CONFIG_SYN_COOKIES
6123 if (sysctl_tcp_syncookies) { 6174 if (net->ipv4.sysctl_tcp_syncookies) {
6124 msg = "Sending cookies"; 6175 msg = "Sending cookies";
6125 want_cookie = true; 6176 want_cookie = true;
6126 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); 6177 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6129,7 +6180,7 @@ static bool tcp_syn_flood_action(const struct sock *sk,
6129 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 6180 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6130 6181
6131 if (!queue->synflood_warned && 6182 if (!queue->synflood_warned &&
6132 sysctl_tcp_syncookies != 2 && 6183 net->ipv4.sysctl_tcp_syncookies != 2 &&
6133 xchg(&queue->synflood_warned, 1) == 0) 6184 xchg(&queue->synflood_warned, 1) == 0)
6134 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", 6185 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6135 proto, ntohs(tcp_hdr(skb)->dest), msg); 6186 proto, ntohs(tcp_hdr(skb)->dest), msg);
@@ -6162,6 +6213,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6162 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; 6213 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6163 struct tcp_options_received tmp_opt; 6214 struct tcp_options_received tmp_opt;
6164 struct tcp_sock *tp = tcp_sk(sk); 6215 struct tcp_sock *tp = tcp_sk(sk);
6216 struct net *net = sock_net(sk);
6165 struct sock *fastopen_sk = NULL; 6217 struct sock *fastopen_sk = NULL;
6166 struct dst_entry *dst = NULL; 6218 struct dst_entry *dst = NULL;
6167 struct request_sock *req; 6219 struct request_sock *req;
@@ -6172,7 +6224,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6172 * limitations, they conserve resources and peer is 6224 * limitations, they conserve resources and peer is
6173 * evidently real one. 6225 * evidently real one.
6174 */ 6226 */
6175 if ((sysctl_tcp_syncookies == 2 || 6227 if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
6176 inet_csk_reqsk_queue_is_full(sk)) && !isn) { 6228 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6177 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); 6229 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
6178 if (!want_cookie) 6230 if (!want_cookie)
@@ -6238,7 +6290,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6238 } 6290 }
6239 } 6291 }
6240 /* Kill the following clause, if you dislike this way. */ 6292 /* Kill the following clause, if you dislike this way. */
6241 else if (!sysctl_tcp_syncookies && 6293 else if (!net->ipv4.sysctl_tcp_syncookies &&
6242 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 6294 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6243 (sysctl_max_syn_backlog >> 2)) && 6295 (sysctl_max_syn_backlog >> 2)) &&
6244 !tcp_peer_is_proven(req, dst, false, 6296 !tcp_peer_is_proven(req, dst, false,
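The last hunks of tcp_input.c switch the SYN-flood handling from the global sysctl_tcp_syncookies to the per-namespace copy; the decision itself is unchanged: cookies are attempted when the sysctl is non-zero (value 2 forces them unconditionally) and the request queue is under pressure, otherwise the SYN is dropped or accepted normally. A small model of that decision, with invented names and fields:

/* Toy model of the "answer this SYN with a cookie?" decision driven by a
 * per-namespace tcp_syncookies setting (0 = off, 1 = on demand, 2 = always).
 * Names are illustrative, not the kernel's.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_netns { int sysctl_tcp_syncookies; };

static bool toy_want_cookie(const struct toy_netns *net, bool reqsk_queue_full)
{
	if (net->sysctl_tcp_syncookies == 2)
		return true;                       /* unconditionally use cookies */
	if (!reqsk_queue_full)
		return false;                      /* no pressure, take the request */
	return net->sysctl_tcp_syncookies != 0;    /* under pressure: cookie or drop */
}

int main(void)
{
	struct toy_netns net = { .sysctl_tcp_syncookies = 1 };

	printf("queue full  -> cookie: %d\n", toy_want_cookie(&net, true));
	printf("queue empty -> cookie: %d\n", toy_want_cookie(&net, false));
	return 0;
}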
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 487ac67059e2..ad450509029b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -81,7 +81,7 @@
81#include <linux/proc_fs.h> 81#include <linux/proc_fs.h>
82#include <linux/seq_file.h> 82#include <linux/seq_file.h>
83 83
84#include <linux/crypto.h> 84#include <crypto/hash.h>
85#include <linux/scatterlist.h> 85#include <linux/scatterlist.h>
86 86
87int sysctl_tcp_tw_reuse __read_mostly; 87int sysctl_tcp_tw_reuse __read_mostly;
@@ -319,8 +319,6 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
319 /* ICMPs are not backlogged, hence we cannot get 319 /* ICMPs are not backlogged, hence we cannot get
320 * an established socket here. 320 * an established socket here.
321 */ 321 */
322 WARN_ON(req->sk);
323
324 if (seq != tcp_rsk(req)->snt_isn) { 322 if (seq != tcp_rsk(req)->snt_isn) {
325 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 323 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
326 } else if (abort) { 324 } else if (abort) {
@@ -642,8 +640,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
642 * Incoming packet is checked with md5 hash with finding key, 640 * Incoming packet is checked with md5 hash with finding key,
643 * no RST generated if md5 hash doesn't match. 641 * no RST generated if md5 hash doesn't match.
644 */ 642 */
645 sk1 = __inet_lookup_listener(net, 643 sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
646 &tcp_hashinfo, ip_hdr(skb)->saddr, 644 ip_hdr(skb)->saddr,
647 th->source, ip_hdr(skb)->daddr, 645 th->source, ip_hdr(skb)->daddr,
648 ntohs(th->source), inet_iif(skb)); 646 ntohs(th->source), inet_iif(skb));
649 /* don't send rst if it can't find key */ 647 /* don't send rst if it can't find key */
@@ -865,7 +863,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
865 kfree(inet_rsk(req)->opt); 863 kfree(inet_rsk(req)->opt);
866} 864}
867 865
868
869#ifdef CONFIG_TCP_MD5SIG 866#ifdef CONFIG_TCP_MD5SIG
870/* 867/*
871 * RFC2385 MD5 checksumming requires a mapping of 868 * RFC2385 MD5 checksumming requires a mapping of
@@ -1039,21 +1036,22 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1039 bp->len = cpu_to_be16(nbytes); 1036 bp->len = cpu_to_be16(nbytes);
1040 1037
1041 sg_init_one(&sg, bp, sizeof(*bp)); 1038 sg_init_one(&sg, bp, sizeof(*bp));
1042 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 1039 ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
1040 return crypto_ahash_update(hp->md5_req);
1043} 1041}
1044 1042
1045static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1043static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1046 __be32 daddr, __be32 saddr, const struct tcphdr *th) 1044 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1047{ 1045{
1048 struct tcp_md5sig_pool *hp; 1046 struct tcp_md5sig_pool *hp;
1049 struct hash_desc *desc; 1047 struct ahash_request *req;
1050 1048
1051 hp = tcp_get_md5sig_pool(); 1049 hp = tcp_get_md5sig_pool();
1052 if (!hp) 1050 if (!hp)
1053 goto clear_hash_noput; 1051 goto clear_hash_noput;
1054 desc = &hp->md5_desc; 1052 req = hp->md5_req;
1055 1053
1056 if (crypto_hash_init(desc)) 1054 if (crypto_ahash_init(req))
1057 goto clear_hash; 1055 goto clear_hash;
1058 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 1056 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1059 goto clear_hash; 1057 goto clear_hash;
@@ -1061,7 +1059,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1061 goto clear_hash; 1059 goto clear_hash;
1062 if (tcp_md5_hash_key(hp, key)) 1060 if (tcp_md5_hash_key(hp, key))
1063 goto clear_hash; 1061 goto clear_hash;
1064 if (crypto_hash_final(desc, md5_hash)) 1062 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1063 if (crypto_ahash_final(req))
1065 goto clear_hash; 1064 goto clear_hash;
1066 1065
1067 tcp_put_md5sig_pool(); 1066 tcp_put_md5sig_pool();
@@ -1079,7 +1078,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1079 const struct sk_buff *skb) 1078 const struct sk_buff *skb)
1080{ 1079{
1081 struct tcp_md5sig_pool *hp; 1080 struct tcp_md5sig_pool *hp;
1082 struct hash_desc *desc; 1081 struct ahash_request *req;
1083 const struct tcphdr *th = tcp_hdr(skb); 1082 const struct tcphdr *th = tcp_hdr(skb);
1084 __be32 saddr, daddr; 1083 __be32 saddr, daddr;
1085 1084
@@ -1095,9 +1094,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1095 hp = tcp_get_md5sig_pool(); 1094 hp = tcp_get_md5sig_pool();
1096 if (!hp) 1095 if (!hp)
1097 goto clear_hash_noput; 1096 goto clear_hash_noput;
1098 desc = &hp->md5_desc; 1097 req = hp->md5_req;
1099 1098
1100 if (crypto_hash_init(desc)) 1099 if (crypto_ahash_init(req))
1101 goto clear_hash; 1100 goto clear_hash;
1102 1101
1103 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 1102 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
@@ -1108,7 +1107,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1108 goto clear_hash; 1107 goto clear_hash;
1109 if (tcp_md5_hash_key(hp, key)) 1108 if (tcp_md5_hash_key(hp, key))
1110 goto clear_hash; 1109 goto clear_hash;
1111 if (crypto_hash_final(desc, md5_hash)) 1110 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1111 if (crypto_ahash_final(req))
1112 goto clear_hash; 1112 goto clear_hash;
1113 1113
1114 tcp_put_md5sig_pool(); 1114 tcp_put_md5sig_pool();
@@ -1587,7 +1587,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
1587 TCP_SKB_CB(skb)->sacked = 0; 1587 TCP_SKB_CB(skb)->sacked = 0;
1588 1588
1589lookup: 1589lookup:
1590 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 1590 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1591 th->dest);
1591 if (!sk) 1592 if (!sk)
1592 goto no_tcp_socket; 1593 goto no_tcp_socket;
1593 1594
@@ -1650,7 +1651,7 @@ process:
1650 sk_incoming_cpu_update(sk); 1651 sk_incoming_cpu_update(sk);
1651 1652
1652 bh_lock_sock_nested(sk); 1653 bh_lock_sock_nested(sk);
1653 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 1654 tcp_segs_in(tcp_sk(sk), skb);
1654 ret = 0; 1655 ret = 0;
1655 if (!sock_owned_by_user(sk)) { 1656 if (!sock_owned_by_user(sk)) {
1656 if (!tcp_prequeue(sk, skb)) 1657 if (!tcp_prequeue(sk, skb))
@@ -1703,7 +1704,8 @@ do_time_wait:
1703 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1704 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1704 case TCP_TW_SYN: { 1705 case TCP_TW_SYN: {
1705 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 1706 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1706 &tcp_hashinfo, 1707 &tcp_hashinfo, skb,
1708 __tcp_hdrlen(th),
1707 iph->saddr, th->source, 1709 iph->saddr, th->source,
1708 iph->daddr, th->dest, 1710 iph->daddr, th->dest,
1709 inet_iif(skb)); 1711 inet_iif(skb));
@@ -2395,6 +2397,16 @@ static int __net_init tcp_sk_init(struct net *net)
2395 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; 2397 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2396 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; 2398 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2397 2399
2400 net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2401 net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2402 net->ipv4.sysctl_tcp_syncookies = 1;
2403 net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2404 net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2405 net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2406 net->ipv4.sysctl_tcp_orphan_retries = 0;
2407 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2408 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2409
2398 return 0; 2410 return 0;
2399fail: 2411fail:
2400 tcp_sk_exit(net); 2412 tcp_sk_exit(net);
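tcp_sk_init() now seeds a per-namespace copy of the TCP knobs that used to be plain globals (the __read_mostly ints removed from tcp_timer.c, tcp_minisocks.c and tcp_output.c later in this diff), so each network namespace can tune them independently. The sketch below shows the same pattern outside the kernel, using the defaults visible in the hunk; the numeric values are placeholders standing in for the kernel's TCP_SYN_RETRIES, TCP_RETR2, TCP_FIN_TIMEOUT and related macros.

/* Per-"namespace" configuration block seeded with defaults at creation time,
 * mirroring the tcp_sk_init() hunk above. Numeric defaults are placeholders
 * for the kernel macros named in that hunk.
 */
#include <limits.h>
#include <stdio.h>

struct toy_ipv4_cfg {
	int tcp_syn_retries;
	int tcp_synack_retries;
	int tcp_syncookies;
	int tcp_reordering;
	int tcp_retries1;
	int tcp_retries2;
	int tcp_orphan_retries;
	int tcp_fin_timeout_secs;
	unsigned int tcp_notsent_lowat;
};

static void toy_netns_init(struct toy_ipv4_cfg *cfg)
{
	cfg->tcp_syn_retries = 6;          /* placeholder for TCP_SYN_RETRIES */
	cfg->tcp_synack_retries = 5;       /* placeholder for TCP_SYNACK_RETRIES */
	cfg->tcp_syncookies = 1;
	cfg->tcp_reordering = 3;           /* placeholder for TCP_FASTRETRANS_THRESH */
	cfg->tcp_retries1 = 3;             /* placeholder for TCP_RETR1 */
	cfg->tcp_retries2 = 15;            /* placeholder for TCP_RETR2 */
	cfg->tcp_orphan_retries = 0;
	cfg->tcp_fin_timeout_secs = 60;    /* placeholder for TCP_FIN_TIMEOUT */
	cfg->tcp_notsent_lowat = UINT_MAX;
}

int main(void)
{
	struct toy_ipv4_cfg ns1, ns2;

	toy_netns_init(&ns1);
	toy_netns_init(&ns2);
	ns2.tcp_syn_retries = 2;           /* tune one namespace without touching the other */
	printf("ns1 syn_retries=%d ns2 syn_retries=%d\n",
	       ns1.tcp_syn_retries, ns2.tcp_syn_retries);
	return 0;
}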
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index a726d7853ce5..7b7eec439906 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -369,6 +369,7 @@ void tcp_update_metrics(struct sock *sk)
369 const struct inet_connection_sock *icsk = inet_csk(sk); 369 const struct inet_connection_sock *icsk = inet_csk(sk);
370 struct dst_entry *dst = __sk_dst_get(sk); 370 struct dst_entry *dst = __sk_dst_get(sk);
371 struct tcp_sock *tp = tcp_sk(sk); 371 struct tcp_sock *tp = tcp_sk(sk);
372 struct net *net = sock_net(sk);
372 struct tcp_metrics_block *tm; 373 struct tcp_metrics_block *tm;
373 unsigned long rtt; 374 unsigned long rtt;
374 u32 val; 375 u32 val;
@@ -473,7 +474,7 @@ void tcp_update_metrics(struct sock *sk)
473 if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { 474 if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
474 val = tcp_metric_get(tm, TCP_METRIC_REORDERING); 475 val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
475 if (val < tp->reordering && 476 if (val < tp->reordering &&
476 tp->reordering != sysctl_tcp_reordering) 477 tp->reordering != net->ipv4.sysctl_tcp_reordering)
477 tcp_metric_set(tm, TCP_METRIC_REORDERING, 478 tcp_metric_set(tm, TCP_METRIC_REORDERING,
478 tp->reordering); 479 tp->reordering);
479 } 480 }
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 9b02af2139d3..acb366dd61e6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -27,9 +27,6 @@
27#include <net/inet_common.h> 27#include <net/inet_common.h>
28#include <net/xfrm.h> 28#include <net/xfrm.h>
29 29
30int sysctl_tcp_syncookies __read_mostly = 1;
31EXPORT_SYMBOL(sysctl_tcp_syncookies);
32
33int sysctl_tcp_abort_on_overflow __read_mostly; 30int sysctl_tcp_abort_on_overflow __read_mostly;
34 31
35struct inet_timewait_death_row tcp_death_row = { 32struct inet_timewait_death_row tcp_death_row = {
@@ -815,7 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
815 int ret = 0; 812 int ret = 0;
816 int state = child->sk_state; 813 int state = child->sk_state;
817 814
818 tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 815 tcp_segs_in(tcp_sk(child), skb);
819 if (!sock_owned_by_user(child)) { 816 if (!sock_owned_by_user(child)) {
820 ret = tcp_rcv_state_process(child, skb); 817 ret = tcp_rcv_state_process(child, skb);
821 /* Wakeup parent, send SIGIO */ 818 /* Wakeup parent, send SIGIO */
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 9864a2dbadce..773083b7f1e9 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -135,7 +135,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
135 th->fin = th->psh = 0; 135 th->fin = th->psh = 0;
136 th->check = newcheck; 136 th->check = newcheck;
137 137
138 if (skb->ip_summed != CHECKSUM_PARTIAL) 138 if (skb->ip_summed == CHECKSUM_PARTIAL)
139 gso_reset_checksum(skb, ~th->check);
140 else
139 th->check = gso_make_checksum(skb, ~th->check); 141 th->check = gso_make_checksum(skb, ~th->check);
140 142
141 seq += mss; 143 seq += mss;
@@ -169,7 +171,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
169 skb->data_len); 171 skb->data_len);
170 th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 172 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
171 (__force u32)delta)); 173 (__force u32)delta));
172 if (skb->ip_summed != CHECKSUM_PARTIAL) 174 if (skb->ip_summed == CHECKSUM_PARTIAL)
175 gso_reset_checksum(skb, ~th->check);
176 else
173 th->check = gso_make_checksum(skb, ~th->check); 177 th->check = gso_make_checksum(skb, ~th->check);
174out: 178out:
175 return segs; 179 return segs;
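The tcp_offload.c hunks leave the incremental checksum arithmetic in place (the ~csum_fold(... + delta) line in the second hunk) and only change the CHECKSUM_PARTIAL case: a partial checksum is now explicitly reset via gso_reset_checksum() instead of being left untouched, while the software-completed path is unchanged. The arithmetic being preserved is ordinary RFC 1071/1624 one's-complement updating; here is a standalone sketch of it, not the kernel's csum_* helpers:

/* One's-complement incremental checksum update (RFC 1071/1624 style), the
 * math behind "~csum_fold((u32)check + (u32)delta)" in the hunk above.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t fold32(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Recompute a checksum after one 16-bit word changes from old_w to new_w. */
static uint16_t csum_update(uint16_t check, uint16_t old_w, uint16_t new_w)
{
	uint32_t sum = (uint16_t)~check;   /* ~check is the folded sum of the data */

	sum += (uint16_t)~old_w;           /* subtract old word (one's complement) */
	sum += new_w;                      /* add new word */
	return (uint16_t)~fold32(sum);
}

int main(void)
{
	uint16_t w[3] = { 0x1234, 0xabcd, 0x0042 };   /* toy "packet" */
	uint16_t check = (uint16_t)~fold32((uint32_t)w[0] + w[1] + w[2]);

	uint16_t updated = csum_update(check, w[1], 0xbeef);
	w[1] = 0xbeef;
	uint16_t recomputed = (uint16_t)~fold32((uint32_t)w[0] + w[1] + w[2]);

	printf("incremental=%#06x recomputed=%#06x\n",
	       (unsigned)updated, (unsigned)recomputed);
	return 0;
}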
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fda379cd600d..79a03b87a771 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,9 +62,6 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3;
62/* By default, RFC2861 behavior. */ 62/* By default, RFC2861 behavior. */
63int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 63int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
64 64
65unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
66EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
67
68static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 65static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
69 int push_one, gfp_t gfp); 66 int push_one, gfp_t gfp);
70 67
@@ -1006,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1006 if (likely(tcb->tcp_flags & TCPHDR_ACK)) 1003 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1007 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 1004 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1008 1005
1009 if (skb->len != tcp_header_size) 1006 if (skb->len != tcp_header_size) {
1010 tcp_event_data_sent(tp, sk); 1007 tcp_event_data_sent(tp, sk);
1008 tp->data_segs_out += tcp_skb_pcount(skb);
1009 }
1011 1010
1012 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 1011 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1013 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 1012 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
@@ -2442,6 +2441,20 @@ u32 __tcp_select_window(struct sock *sk)
2442 return window; 2441 return window;
2443} 2442}
2444 2443
2444void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2445 const struct sk_buff *next_skb)
2446{
2447 const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb);
2448 u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
2449
2450 if (unlikely(tsflags)) {
2451 struct skb_shared_info *shinfo = skb_shinfo(skb);
2452
2453 shinfo->tx_flags |= tsflags;
2454 shinfo->tskey = next_shinfo->tskey;
2455 }
2456}
2457
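tcp_skb_collapse_tstamp() above keeps a pending transmit-timestamp request alive when the buffer carrying it is merged into its predecessor during retransmit collapsing: the timestamp bits of tx_flags and the timestamp key move to the surviving buffer. A minimal model of that flag hand-off, with invented flag values:

/* Minimal model of carrying timestamp-request flags over when two queued
 * buffers are collapsed into one. Flag values are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_TSTAMP_SCHED  0x1
#define TOY_TSTAMP_ACK    0x4
#define TOY_ANY_TSTAMP    (TOY_TSTAMP_SCHED | TOY_TSTAMP_ACK)

struct toy_skb {
	uint8_t  tx_flags;
	uint32_t tskey;     /* identifies which byte range the timestamp is for */
	size_t   len;
};

static void toy_collapse_tstamp(struct toy_skb *skb, const struct toy_skb *next_skb)
{
	uint8_t tsflags = next_skb->tx_flags & TOY_ANY_TSTAMP;

	if (tsflags) {
		skb->tx_flags |= tsflags;       /* keep the pending request */
		skb->tskey = next_skb->tskey;   /* report it for the merged range */
	}
}

int main(void)
{
	struct toy_skb a = { .tx_flags = 0, .tskey = 0, .len = 1000 };
	struct toy_skb b = { .tx_flags = TOY_TSTAMP_ACK, .tskey = 2000, .len = 1000 };

	toy_collapse_tstamp(&a, &b);
	a.len += b.len;                         /* the actual data merge, elided */
	printf("merged: flags=%#x tskey=%u len=%zu\n",
	       (unsigned)a.tx_flags, a.tskey, a.len);
	return 0;
}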
2445/* Collapses two adjacent SKB's during retransmission. */ 2458/* Collapses two adjacent SKB's during retransmission. */
2446static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) 2459static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2447{ 2460{
@@ -2485,6 +2498,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2485 2498
2486 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb)); 2499 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2487 2500
2501 tcp_skb_collapse_tstamp(skb, next_skb);
2502
2488 sk_wmem_free_skb(sk, next_skb); 2503 sk_wmem_free_skb(sk, next_skb);
2489} 2504}
2490 2505
@@ -2625,8 +2640,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2625 */ 2640 */
2626 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || 2641 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2627 skb_headroom(skb) >= 0xFFFF)) { 2642 skb_headroom(skb) >= 0xFFFF)) {
2628 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, 2643 struct sk_buff *nskb;
2629 GFP_ATOMIC); 2644
2645 skb_mstamp_get(&skb->skb_mstamp);
2646 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2630 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : 2647 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2631 -ENOBUFS; 2648 -ENOBUFS;
2632 } else { 2649 } else {
@@ -3476,6 +3493,7 @@ void tcp_send_probe0(struct sock *sk)
3476{ 3493{
3477 struct inet_connection_sock *icsk = inet_csk(sk); 3494 struct inet_connection_sock *icsk = inet_csk(sk);
3478 struct tcp_sock *tp = tcp_sk(sk); 3495 struct tcp_sock *tp = tcp_sk(sk);
3496 struct net *net = sock_net(sk);
3479 unsigned long probe_max; 3497 unsigned long probe_max;
3480 int err; 3498 int err;
3481 3499
@@ -3489,7 +3507,7 @@ void tcp_send_probe0(struct sock *sk)
3489 } 3507 }
3490 3508
3491 if (err <= 0) { 3509 if (err <= 0) {
3492 if (icsk->icsk_backoff < sysctl_tcp_retries2) 3510 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
3493 icsk->icsk_backoff++; 3511 icsk->icsk_backoff++;
3494 icsk->icsk_probes_out++; 3512 icsk->icsk_probes_out++;
3495 probe_max = TCP_RTO_MAX; 3513 probe_max = TCP_RTO_MAX;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index ebf5ff57526e..f6c50af24a64 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -187,13 +187,13 @@ static int tcpprobe_sprint(char *tbuf, int n)
187{ 187{
188 const struct tcp_log *p 188 const struct tcp_log *p
189 = tcp_probe.log + tcp_probe.tail; 189 = tcp_probe.log + tcp_probe.tail;
190 struct timespec tv 190 struct timespec64 ts
191 = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); 191 = ktime_to_timespec64(ktime_sub(p->tstamp, tcp_probe.start));
192 192
193 return scnprintf(tbuf, n, 193 return scnprintf(tbuf, n,
194 "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n", 194 "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
195 (unsigned long)tv.tv_sec, 195 (unsigned long)ts.tv_sec,
196 (unsigned long)tv.tv_nsec, 196 (unsigned long)ts.tv_nsec,
197 &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, 197 &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
198 p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); 198 p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
199} 199}
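The tcp_probe.c hunk only swaps struct timespec for struct timespec64, presumably as part of the wider migration away from 32-bit time_t structures; the printed format is unchanged. For reference, a minimal split of a signed 64-bit nanosecond delta into the sec/nsec pair that ktime_to_timespec64() produces (my own illustration, not the kernel helper):

/* Splitting a 64-bit nanosecond interval into {seconds, nanoseconds},
 * i.e. the shape of the timespec64 printed by the hunk above.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct toy_timespec64 {
	int64_t tv_sec;
	long    tv_nsec;
};

static struct toy_timespec64 ns_to_timespec64(int64_t ns)
{
	struct toy_timespec64 ts;

	ts.tv_sec = ns / 1000000000LL;
	ts.tv_nsec = (long)(ns % 1000000000LL);
	if (ts.tv_nsec < 0) {              /* keep nsec in [0, 1e9) for negative input */
		ts.tv_nsec += 1000000000L;
		ts.tv_sec -= 1;
	}
	return ts;
}

int main(void)
{
	struct toy_timespec64 ts = ns_to_timespec64(3123456789LL);

	printf("%" PRId64 ".%09ld\n", ts.tv_sec, ts.tv_nsec);
	return 0;
}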
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a4730a28b220..49bc474f8e35 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,11 +22,6 @@
22#include <linux/gfp.h> 22#include <linux/gfp.h>
23#include <net/tcp.h> 23#include <net/tcp.h>
24 24
25int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
26int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
27int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
28int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
29int sysctl_tcp_orphan_retries __read_mostly;
30int sysctl_tcp_thin_linear_timeouts __read_mostly; 25int sysctl_tcp_thin_linear_timeouts __read_mostly;
31 26
32static void tcp_write_err(struct sock *sk) 27static void tcp_write_err(struct sock *sk)
@@ -82,7 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
82/* Calculate maximal number or retries on an orphaned socket. */ 77/* Calculate maximal number or retries on an orphaned socket. */
83static int tcp_orphan_retries(struct sock *sk, bool alive) 78static int tcp_orphan_retries(struct sock *sk, bool alive)
84{ 79{
85 int retries = sysctl_tcp_orphan_retries; /* May be zero. */ 80 int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
86 81
87 /* We know from an ICMP that something is wrong. */ 82 /* We know from an ICMP that something is wrong. */
88 if (sk->sk_err_soft && !alive) 83 if (sk->sk_err_soft && !alive)
@@ -157,6 +152,7 @@ static int tcp_write_timeout(struct sock *sk)
157{ 152{
158 struct inet_connection_sock *icsk = inet_csk(sk); 153 struct inet_connection_sock *icsk = inet_csk(sk);
159 struct tcp_sock *tp = tcp_sk(sk); 154 struct tcp_sock *tp = tcp_sk(sk);
155 struct net *net = sock_net(sk);
160 int retry_until; 156 int retry_until;
161 bool do_reset, syn_set = false; 157 bool do_reset, syn_set = false;
162 158
@@ -169,10 +165,10 @@ static int tcp_write_timeout(struct sock *sk)
169 NET_INC_STATS_BH(sock_net(sk), 165 NET_INC_STATS_BH(sock_net(sk),
170 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 166 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
171 } 167 }
172 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 168 retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
173 syn_set = true; 169 syn_set = true;
174 } else { 170 } else {
175 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { 171 if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
176 /* Some middle-boxes may black-hole Fast Open _after_ 172 /* Some middle-boxes may black-hole Fast Open _after_
177 * the handshake. Therefore we conservatively disable 173 * the handshake. Therefore we conservatively disable
178 * Fast Open on this path on recurring timeouts with 174 * Fast Open on this path on recurring timeouts with
@@ -181,7 +177,7 @@ static int tcp_write_timeout(struct sock *sk)
181 if (tp->syn_data_acked && 177 if (tp->syn_data_acked &&
182 tp->bytes_acked <= tp->rx_opt.mss_clamp) { 178 tp->bytes_acked <= tp->rx_opt.mss_clamp) {
183 tcp_fastopen_cache_set(sk, 0, NULL, true, 0); 179 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
184 if (icsk->icsk_retransmits == sysctl_tcp_retries1) 180 if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
185 NET_INC_STATS_BH(sock_net(sk), 181 NET_INC_STATS_BH(sock_net(sk),
186 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 182 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
187 } 183 }
@@ -191,7 +187,7 @@ static int tcp_write_timeout(struct sock *sk)
191 dst_negative_advice(sk); 187 dst_negative_advice(sk);
192 } 188 }
193 189
194 retry_until = sysctl_tcp_retries2; 190 retry_until = net->ipv4.sysctl_tcp_retries2;
195 if (sock_flag(sk, SOCK_DEAD)) { 191 if (sock_flag(sk, SOCK_DEAD)) {
196 const bool alive = icsk->icsk_rto < TCP_RTO_MAX; 192 const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
197 193
@@ -305,7 +301,7 @@ static void tcp_probe_timer(struct sock *sk)
305 (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout) 301 (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
306 goto abort; 302 goto abort;
307 303
308 max_probes = sysctl_tcp_retries2; 304 max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
309 if (sock_flag(sk, SOCK_DEAD)) { 305 if (sock_flag(sk, SOCK_DEAD)) {
310 const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; 306 const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
311 307
@@ -332,7 +328,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
332{ 328{
333 struct inet_connection_sock *icsk = inet_csk(sk); 329 struct inet_connection_sock *icsk = inet_csk(sk);
334 int max_retries = icsk->icsk_syn_retries ? : 330 int max_retries = icsk->icsk_syn_retries ? :
335 sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ 331 sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
336 struct request_sock *req; 332 struct request_sock *req;
337 333
338 req = tcp_sk(sk)->fastopen_rsk; 334 req = tcp_sk(sk)->fastopen_rsk;
@@ -360,6 +356,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
360void tcp_retransmit_timer(struct sock *sk) 356void tcp_retransmit_timer(struct sock *sk)
361{ 357{
362 struct tcp_sock *tp = tcp_sk(sk); 358 struct tcp_sock *tp = tcp_sk(sk);
359 struct net *net = sock_net(sk);
363 struct inet_connection_sock *icsk = inet_csk(sk); 360 struct inet_connection_sock *icsk = inet_csk(sk);
364 361
365 if (tp->fastopen_rsk) { 362 if (tp->fastopen_rsk) {
@@ -490,7 +487,7 @@ out_reset_timer:
490 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 487 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
491 } 488 }
492 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 489 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
493 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0)) 490 if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0))
494 __sk_dst_reset(sk); 491 __sk_dst_reset(sk);
495 492
496out:; 493out:;
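The tcp_timer.c changes only redirect the retry limits to their per-namespace copies; the timeout policy itself is untouched: every failed retransmission doubles the RTO up to TCP_RTO_MAX (the "icsk_rto << 1" line kept as context above), retries1 marks the point where the route is re-checked (dst_negative_advice in the same hunk), and the connection is abandoned once retries2 attempts have timed out. A sketch of that doubling, with placeholder values for the bounds:

/* Exponential retransmission backoff: rto = min(rto << 1, RTO_MAX) on every
 * timeout, as in the out_reset_timer hunk above. Bounds are placeholders,
 * not TCP_RTO_MIN / TCP_RTO_MAX.
 */
#include <stdio.h>

#define TOY_RTO_START_MS  1000     /* placeholder initial RTO */
#define TOY_RTO_MAX_MS    120000   /* placeholder cap */

int main(void)
{
	unsigned int rto = TOY_RTO_START_MS;
	unsigned int retries2 = 15;    /* the hunk's default is TCP_RETR2 */
	unsigned long total = 0;

	for (unsigned int i = 0; i < retries2; i++) {
		total += rto;
		printf("retransmit %2u after %6u ms (total %lu ms)\n",
		       i + 1, rto, total);
		rto <<= 1;
		if (rto > TOY_RTO_MAX_MS)
			rto = TOY_RTO_MAX_MS;
	}
	return 0;
}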
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 95d2f198017e..a2e7f55a1f61 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -339,8 +339,13 @@ found:
339 339
340 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 340 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
341 spin_lock(&hslot2->lock); 341 spin_lock(&hslot2->lock);
342 hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, 342 if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
343 &hslot2->head); 343 sk->sk_family == AF_INET6)
344 hlist_nulls_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
345 &hslot2->head);
346 else
347 hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
348 &hslot2->head);
344 hslot2->count++; 349 hslot2->count++;
345 spin_unlock(&hslot2->lock); 350 spin_unlock(&hslot2->lock);
346 } 351 }
@@ -356,8 +361,8 @@ EXPORT_SYMBOL(udp_lib_get_port);
356 * match_wildcard == false: addresses must be exactly the same, i.e. 361 * match_wildcard == false: addresses must be exactly the same, i.e.
357 * 0.0.0.0 only equals to 0.0.0.0 362 * 0.0.0.0 only equals to 0.0.0.0
358 */ 363 */
359static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, 364int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
360 bool match_wildcard) 365 bool match_wildcard)
361{ 366{
362 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 367 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
363 368
@@ -848,32 +853,20 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
848{ 853{
849 struct udphdr *uh = udp_hdr(skb); 854 struct udphdr *uh = udp_hdr(skb);
850 855
851 if (nocheck) 856 if (nocheck) {
852 uh->check = 0; 857 uh->check = 0;
853 else if (skb_is_gso(skb)) 858 } else if (skb_is_gso(skb)) {
854 uh->check = ~udp_v4_check(len, saddr, daddr, 0); 859 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
855 else if (skb_dst(skb) && skb_dst(skb)->dev && 860 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
856 (skb_dst(skb)->dev->features & 861 uh->check = 0;
857 (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { 862 uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
858 863 if (uh->check == 0)
859 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); 864 uh->check = CSUM_MANGLED_0;
860 865 } else {
861 skb->ip_summed = CHECKSUM_PARTIAL; 866 skb->ip_summed = CHECKSUM_PARTIAL;
862 skb->csum_start = skb_transport_header(skb) - skb->head; 867 skb->csum_start = skb_transport_header(skb) - skb->head;
863 skb->csum_offset = offsetof(struct udphdr, check); 868 skb->csum_offset = offsetof(struct udphdr, check);
864 uh->check = ~udp_v4_check(len, saddr, daddr, 0); 869 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
865 } else {
866 __wsum csum;
867
868 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
869
870 uh->check = 0;
871 csum = skb_checksum(skb, 0, len, 0);
872 uh->check = udp_v4_check(len, saddr, daddr, csum);
873 if (uh->check == 0)
874 uh->check = CSUM_MANGLED_0;
875
876 skb->ip_summed = CHECKSUM_UNNECESSARY;
877 } 870 }
878} 871}
879EXPORT_SYMBOL(udp_set_csum); 872EXPORT_SYMBOL(udp_set_csum);
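The rewritten udp_set_csum() adds a CHECKSUM_PARTIAL branch that finishes the checksum from the local-checksum-offload value (lco_csum()) instead of summing the whole payload, and it keeps the long-standing UDP rule that a computed checksum of zero must be sent as 0xFFFF (CSUM_MANGLED_0), because zero on the wire means "no checksum". The fold-and-mangle step in isolation, as a standalone sketch rather than the kernel's udp_v4_check()/csum_fold():

/* Folding a 32-bit one's-complement sum into a UDP checksum, including the
 * "0 becomes 0xFFFF" rule (CSUM_MANGLED_0) used in the hunk above.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t toy_udp_checksum(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	uint16_t check = (uint16_t)~sum;
	if (check == 0)
		check = 0xffff;    /* 0 is reserved for "checksum not computed" */
	return check;
}

int main(void)
{
	/* a sum whose complement would be zero exercises the mangling rule */
	printf("%#06x\n", (unsigned)toy_udp_checksum(0xffff));
	printf("%#06x\n", (unsigned)toy_udp_checksum(0x1234 + 0x00aa));
	return 0;
}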
@@ -2082,10 +2075,14 @@ void udp_v4_early_demux(struct sk_buff *skb)
2082 if (!in_dev) 2075 if (!in_dev)
2083 return; 2076 return;
2084 2077
2085 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, 2078 /* we are supposed to accept bcast packets */
2086 iph->protocol); 2079 if (skb->pkt_type == PACKET_MULTICAST) {
2087 if (!ours) 2080 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
2088 return; 2081 iph->protocol);
2082 if (!ours)
2083 return;
2084 }
2085
2089 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, 2086 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
2090 uh->source, iph->saddr, dif); 2087 uh->source, iph->saddr, dif);
2091 } else if (skb->pkt_type == PACKET_HOST) { 2088 } else if (skb->pkt_type == PACKET_HOST) {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 4c519c1dc161..e330c0e56b11 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -32,42 +32,65 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
32 netdev_features_t features), 32 netdev_features_t features),
33 __be16 new_protocol, bool is_ipv6) 33 __be16 new_protocol, bool is_ipv6)
34{ 34{
35 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
36 bool remcsum, need_csum, offload_csum, ufo;
35 struct sk_buff *segs = ERR_PTR(-EINVAL); 37 struct sk_buff *segs = ERR_PTR(-EINVAL);
38 struct udphdr *uh = udp_hdr(skb);
36 u16 mac_offset = skb->mac_header; 39 u16 mac_offset = skb->mac_header;
37 int mac_len = skb->mac_len;
38 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
39 __be16 protocol = skb->protocol; 40 __be16 protocol = skb->protocol;
40 netdev_features_t enc_features; 41 u16 mac_len = skb->mac_len;
41 int udp_offset, outer_hlen; 42 int udp_offset, outer_hlen;
42 unsigned int oldlen; 43 __wsum partial;
43 bool need_csum = !!(skb_shinfo(skb)->gso_type &
44 SKB_GSO_UDP_TUNNEL_CSUM);
45 bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
46 bool offload_csum = false, dont_encap = (need_csum || remcsum);
47
48 oldlen = (u16)~skb->len;
49 44
50 if (unlikely(!pskb_may_pull(skb, tnl_hlen))) 45 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
51 goto out; 46 goto out;
52 47
48 /* Adjust partial header checksum to negate old length.
49 * We cannot rely on the value contained in uh->len as it is
50 * possible that the actual value exceeds the boundaries of the
51 * 16 bit length field due to the header being added outside of an
52 * IP or IPv6 frame that was already limited to 64K - 1.
53 */
54 partial = csum_sub(csum_unfold(uh->check),
55 (__force __wsum)htonl(skb->len));
56
57 /* setup inner skb. */
53 skb->encapsulation = 0; 58 skb->encapsulation = 0;
59 SKB_GSO_CB(skb)->encap_level = 0;
54 __skb_pull(skb, tnl_hlen); 60 __skb_pull(skb, tnl_hlen);
55 skb_reset_mac_header(skb); 61 skb_reset_mac_header(skb);
56 skb_set_network_header(skb, skb_inner_network_offset(skb)); 62 skb_set_network_header(skb, skb_inner_network_offset(skb));
57 skb->mac_len = skb_inner_network_offset(skb); 63 skb->mac_len = skb_inner_network_offset(skb);
58 skb->protocol = new_protocol; 64 skb->protocol = new_protocol;
65
66 need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
59 skb->encap_hdr_csum = need_csum; 67 skb->encap_hdr_csum = need_csum;
68
69 remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
60 skb->remcsum_offload = remcsum; 70 skb->remcsum_offload = remcsum;
61 71
72 ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
73
62 /* Try to offload checksum if possible */ 74 /* Try to offload checksum if possible */
63 offload_csum = !!(need_csum && 75 offload_csum = !!(need_csum &&
64 ((skb->dev->features & NETIF_F_HW_CSUM) || 76 (skb->dev->features &
65 (skb->dev->features & (is_ipv6 ? 77 (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
66 NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); 78 (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));
79
80 features &= skb->dev->hw_enc_features;
81
82 /* The only checksum offload we care about from here on out is the
83 * outer one so strip the existing checksum feature flags and
84 * instead set the flag based on our outer checksum offload value.
85 */
86 if (remcsum || ufo) {
87 features &= ~NETIF_F_CSUM_MASK;
88 if (!need_csum || offload_csum)
89 features |= NETIF_F_HW_CSUM;
90 }
67 91
68 /* segment inner packet. */ 92 /* segment inner packet. */
69 enc_features = skb->dev->hw_enc_features & features; 93 segs = gso_inner_segment(skb, features);
70 segs = gso_inner_segment(skb, enc_features);
71 if (IS_ERR_OR_NULL(segs)) { 94 if (IS_ERR_OR_NULL(segs)) {
72 skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, 95 skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
73 mac_len); 96 mac_len);
@@ -78,17 +101,13 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
78 udp_offset = outer_hlen - tnl_hlen; 101 udp_offset = outer_hlen - tnl_hlen;
79 skb = segs; 102 skb = segs;
80 do { 103 do {
81 struct udphdr *uh; 104 __be16 len;
82 int len;
83 __be32 delta;
84 105
85 if (dont_encap) { 106 if (remcsum)
86 skb->encapsulation = 0;
87 skb->ip_summed = CHECKSUM_NONE; 107 skb->ip_summed = CHECKSUM_NONE;
88 } else { 108
89 /* Only set up inner headers if we might be offloading 109 /* Set up inner headers if we are offloading inner checksum */
90 * inner checksum. 110 if (skb->ip_summed == CHECKSUM_PARTIAL) {
91 */
92 skb_reset_inner_headers(skb); 111 skb_reset_inner_headers(skb);
93 skb->encapsulation = 1; 112 skb->encapsulation = 1;
94 } 113 }
@@ -96,43 +115,27 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
96 skb->mac_len = mac_len; 115 skb->mac_len = mac_len;
97 skb->protocol = protocol; 116 skb->protocol = protocol;
98 117
99 skb_push(skb, outer_hlen); 118 __skb_push(skb, outer_hlen);
100 skb_reset_mac_header(skb); 119 skb_reset_mac_header(skb);
101 skb_set_network_header(skb, mac_len); 120 skb_set_network_header(skb, mac_len);
102 skb_set_transport_header(skb, udp_offset); 121 skb_set_transport_header(skb, udp_offset);
103 len = skb->len - udp_offset; 122 len = htons(skb->len - udp_offset);
104 uh = udp_hdr(skb); 123 uh = udp_hdr(skb);
105 uh->len = htons(len); 124 uh->len = len;
106 125
107 if (!need_csum) 126 if (!need_csum)
108 continue; 127 continue;
109 128
110 delta = htonl(oldlen + len); 129 uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len));
111 130
112 uh->check = ~csum_fold((__force __wsum) 131 if (skb->encapsulation || !offload_csum) {
113 ((__force u32)uh->check +
114 (__force u32)delta));
115 if (offload_csum) {
116 skb->ip_summed = CHECKSUM_PARTIAL;
117 skb->csum_start = skb_transport_header(skb) - skb->head;
118 skb->csum_offset = offsetof(struct udphdr, check);
119 } else if (remcsum) {
120 /* Need to calculate checksum from scratch,
121 * inner checksums are never when doing
122 * remote_checksum_offload.
123 */
124
125 skb->csum = skb_checksum(skb, udp_offset,
126 skb->len - udp_offset,
127 0);
128 uh->check = csum_fold(skb->csum);
129 if (uh->check == 0)
130 uh->check = CSUM_MANGLED_0;
131 } else {
132 uh->check = gso_make_checksum(skb, ~uh->check); 132 uh->check = gso_make_checksum(skb, ~uh->check);
133
134 if (uh->check == 0) 133 if (uh->check == 0)
135 uh->check = CSUM_MANGLED_0; 134 uh->check = CSUM_MANGLED_0;
135 } else {
136 skb->ip_summed = CHECKSUM_PARTIAL;
137 skb->csum_start = skb_transport_header(skb) - skb->head;
138 skb->csum_offset = offsetof(struct udphdr, check);
136 } 139 }
137 } while ((skb = skb->next)); 140 } while ((skb = skb->next));
138out: 141out:
@@ -235,6 +238,13 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
235 238
236 skb->ip_summed = CHECKSUM_NONE; 239 skb->ip_summed = CHECKSUM_NONE;
237 240
241 /* If there is no outer header we can fake a checksum offload
242 * due to the fact that we have already done the checksum in
243 * software prior to segmenting the frame.
244 */
245 if (!skb->encap_hdr_csum)
246 features |= NETIF_F_HW_CSUM;
247
238 /* Fragment the skb. IP headers of the fragments are updated in 248 /* Fragment the skb. IP headers of the fragments are updated in
239 * inet_gso_segment() 249 * inet_gso_segment()
240 */ 250 */
@@ -302,14 +312,14 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
302 unsigned int off = skb_gro_offset(skb); 312 unsigned int off = skb_gro_offset(skb);
303 int flush = 1; 313 int flush = 1;
304 314
305 if (NAPI_GRO_CB(skb)->udp_mark || 315 if (NAPI_GRO_CB(skb)->encap_mark ||
306 (skb->ip_summed != CHECKSUM_PARTIAL && 316 (skb->ip_summed != CHECKSUM_PARTIAL &&
307 NAPI_GRO_CB(skb)->csum_cnt == 0 && 317 NAPI_GRO_CB(skb)->csum_cnt == 0 &&
308 !NAPI_GRO_CB(skb)->csum_valid)) 318 !NAPI_GRO_CB(skb)->csum_valid))
309 goto out; 319 goto out;
310 320
311 /* mark that this skb passed once through the udp gro layer */ 321 /* mark that this skb passed once through the tunnel gro layer */
312 NAPI_GRO_CB(skb)->udp_mark = 1; 322 NAPI_GRO_CB(skb)->encap_mark = 1;
313 323
314 rcu_read_lock(); 324 rcu_read_lock();
315 uo_priv = rcu_dereference(udp_offload_base); 325 uo_priv = rcu_dereference(udp_offload_base);
@@ -389,6 +399,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff)
389 399
390 uh->len = newlen; 400 uh->len = newlen;
391 401
402 /* Set encapsulation before calling into inner gro_complete() functions
403 * to make them set up the inner offsets.
404 */
405 skb->encapsulation = 1;
406
392 rcu_read_lock(); 407 rcu_read_lock();
393 408
394 uo_priv = rcu_dereference(udp_offload_base); 409 uo_priv = rcu_dereference(udp_offload_base);
@@ -411,9 +426,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff)
411 if (skb->remcsum_offload) 426 if (skb->remcsum_offload)
412 skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; 427 skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;
413 428
414 skb->encapsulation = 1;
415 skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));
416
417 return err; 429 return err;
418} 430}
419 431
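The comment block added at the top of __skb_udp_tunnel_segment() explains the trick the segment loop later relies on: subtract the original total length (which may exceed the 16-bit length field) from the outer UDP checksum once, then for each segment add that segment's length back and fold, instead of recomputing the checksum per segment. A numeric illustration of that "remove old length, add new length" adjustment; this is standalone arithmetic, not the kernel's csum_sub()/csum_add()/csum_fold():

/* One's-complement "subtract the old length, add the new length" adjustment
 * used for the outer UDP checksum above. The 32-bit value models htonl(len)
 * entering the pseudo-header sum; everything else is held constant.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t fold_carries(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

static uint32_t add32(uint32_t sum, uint32_t v)
{
	/* add the two 16-bit halves of v into the running one's-complement sum */
	return fold_carries(sum + (v >> 16) + (v & 0xffff));
}

static uint32_t sub32(uint32_t sum, uint32_t v)
{
	/* one's-complement subtraction is addition of the complement */
	return add32(sum, ~v);
}

int main(void)
{
	uint32_t fixed_part = 0xbeef;              /* addresses, ports, payload, ... */
	uint32_t old_len = 70000, new_len = 1400;  /* super-frame vs per-segment length */

	uint32_t full_old = add32(fixed_part, old_len);
	uint32_t partial  = sub32(full_old, old_len);    /* negate the old length once */
	uint32_t per_seg  = add32(partial, new_len);     /* re-add per segment */
	uint32_t direct   = add32(fixed_part, new_len);  /* recomputed from scratch */

	printf("adjusted=%#06x direct=%#06x\n", (unsigned)per_seg, (unsigned)direct);
	return 0;
}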
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 40c897515ddc..11e875ffd7ac 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -207,6 +207,7 @@ config IPV6_NDISC_NODETYPE
207config IPV6_TUNNEL 207config IPV6_TUNNEL
208 tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" 208 tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
209 select INET6_TUNNEL 209 select INET6_TUNNEL
210 select DST_CACHE
210 ---help--- 211 ---help---
211 Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in 212 Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
212 RFC 2473. 213 RFC 2473.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bdd7eac4307a..8ec4b3089e20 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -216,6 +216,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
216 }, 216 },
217 .use_oif_addrs_only = 0, 217 .use_oif_addrs_only = 0,
218 .ignore_routes_with_linkdown = 0, 218 .ignore_routes_with_linkdown = 0,
219 .keep_addr_on_down = 0,
219}; 220};
220 221
221static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { 222static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
260 }, 261 },
261 .use_oif_addrs_only = 0, 262 .use_oif_addrs_only = 0,
262 .ignore_routes_with_linkdown = 0, 263 .ignore_routes_with_linkdown = 0,
264 .keep_addr_on_down = 0,
263}; 265};
264 266
265/* Check if a valid qdisc is available */ 267/* Check if a valid qdisc is available */
@@ -471,18 +473,21 @@ static int inet6_netconf_msgsize_devconf(int type)
471{ 473{
472 int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) 474 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
473 + nla_total_size(4); /* NETCONFA_IFINDEX */ 475 + nla_total_size(4); /* NETCONFA_IFINDEX */
476 bool all = false;
474 477
475 /* type -1 is used for ALL */ 478 if (type == NETCONFA_ALL)
476 if (type == -1 || type == NETCONFA_FORWARDING) 479 all = true;
480
481 if (all || type == NETCONFA_FORWARDING)
477 size += nla_total_size(4); 482 size += nla_total_size(4);
478#ifdef CONFIG_IPV6_MROUTE 483#ifdef CONFIG_IPV6_MROUTE
479 if (type == -1 || type == NETCONFA_MC_FORWARDING) 484 if (all || type == NETCONFA_MC_FORWARDING)
480 size += nla_total_size(4); 485 size += nla_total_size(4);
481#endif 486#endif
482 if (type == -1 || type == NETCONFA_PROXY_NEIGH) 487 if (all || type == NETCONFA_PROXY_NEIGH)
483 size += nla_total_size(4); 488 size += nla_total_size(4);
484 489
485 if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) 490 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
486 size += nla_total_size(4); 491 size += nla_total_size(4);
487 492
488 return size; 493 return size;
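The addrconf.c netconf hunks replace the magic "-1 means every attribute" type with the explicit NETCONFA_ALL constant in both the size estimate and the fill function. The sizing pattern itself, estimating the netlink payload before allocating it, looks like this in miniature; the attribute ids and byte counts below are invented for the example:

/* Sketch of sizing a reply before allocating it: a base header plus 4 bytes
 * per attribute that will actually be emitted, with an "ALL" sentinel
 * instead of -1.
 */
#include <stddef.h>
#include <stdio.h>

enum toy_attr { TOY_ALL = 0, TOY_IFINDEX, TOY_FORWARDING, TOY_PROXY_NEIGH };

static size_t toy_msgsize(enum toy_attr type)
{
	size_t size = 16 + 4;          /* header + always-present IFINDEX */
	int all = (type == TOY_ALL);

	if (all || type == TOY_FORWARDING)
		size += 4;
	if (all || type == TOY_PROXY_NEIGH)
		size += 4;
	return size;
}

int main(void)
{
	printf("single attribute: %zu bytes\n", toy_msgsize(TOY_FORWARDING));
	printf("full dump:        %zu bytes\n", toy_msgsize(TOY_ALL));
	return 0;
}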
@@ -495,33 +500,36 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
495{ 500{
496 struct nlmsghdr *nlh; 501 struct nlmsghdr *nlh;
497 struct netconfmsg *ncm; 502 struct netconfmsg *ncm;
503 bool all = false;
498 504
499 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), 505 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
500 flags); 506 flags);
501 if (!nlh) 507 if (!nlh)
502 return -EMSGSIZE; 508 return -EMSGSIZE;
503 509
510 if (type == NETCONFA_ALL)
511 all = true;
512
504 ncm = nlmsg_data(nlh); 513 ncm = nlmsg_data(nlh);
505 ncm->ncm_family = AF_INET6; 514 ncm->ncm_family = AF_INET6;
506 515
507 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) 516 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
508 goto nla_put_failure; 517 goto nla_put_failure;
509 518
510 /* type -1 is used for ALL */ 519 if ((all || type == NETCONFA_FORWARDING) &&
511 if ((type == -1 || type == NETCONFA_FORWARDING) &&
512 nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) 520 nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
513 goto nla_put_failure; 521 goto nla_put_failure;
514#ifdef CONFIG_IPV6_MROUTE 522#ifdef CONFIG_IPV6_MROUTE
515 if ((type == -1 || type == NETCONFA_MC_FORWARDING) && 523 if ((all || type == NETCONFA_MC_FORWARDING) &&
516 nla_put_s32(skb, NETCONFA_MC_FORWARDING, 524 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
517 devconf->mc_forwarding) < 0) 525 devconf->mc_forwarding) < 0)
518 goto nla_put_failure; 526 goto nla_put_failure;
519#endif 527#endif
520 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && 528 if ((all || type == NETCONFA_PROXY_NEIGH) &&
521 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) 529 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
522 goto nla_put_failure; 530 goto nla_put_failure;
523 531
524 if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && 532 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
525 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 533 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
526 devconf->ignore_routes_with_linkdown) < 0) 534 devconf->ignore_routes_with_linkdown) < 0)
527 goto nla_put_failure; 535 goto nla_put_failure;
@@ -607,14 +615,14 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
607 } 615 }
608 616
609 err = -ENOBUFS; 617 err = -ENOBUFS;
610 skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); 618 skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
611 if (!skb) 619 if (!skb)
612 goto errout; 620 goto errout;
613 621
614 err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 622 err = inet6_netconf_fill_devconf(skb, ifindex, devconf,
615 NETLINK_CB(in_skb).portid, 623 NETLINK_CB(in_skb).portid,
616 nlh->nlmsg_seq, RTM_NEWNETCONF, 0, 624 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
617 -1); 625 NETCONFA_ALL);
618 if (err < 0) { 626 if (err < 0) {
619 /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ 627 /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
620 WARN_ON(err == -EMSGSIZE); 628 WARN_ON(err == -EMSGSIZE);
@@ -658,7 +666,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
658 cb->nlh->nlmsg_seq, 666 cb->nlh->nlmsg_seq,
659 RTM_NEWNETCONF, 667 RTM_NEWNETCONF,
660 NLM_F_MULTI, 668 NLM_F_MULTI,
661 -1) < 0) { 669 NETCONFA_ALL) < 0) {
662 rcu_read_unlock(); 670 rcu_read_unlock();
663 goto done; 671 goto done;
664 } 672 }
@@ -674,7 +682,7 @@ cont:
674 NETLINK_CB(cb->skb).portid, 682 NETLINK_CB(cb->skb).portid,
675 cb->nlh->nlmsg_seq, 683 cb->nlh->nlmsg_seq,
676 RTM_NEWNETCONF, NLM_F_MULTI, 684 RTM_NEWNETCONF, NLM_F_MULTI,
677 -1) < 0) 685 NETCONFA_ALL) < 0)
678 goto done; 686 goto done;
679 else 687 else
680 h++; 688 h++;
@@ -685,7 +693,7 @@ cont:
685 NETLINK_CB(cb->skb).portid, 693 NETLINK_CB(cb->skb).portid,
686 cb->nlh->nlmsg_seq, 694 cb->nlh->nlmsg_seq,
687 RTM_NEWNETCONF, NLM_F_MULTI, 695 RTM_NEWNETCONF, NLM_F_MULTI,
688 -1) < 0) 696 NETCONFA_ALL) < 0)
689 goto done; 697 goto done;
690 else 698 else
691 h++; 699 h++;
@@ -3168,10 +3176,60 @@ static void addrconf_gre_config(struct net_device *dev)
3168} 3176}
3169#endif 3177#endif
3170 3178
3179static int fixup_permanent_addr(struct inet6_dev *idev,
3180 struct inet6_ifaddr *ifp)
3181{
3182 if (!ifp->rt) {
3183 struct rt6_info *rt;
3184
3185 rt = addrconf_dst_alloc(idev, &ifp->addr, false);
3186 if (unlikely(IS_ERR(rt)))
3187 return PTR_ERR(rt);
3188
3189 ifp->rt = rt;
3190 }
3191
3192 if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
3193 addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
3194 idev->dev, 0, 0);
3195 }
3196
3197 addrconf_dad_start(ifp);
3198
3199 return 0;
3200}
3201
3202static void addrconf_permanent_addr(struct net_device *dev)
3203{
3204 struct inet6_ifaddr *ifp, *tmp;
3205 struct inet6_dev *idev;
3206
3207 idev = __in6_dev_get(dev);
3208 if (!idev)
3209 return;
3210
3211 write_lock_bh(&idev->lock);
3212
3213 list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
3214 if ((ifp->flags & IFA_F_PERMANENT) &&
3215 fixup_permanent_addr(idev, ifp) < 0) {
3216 write_unlock_bh(&idev->lock);
3217 ipv6_del_addr(ifp);
3218 write_lock_bh(&idev->lock);
3219
3220 net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n",
3221 idev->dev->name, &ifp->addr);
3222 }
3223 }
3224
3225 write_unlock_bh(&idev->lock);
3226}
3227
3171static int addrconf_notify(struct notifier_block *this, unsigned long event, 3228static int addrconf_notify(struct notifier_block *this, unsigned long event,
3172 void *ptr) 3229 void *ptr)
3173{ 3230{
3174 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 3231 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3232 struct netdev_notifier_changeupper_info *info;
3175 struct inet6_dev *idev = __in6_dev_get(dev); 3233 struct inet6_dev *idev = __in6_dev_get(dev);
3176 int run_pending = 0; 3234 int run_pending = 0;
3177 int err; 3235 int err;
@@ -3220,6 +3278,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3220 break; 3278 break;
3221 3279
3222 if (event == NETDEV_UP) { 3280 if (event == NETDEV_UP) {
3281 /* restore routes for permanent addresses */
3282 addrconf_permanent_addr(dev);
3283
3223 if (!addrconf_qdisc_ok(dev)) { 3284 if (!addrconf_qdisc_ok(dev)) {
3224 /* device is not ready yet. */ 3285 /* device is not ready yet. */
3225 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", 3286 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
@@ -3327,6 +3388,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3327 if (idev) 3388 if (idev)
3328 addrconf_type_change(dev, event); 3389 addrconf_type_change(dev, event);
3329 break; 3390 break;
3391
3392 case NETDEV_CHANGEUPPER:
3393 info = ptr;
3394
3395 /* flush all routes if dev is linked to or unlinked from
3396 * an L3 master device (e.g., VRF)
3397 */
3398 if (info->upper_dev && netif_is_l3_master(info->upper_dev))
3399 addrconf_ifdown(dev, 0);
3330 } 3400 }
3331 3401
3332 return NOTIFY_OK; 3402 return NOTIFY_OK;
@@ -3352,11 +3422,20 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event)
3352 ipv6_mc_unmap(idev); 3422 ipv6_mc_unmap(idev);
3353} 3423}
3354 3424
3425static bool addr_is_local(const struct in6_addr *addr)
3426{
3427 return ipv6_addr_type(addr) &
3428 (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
3429}
3430
3355static int addrconf_ifdown(struct net_device *dev, int how) 3431static int addrconf_ifdown(struct net_device *dev, int how)
3356{ 3432{
3357 struct net *net = dev_net(dev); 3433 struct net *net = dev_net(dev);
3358 struct inet6_dev *idev; 3434 struct inet6_dev *idev;
3359 struct inet6_ifaddr *ifa; 3435 struct inet6_ifaddr *ifa, *tmp;
3436 struct list_head del_list;
3437 int _keep_addr;
3438 bool keep_addr;
3360 int state, i; 3439 int state, i;
3361 3440
3362 ASSERT_RTNL(); 3441 ASSERT_RTNL();
@@ -3383,6 +3462,16 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3383 3462
3384 } 3463 }
3385 3464
3465 /* aggregate the system setting and interface setting */
3466 _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
3467 if (!_keep_addr)
3468 _keep_addr = idev->cnf.keep_addr_on_down;
3469
3470 /* combine the user config with event to determine if permanent
3471 * addresses are to be removed from address hash table
3472 */
3473 keep_addr = !(how || _keep_addr <= 0);
3474
3386 /* Step 2: clear hash table */ 3475 /* Step 2: clear hash table */
3387 for (i = 0; i < IN6_ADDR_HSIZE; i++) { 3476 for (i = 0; i < IN6_ADDR_HSIZE; i++) {
3388 struct hlist_head *h = &inet6_addr_lst[i]; 3477 struct hlist_head *h = &inet6_addr_lst[i];
@@ -3391,9 +3480,16 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3391restart: 3480restart:
3392 hlist_for_each_entry_rcu(ifa, h, addr_lst) { 3481 hlist_for_each_entry_rcu(ifa, h, addr_lst) {
3393 if (ifa->idev == idev) { 3482 if (ifa->idev == idev) {
3394 hlist_del_init_rcu(&ifa->addr_lst);
3395 addrconf_del_dad_work(ifa); 3483 addrconf_del_dad_work(ifa);
3396 goto restart; 3484 /* combined flag + permanent flag decide if
3485 * address is retained on a down event
3486 */
3487 if (!keep_addr ||
3488 !(ifa->flags & IFA_F_PERMANENT) ||
3489 addr_is_local(&ifa->addr)) {
3490 hlist_del_init_rcu(&ifa->addr_lst);
3491 goto restart;
3492 }
3397 } 3493 }
3398 } 3494 }
3399 spin_unlock_bh(&addrconf_hash_lock); 3495 spin_unlock_bh(&addrconf_hash_lock);
@@ -3427,31 +3523,62 @@ restart:
3427 write_lock_bh(&idev->lock); 3523 write_lock_bh(&idev->lock);
3428 } 3524 }
3429 3525
3430 while (!list_empty(&idev->addr_list)) { 3526 /* re-combine the user config with event to determine if permanent
3431 ifa = list_first_entry(&idev->addr_list, 3527 * addresses are to be removed from the interface list
3432 struct inet6_ifaddr, if_list); 3528 */
3433 addrconf_del_dad_work(ifa); 3529 keep_addr = (!how && _keep_addr > 0);
3434 3530
3435 list_del(&ifa->if_list); 3531 INIT_LIST_HEAD(&del_list);
3532 list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
3533 struct rt6_info *rt = NULL;
3436 3534
3437 write_unlock_bh(&idev->lock); 3535 addrconf_del_dad_work(ifa);
3438 3536
3537 write_unlock_bh(&idev->lock);
3439 spin_lock_bh(&ifa->lock); 3538 spin_lock_bh(&ifa->lock);
3440 state = ifa->state; 3539
3441 ifa->state = INET6_IFADDR_STATE_DEAD; 3540 if (keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
3541 !addr_is_local(&ifa->addr)) {
3542 /* set state to skip the notifier below */
3543 state = INET6_IFADDR_STATE_DEAD;
3544 ifa->state = 0;
3545 if (!(ifa->flags & IFA_F_NODAD))
3546 ifa->flags |= IFA_F_TENTATIVE;
3547
3548 rt = ifa->rt;
3549 ifa->rt = NULL;
3550 } else {
3551 state = ifa->state;
3552 ifa->state = INET6_IFADDR_STATE_DEAD;
3553
3554 list_del(&ifa->if_list);
3555 list_add(&ifa->if_list, &del_list);
3556 }
3557
3442 spin_unlock_bh(&ifa->lock); 3558 spin_unlock_bh(&ifa->lock);
3443 3559
3560 if (rt)
3561 ip6_del_rt(rt);
3562
3444 if (state != INET6_IFADDR_STATE_DEAD) { 3563 if (state != INET6_IFADDR_STATE_DEAD) {
3445 __ipv6_ifa_notify(RTM_DELADDR, ifa); 3564 __ipv6_ifa_notify(RTM_DELADDR, ifa);
3446 inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); 3565 inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
3447 } 3566 }
3448 in6_ifa_put(ifa);
3449 3567
3450 write_lock_bh(&idev->lock); 3568 write_lock_bh(&idev->lock);
3451 } 3569 }
3452 3570
3453 write_unlock_bh(&idev->lock); 3571 write_unlock_bh(&idev->lock);
3454 3572
3573 /* now clean up addresses to be removed */
3574 while (!list_empty(&del_list)) {
3575 ifa = list_first_entry(&del_list,
3576 struct inet6_ifaddr, if_list);
3577 list_del(&ifa->if_list);
3578
3579 in6_ifa_put(ifa);
3580 }
3581
3455 /* Step 5: Discard anycast and multicast list */ 3582 /* Step 5: Discard anycast and multicast list */
3456 if (how) { 3583 if (how) {
3457 ipv6_ac_destroy_dev(idev); 3584 ipv6_ac_destroy_dev(idev);
@@ -4714,6 +4841,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
4714 array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; 4841 array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
4715 /* we omit DEVCONF_STABLE_SECRET for now */ 4842 /* we omit DEVCONF_STABLE_SECRET for now */
4716 array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; 4843 array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
4844 array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
4845 array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
4846 array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
4717} 4847}
4718 4848
4719static inline size_t inet6_ifla6_size(void) 4849static inline size_t inet6_ifla6_size(void)
@@ -5195,10 +5325,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
5195 if (rt) 5325 if (rt)
5196 ip6_del_rt(rt); 5326 ip6_del_rt(rt);
5197 } 5327 }
5198 dst_hold(&ifp->rt->dst); 5328 if (ifp->rt) {
5199 5329 dst_hold(&ifp->rt->dst);
5200 ip6_del_rt(ifp->rt); 5330 ip6_del_rt(ifp->rt);
5201 5331 }
5202 rt_genid_bump_ipv6(net); 5332 rt_genid_bump_ipv6(net);
5203 break; 5333 break;
5204 } 5334 }
@@ -5788,6 +5918,28 @@ static struct addrconf_sysctl_table
5788 .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, 5918 .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
5789 }, 5919 },
5790 { 5920 {
5921 .procname = "drop_unicast_in_l2_multicast",
5922 .data = &ipv6_devconf.drop_unicast_in_l2_multicast,
5923 .maxlen = sizeof(int),
5924 .mode = 0644,
5925 .proc_handler = proc_dointvec,
5926 },
5927 {
5928 .procname = "drop_unsolicited_na",
5929 .data = &ipv6_devconf.drop_unsolicited_na,
5930 .maxlen = sizeof(int),
5931 .mode = 0644,
5932 .proc_handler = proc_dointvec,
5933 },
5934 {
5935 .procname = "keep_addr_on_down",
5936 .data = &ipv6_devconf.keep_addr_on_down,
5937 .maxlen = sizeof(int),
5938 .mode = 0644,
5939 .proc_handler = proc_dointvec,
5940
5941 },
5942 {
5791 /* sentinel */ 5943 /* sentinel */
5792 } 5944 }
5793 }, 5945 },
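Taken together, the addrconf.c hunks above let permanent, non link-local addresses survive a plain NETDEV_DOWN (gated by the new keep_addr_on_down sysctls), and addrconf_permanent_addr() restores their prefix routes on the following NETDEV_UP. The retention test reduces to a small predicate; a minimal standalone sketch of it, with illustrative names only (how != 0 meaning the device is being unregistered, as in addrconf_ifdown()):

#include <stdbool.h>

/* Sketch of the keep_addr_on_down decision from the hunks above: the "all"
 * sysctl wins when non-zero, otherwise the per-device value; retention also
 * requires a permanent, non link-local/loopback address and a plain down
 * event rather than an unregister. Illustrative only, not kernel code. */
static bool keep_addr_on_down(int how, int conf_all, int conf_dev,
                              bool permanent, bool addr_is_local)
{
        int keep = conf_all ? conf_all : conf_dev;

        if (how || keep <= 0)
                return false;           /* unregister, or feature off */
        return permanent && !addr_is_local;
}

The same predicate is evaluated twice in the patch, once for the address hash table and once for the per-device address list, which is why _keep_addr is computed up front and re-combined with the event type in each place.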
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9f5137cd604e..b11c37cfd67c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -235,7 +235,11 @@ lookup_protocol:
235 * creation time automatically shares. 235 * creation time automatically shares.
236 */ 236 */
237 inet->inet_sport = htons(inet->inet_num); 237 inet->inet_sport = htons(inet->inet_num);
238 sk->sk_prot->hash(sk); 238 err = sk->sk_prot->hash(sk);
239 if (err) {
240 sk_common_release(sk);
241 goto out;
242 }
239 } 243 }
240 if (sk->sk_prot->init) { 244 if (sk->sk_prot->init) {
241 err = sk->sk_prot->init(sk); 245 err = sk->sk_prot->init(sk);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 428162155280..9dd3882fe6bf 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -40,18 +40,114 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
40 return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); 40 return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
41} 41}
42 42
43static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
44{
45 struct inet_sock *inet = inet_sk(sk);
46 struct ipv6_pinfo *np = inet6_sk(sk);
47
48 memset(fl6, 0, sizeof(*fl6));
49 fl6->flowi6_proto = sk->sk_protocol;
50 fl6->daddr = sk->sk_v6_daddr;
51 fl6->saddr = np->saddr;
52 fl6->flowi6_oif = sk->sk_bound_dev_if;
53 fl6->flowi6_mark = sk->sk_mark;
54 fl6->fl6_dport = inet->inet_dport;
55 fl6->fl6_sport = inet->inet_sport;
56 fl6->flowlabel = np->flow_label;
57
58 if (!fl6->flowi6_oif)
59 fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
60
61 if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr))
62 fl6->flowi6_oif = np->mcast_oif;
63
64 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
65}
66
67int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
68{
69 struct ip6_flowlabel *flowlabel = NULL;
70 struct in6_addr *final_p, final;
71 struct ipv6_txoptions *opt;
72 struct dst_entry *dst;
73 struct inet_sock *inet = inet_sk(sk);
74 struct ipv6_pinfo *np = inet6_sk(sk);
75 struct flowi6 fl6;
76 int err = 0;
77
78 if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) {
79 flowlabel = fl6_sock_lookup(sk, np->flow_label);
80 if (!flowlabel)
81 return -EINVAL;
82 }
83 ip6_datagram_flow_key_init(&fl6, sk);
84
85 rcu_read_lock();
86 opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
87 final_p = fl6_update_dst(&fl6, opt, &final);
88 rcu_read_unlock();
89
90 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
91 if (IS_ERR(dst)) {
92 err = PTR_ERR(dst);
93 goto out;
94 }
95
96 if (fix_sk_saddr) {
97 if (ipv6_addr_any(&np->saddr))
98 np->saddr = fl6.saddr;
99
100 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
101 sk->sk_v6_rcv_saddr = fl6.saddr;
102 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
103 if (sk->sk_prot->rehash)
104 sk->sk_prot->rehash(sk);
105 }
106 }
107
108 ip6_dst_store(sk, dst,
109 ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
110 &sk->sk_v6_daddr : NULL,
111#ifdef CONFIG_IPV6_SUBTREES
112 ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
113 &np->saddr :
114#endif
115 NULL);
116
117out:
118 fl6_sock_release(flowlabel);
119 return err;
120}
121
122void ip6_datagram_release_cb(struct sock *sk)
123{
124 struct dst_entry *dst;
125
126 if (ipv6_addr_v4mapped(&sk->sk_v6_daddr))
127 return;
128
129 rcu_read_lock();
130 dst = __sk_dst_get(sk);
131 if (!dst || !dst->obsolete ||
132 dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) {
133 rcu_read_unlock();
134 return;
135 }
136 rcu_read_unlock();
137
138 ip6_datagram_dst_update(sk, false);
139}
140EXPORT_SYMBOL_GPL(ip6_datagram_release_cb);
141
43static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 142static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
44{ 143{
45 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 144 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
46 struct inet_sock *inet = inet_sk(sk); 145 struct inet_sock *inet = inet_sk(sk);
47 struct ipv6_pinfo *np = inet6_sk(sk); 146 struct ipv6_pinfo *np = inet6_sk(sk);
48 struct in6_addr *daddr, *final_p, final; 147 struct in6_addr *daddr;
49 struct dst_entry *dst;
50 struct flowi6 fl6;
51 struct ip6_flowlabel *flowlabel = NULL;
52 struct ipv6_txoptions *opt;
53 int addr_type; 148 int addr_type;
54 int err; 149 int err;
150 __be32 fl6_flowlabel = 0;
55 151
56 if (usin->sin6_family == AF_INET) { 152 if (usin->sin6_family == AF_INET) {
57 if (__ipv6_only_sock(sk)) 153 if (__ipv6_only_sock(sk))
@@ -66,15 +162,8 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a
66 if (usin->sin6_family != AF_INET6) 162 if (usin->sin6_family != AF_INET6)
67 return -EAFNOSUPPORT; 163 return -EAFNOSUPPORT;
68 164
69 memset(&fl6, 0, sizeof(fl6)); 165 if (np->sndflow)
70 if (np->sndflow) { 166 fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
71 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
72 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
73 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
74 if (!flowlabel)
75 return -EINVAL;
76 }
77 }
78 167
79 addr_type = ipv6_addr_type(&usin->sin6_addr); 168 addr_type = ipv6_addr_type(&usin->sin6_addr);
80 169
@@ -145,7 +234,7 @@ ipv4_connected:
145 } 234 }
146 235
147 sk->sk_v6_daddr = *daddr; 236 sk->sk_v6_daddr = *daddr;
148 np->flow_label = fl6.flowlabel; 237 np->flow_label = fl6_flowlabel;
149 238
150 inet->inet_dport = usin->sin6_port; 239 inet->inet_dport = usin->sin6_port;
151 240
@@ -154,59 +243,13 @@ ipv4_connected:
154 * destination cache for it. 243 * destination cache for it.
155 */ 244 */
156 245
157 fl6.flowi6_proto = sk->sk_protocol; 246 err = ip6_datagram_dst_update(sk, true);
158 fl6.daddr = sk->sk_v6_daddr; 247 if (err)
159 fl6.saddr = np->saddr;
160 fl6.flowi6_oif = sk->sk_bound_dev_if;
161 fl6.flowi6_mark = sk->sk_mark;
162 fl6.fl6_dport = inet->inet_dport;
163 fl6.fl6_sport = inet->inet_sport;
164
165 if (!fl6.flowi6_oif)
166 fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
167
168 if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
169 fl6.flowi6_oif = np->mcast_oif;
170
171 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
172
173 rcu_read_lock();
174 opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
175 final_p = fl6_update_dst(&fl6, opt, &final);
176 rcu_read_unlock();
177
178 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
179 err = 0;
180 if (IS_ERR(dst)) {
181 err = PTR_ERR(dst);
182 goto out; 248 goto out;
183 }
184
185 /* source address lookup done in ip6_dst_lookup */
186
187 if (ipv6_addr_any(&np->saddr))
188 np->saddr = fl6.saddr;
189
190 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
191 sk->sk_v6_rcv_saddr = fl6.saddr;
192 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
193 if (sk->sk_prot->rehash)
194 sk->sk_prot->rehash(sk);
195 }
196
197 ip6_dst_store(sk, dst,
198 ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
199 &sk->sk_v6_daddr : NULL,
200#ifdef CONFIG_IPV6_SUBTREES
201 ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
202 &np->saddr :
203#endif
204 NULL);
205 249
206 sk->sk_state = TCP_ESTABLISHED; 250 sk->sk_state = TCP_ESTABLISHED;
207 sk_set_txhash(sk); 251 sk_set_txhash(sk);
208out: 252out:
209 fl6_sock_release(flowlabel);
210 return err; 253 return err;
211} 254}
212 255
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 0a37ddc7af51..0013cacf7164 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -445,6 +445,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
445 445
446 if (__ipv6_addr_needs_scope_id(addr_type)) 446 if (__ipv6_addr_needs_scope_id(addr_type))
447 iif = skb->dev->ifindex; 447 iif = skb->dev->ifindex;
448 else
449 iif = l3mdev_master_ifindex(skb->dev);
448 450
449 /* 451 /*
450 * Must not send error if the source does not uniquely 452 * Must not send error if the source does not uniquely
@@ -499,9 +501,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
499 else if (!fl6.flowi6_oif) 501 else if (!fl6.flowi6_oif)
500 fl6.flowi6_oif = np->ucast_oif; 502 fl6.flowi6_oif = np->ucast_oif;
501 503
502 if (!fl6.flowi6_oif)
503 fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
504
505 dst = icmpv6_route_lookup(net, skb, sk, &fl6); 504 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
506 if (IS_ERR(dst)) 505 if (IS_ERR(dst))
507 goto out; 506 goto out;
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 32dc9aab7297..30613050e4ca 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -99,5 +99,6 @@ static void __exit ila_fini(void)
99 99
100module_init(ila_init); 100module_init(ila_init);
101module_exit(ila_fini); 101module_exit(ila_fini);
102MODULE_ALIAS_RTNL_LWT(ILA);
102MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); 103MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
103MODULE_LICENSE("GPL"); 104MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 2ae3c4fd8aab..41f18de5dcc2 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -120,8 +120,7 @@ nla_put_failure:
120 120
121static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) 121static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
122{ 122{
123 /* No encapsulation overhead */ 123 return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */
124 return 0;
125} 124}
126 125
127static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 126static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 36c3f0155010..532c3ef282c5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -26,6 +26,7 @@
26#include <net/ip6_route.h> 26#include <net/ip6_route.h>
27#include <net/sock.h> 27#include <net/sock.h>
28#include <net/inet6_connection_sock.h> 28#include <net/inet6_connection_sock.h>
29#include <net/sock_reuseport.h>
29 30
30int inet6_csk_bind_conflict(const struct sock *sk, 31int inet6_csk_bind_conflict(const struct sock *sk,
31 const struct inet_bind_bucket *tb, bool relax) 32 const struct inet_bind_bucket *tb, bool relax)
@@ -48,6 +49,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
48 if ((!reuse || !sk2->sk_reuse || 49 if ((!reuse || !sk2->sk_reuse ||
49 sk2->sk_state == TCP_LISTEN) && 50 sk2->sk_state == TCP_LISTEN) &&
50 (!reuseport || !sk2->sk_reuseport || 51 (!reuseport || !sk2->sk_reuseport ||
52 rcu_access_pointer(sk->sk_reuseport_cb) ||
51 (sk2->sk_state != TCP_TIME_WAIT && 53 (sk2->sk_state != TCP_TIME_WAIT &&
52 !uid_eq(uid, 54 !uid_eq(uid,
53 sock_i_uid((struct sock *)sk2))))) { 55 sock_i_uid((struct sock *)sk2))))) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21ace5a2bf7c..70f2628be6fa 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -17,11 +17,13 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/random.h> 18#include <linux/random.h>
19 19
20#include <net/addrconf.h>
20#include <net/inet_connection_sock.h> 21#include <net/inet_connection_sock.h>
21#include <net/inet_hashtables.h> 22#include <net/inet_hashtables.h>
22#include <net/inet6_hashtables.h> 23#include <net/inet6_hashtables.h>
23#include <net/secure_seq.h> 24#include <net/secure_seq.h>
24#include <net/ip.h> 25#include <net/ip.h>
26#include <net/sock_reuseport.h>
25 27
26u32 inet6_ehashfn(const struct net *net, 28u32 inet6_ehashfn(const struct net *net,
27 const struct in6_addr *laddr, const u16 lport, 29 const struct in6_addr *laddr, const u16 lport,
@@ -121,7 +123,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
121} 123}
122 124
123struct sock *inet6_lookup_listener(struct net *net, 125struct sock *inet6_lookup_listener(struct net *net,
124 struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, 126 struct inet_hashinfo *hashinfo,
127 struct sk_buff *skb, int doff,
128 const struct in6_addr *saddr,
125 const __be16 sport, const struct in6_addr *daddr, 129 const __be16 sport, const struct in6_addr *daddr,
126 const unsigned short hnum, const int dif) 130 const unsigned short hnum, const int dif)
127{ 131{
@@ -129,6 +133,7 @@ struct sock *inet6_lookup_listener(struct net *net,
129 const struct hlist_nulls_node *node; 133 const struct hlist_nulls_node *node;
130 struct sock *result; 134 struct sock *result;
131 int score, hiscore, matches = 0, reuseport = 0; 135 int score, hiscore, matches = 0, reuseport = 0;
136 bool select_ok = true;
132 u32 phash = 0; 137 u32 phash = 0;
133 unsigned int hash = inet_lhashfn(net, hnum); 138 unsigned int hash = inet_lhashfn(net, hnum);
134 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; 139 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -146,6 +151,15 @@ begin:
146 if (reuseport) { 151 if (reuseport) {
147 phash = inet6_ehashfn(net, daddr, hnum, 152 phash = inet6_ehashfn(net, daddr, hnum,
148 saddr, sport); 153 saddr, sport);
154 if (select_ok) {
155 struct sock *sk2;
156 sk2 = reuseport_select_sock(sk, phash,
157 skb, doff);
158 if (sk2) {
159 result = sk2;
160 goto found;
161 }
162 }
149 matches = 1; 163 matches = 1;
150 } 164 }
151 } else if (score == hiscore && reuseport) { 165 } else if (score == hiscore && reuseport) {
@@ -163,11 +177,13 @@ begin:
163 if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) 177 if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
164 goto begin; 178 goto begin;
165 if (result) { 179 if (result) {
180found:
166 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) 181 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
167 result = NULL; 182 result = NULL;
168 else if (unlikely(compute_score(result, net, hnum, daddr, 183 else if (unlikely(compute_score(result, net, hnum, daddr,
169 dif) < hiscore)) { 184 dif) < hiscore)) {
170 sock_put(result); 185 sock_put(result);
186 select_ok = false;
171 goto begin; 187 goto begin;
172 } 188 }
173 } 189 }
@@ -177,6 +193,7 @@ begin:
177EXPORT_SYMBOL_GPL(inet6_lookup_listener); 193EXPORT_SYMBOL_GPL(inet6_lookup_listener);
178 194
179struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, 195struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
196 struct sk_buff *skb, int doff,
180 const struct in6_addr *saddr, const __be16 sport, 197 const struct in6_addr *saddr, const __be16 sport,
181 const struct in6_addr *daddr, const __be16 dport, 198 const struct in6_addr *daddr, const __be16 dport,
182 const int dif) 199 const int dif)
@@ -184,7 +201,8 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
184 struct sock *sk; 201 struct sock *sk;
185 202
186 local_bh_disable(); 203 local_bh_disable();
187 sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif); 204 sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
205 ntohs(dport), dif);
188 local_bh_enable(); 206 local_bh_enable();
189 207
190 return sk; 208 return sk;
@@ -274,3 +292,59 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
274 __inet6_check_established); 292 __inet6_check_established);
275} 293}
276EXPORT_SYMBOL_GPL(inet6_hash_connect); 294EXPORT_SYMBOL_GPL(inet6_hash_connect);
295
296int inet6_hash(struct sock *sk)
297{
298 if (sk->sk_state != TCP_CLOSE) {
299 local_bh_disable();
300 __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
301 local_bh_enable();
302 }
303
304 return 0;
305}
306EXPORT_SYMBOL_GPL(inet6_hash);
307
308/* match_wildcard == true: IPV6_ADDR_ANY matches any IPv6 address if IPv6
309 * only, and any IPv4 address if not IPv6 only
310 * match_wildcard == false: addresses must be exactly the same, i.e.
311 *                          IPV6_ADDR_ANY only matches IPV6_ADDR_ANY,
312 *                          and 0.0.0.0 matches 0.0.0.0 only
313 */
314int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
315 bool match_wildcard)
316{
317 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
318 int sk2_ipv6only = inet_v6_ipv6only(sk2);
319 int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
320 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
321
322 /* if both are mapped, treat as IPv4 */
323 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
324 if (!sk2_ipv6only) {
325 if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
326 return 1;
327 if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
328 return match_wildcard;
329 }
330 return 0;
331 }
332
333 if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
334 return 1;
335
336 if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
337 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
338 return 1;
339
340 if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
341 !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
342 return 1;
343
344 if (sk2_rcv_saddr6 &&
345 ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
346 return 1;
347
348 return 0;
349}
350EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
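The reuseport_select_sock() shortcut added to inet6_lookup_listener() only fires for listeners that are part of a reuseport group, i.e. sockets that enabled SO_REUSEPORT before bind(). For context, a minimal userspace listener that would populate such a group (plain sockets API; the port and backlog are arbitrary):

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Each process or thread running this joins the same reuseport group for
 * [::]:8080; the kernel then picks one listener per incoming connection
 * using the hash-based selection shown in the diff above. */
static int reuseport_listener(void)
{
        int one = 1;
        struct sockaddr_in6 addr;
        int fd = socket(AF_INET6, SOCK_STREAM, 0);

        if (fd < 0)
                return -1;
        setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));

        memset(&addr, 0, sizeof(addr));
        addr.sin6_family = AF_INET6;
        addr.sin6_port = htons(8080);
        addr.sin6_addr = in6addr_any;

        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
            listen(fd, 128) < 0) {
                close(fd);
                return -1;
        }
        return fd;
}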
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 9a4d7322fb22..b2025bf3da4a 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -6,8 +6,7 @@
6#ifndef _HAVE_ARCH_IPV6_CSUM 6#ifndef _HAVE_ARCH_IPV6_CSUM
7__sum16 csum_ipv6_magic(const struct in6_addr *saddr, 7__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
8 const struct in6_addr *daddr, 8 const struct in6_addr *daddr,
9 __u32 len, unsigned short proto, 9 __u32 len, __u8 proto, __wsum csum)
10 __wsum csum)
11{ 10{
12 11
13 int carry; 12 int carry;
@@ -98,27 +97,16 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb,
98 uh->check = 0; 97 uh->check = 0;
99 else if (skb_is_gso(skb)) 98 else if (skb_is_gso(skb))
100 uh->check = ~udp_v6_check(len, saddr, daddr, 0); 99 uh->check = ~udp_v6_check(len, saddr, daddr, 0);
101 else if (skb_dst(skb) && skb_dst(skb)->dev && 100 else if (skb->ip_summed == CHECKSUM_PARTIAL) {
102 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { 101 uh->check = 0;
103 102 uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb));
104 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); 103 if (uh->check == 0)
105 104 uh->check = CSUM_MANGLED_0;
105 } else {
106 skb->ip_summed = CHECKSUM_PARTIAL; 106 skb->ip_summed = CHECKSUM_PARTIAL;
107 skb->csum_start = skb_transport_header(skb) - skb->head; 107 skb->csum_start = skb_transport_header(skb) - skb->head;
108 skb->csum_offset = offsetof(struct udphdr, check); 108 skb->csum_offset = offsetof(struct udphdr, check);
109 uh->check = ~udp_v6_check(len, saddr, daddr, 0); 109 uh->check = ~udp_v6_check(len, saddr, daddr, 0);
110 } else {
111 __wsum csum;
112
113 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
114
115 uh->check = 0;
116 csum = skb_checksum(skb, 0, len, 0);
117 uh->check = udp_v6_check(len, saddr, daddr, csum);
118 if (uh->check == 0)
119 uh->check = CSUM_MANGLED_0;
120
121 skb->ip_summed = CHECKSUM_UNNECESSARY;
122 } 110 }
123} 111}
124EXPORT_SYMBOL(udp6_set_csum); 112EXPORT_SYMBOL(udp6_set_csum);
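The rewritten CHECKSUM_PARTIAL branch in udp6_set_csum() relies on local checksum offload (lco_csum()): because the Internet checksum is a ones'-complement sum, the outer UDP checksum can be composed from the outer headers plus the value already seeded in the inner checksum field, so the payload is never walked a second time. A tiny standalone demonstration of the additivity this depends on (arbitrary 16-bit words, not the kernel helpers):

#include <stdint.h>
#include <stdio.h>

/* ones'-complement (end-around carry) add, as used by the Internet checksum */
static uint16_t csum_add(uint16_t a, uint16_t b)
{
        uint32_t s = (uint32_t)a + b;
        return (uint16_t)((s & 0xffff) + (s >> 16));
}

int main(void)
{
        uint16_t hdr = 0x1234, p1 = 0xbeef, p2 = 0x0f00;

        /* summing everything in one pass ... */
        uint16_t whole = csum_add(csum_add(hdr, p1), p2);
        /* ... equals folding in a precomputed payload sum afterwards, which
         * is what lets LCO reuse the inner checksum field as that sum. */
        uint16_t split = csum_add(hdr, csum_add(p1, p2));

        printf("%#06x %#06x\n", whole, split);  /* prints the same value twice */
        return 0;
}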
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0c7e276c230e..ea071fad67a0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -55,8 +55,6 @@ struct fib6_cleaner {
55 void *arg; 55 void *arg;
56}; 56};
57 57
58static DEFINE_RWLOCK(fib6_walker_lock);
59
60#ifdef CONFIG_IPV6_SUBTREES 58#ifdef CONFIG_IPV6_SUBTREES
61#define FWS_INIT FWS_S 59#define FWS_INIT FWS_S
62#else 60#else
@@ -66,7 +64,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
66static void fib6_prune_clones(struct net *net, struct fib6_node *fn); 64static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
67static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); 65static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
68static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); 66static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
69static int fib6_walk(struct fib6_walker *w); 67static int fib6_walk(struct net *net, struct fib6_walker *w);
70static int fib6_walk_continue(struct fib6_walker *w); 68static int fib6_walk_continue(struct fib6_walker *w);
71 69
72/* 70/*
@@ -78,21 +76,21 @@ static int fib6_walk_continue(struct fib6_walker *w);
78 76
79static void fib6_gc_timer_cb(unsigned long arg); 77static void fib6_gc_timer_cb(unsigned long arg);
80 78
81static LIST_HEAD(fib6_walkers); 79#define FOR_WALKERS(net, w) \
82#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) 80 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
83 81
84static void fib6_walker_link(struct fib6_walker *w) 82static void fib6_walker_link(struct net *net, struct fib6_walker *w)
85{ 83{
86 write_lock_bh(&fib6_walker_lock); 84 write_lock_bh(&net->ipv6.fib6_walker_lock);
87 list_add(&w->lh, &fib6_walkers); 85 list_add(&w->lh, &net->ipv6.fib6_walkers);
88 write_unlock_bh(&fib6_walker_lock); 86 write_unlock_bh(&net->ipv6.fib6_walker_lock);
89} 87}
90 88
91static void fib6_walker_unlink(struct fib6_walker *w) 89static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
92{ 90{
93 write_lock_bh(&fib6_walker_lock); 91 write_lock_bh(&net->ipv6.fib6_walker_lock);
94 list_del(&w->lh); 92 list_del(&w->lh);
95 write_unlock_bh(&fib6_walker_lock); 93 write_unlock_bh(&net->ipv6.fib6_walker_lock);
96} 94}
97 95
98static int fib6_new_sernum(struct net *net) 96static int fib6_new_sernum(struct net *net)
@@ -325,12 +323,13 @@ static int fib6_dump_node(struct fib6_walker *w)
325 323
326static void fib6_dump_end(struct netlink_callback *cb) 324static void fib6_dump_end(struct netlink_callback *cb)
327{ 325{
326 struct net *net = sock_net(cb->skb->sk);
328 struct fib6_walker *w = (void *)cb->args[2]; 327 struct fib6_walker *w = (void *)cb->args[2];
329 328
330 if (w) { 329 if (w) {
331 if (cb->args[4]) { 330 if (cb->args[4]) {
332 cb->args[4] = 0; 331 cb->args[4] = 0;
333 fib6_walker_unlink(w); 332 fib6_walker_unlink(net, w);
334 } 333 }
335 cb->args[2] = 0; 334 cb->args[2] = 0;
336 kfree(w); 335 kfree(w);
@@ -348,6 +347,7 @@ static int fib6_dump_done(struct netlink_callback *cb)
348static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, 347static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
349 struct netlink_callback *cb) 348 struct netlink_callback *cb)
350{ 349{
350 struct net *net = sock_net(skb->sk);
351 struct fib6_walker *w; 351 struct fib6_walker *w;
352 int res; 352 int res;
353 353
@@ -359,7 +359,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
359 w->skip = 0; 359 w->skip = 0;
360 360
361 read_lock_bh(&table->tb6_lock); 361 read_lock_bh(&table->tb6_lock);
362 res = fib6_walk(w); 362 res = fib6_walk(net, w);
363 read_unlock_bh(&table->tb6_lock); 363 read_unlock_bh(&table->tb6_lock);
364 if (res > 0) { 364 if (res > 0) {
365 cb->args[4] = 1; 365 cb->args[4] = 1;
@@ -379,7 +379,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
379 res = fib6_walk_continue(w); 379 res = fib6_walk_continue(w);
380 read_unlock_bh(&table->tb6_lock); 380 read_unlock_bh(&table->tb6_lock);
381 if (res <= 0) { 381 if (res <= 0) {
382 fib6_walker_unlink(w); 382 fib6_walker_unlink(net, w);
383 cb->args[4] = 0; 383 cb->args[4] = 0;
384 } 384 }
385 } 385 }
@@ -1340,8 +1340,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1340 } 1340 }
1341#endif 1341#endif
1342 1342
1343 read_lock(&fib6_walker_lock); 1343 read_lock(&net->ipv6.fib6_walker_lock);
1344 FOR_WALKERS(w) { 1344 FOR_WALKERS(net, w) {
1345 if (!child) { 1345 if (!child) {
1346 if (w->root == fn) { 1346 if (w->root == fn) {
1347 w->root = w->node = NULL; 1347 w->root = w->node = NULL;
@@ -1368,7 +1368,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1368 } 1368 }
1369 } 1369 }
1370 } 1370 }
1371 read_unlock(&fib6_walker_lock); 1371 read_unlock(&net->ipv6.fib6_walker_lock);
1372 1372
1373 node_free(fn); 1373 node_free(fn);
1374 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) 1374 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
@@ -1411,8 +1411,8 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1411 } 1411 }
1412 1412
1413 /* Adjust walkers */ 1413 /* Adjust walkers */
1414 read_lock(&fib6_walker_lock); 1414 read_lock(&net->ipv6.fib6_walker_lock);
1415 FOR_WALKERS(w) { 1415 FOR_WALKERS(net, w) {
1416 if (w->state == FWS_C && w->leaf == rt) { 1416 if (w->state == FWS_C && w->leaf == rt) {
1417 RT6_TRACE("walker %p adjusted by delroute\n", w); 1417 RT6_TRACE("walker %p adjusted by delroute\n", w);
1418 w->leaf = rt->dst.rt6_next; 1418 w->leaf = rt->dst.rt6_next;
@@ -1420,7 +1420,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1420 w->state = FWS_U; 1420 w->state = FWS_U;
1421 } 1421 }
1422 } 1422 }
1423 read_unlock(&fib6_walker_lock); 1423 read_unlock(&net->ipv6.fib6_walker_lock);
1424 1424
1425 rt->dst.rt6_next = NULL; 1425 rt->dst.rt6_next = NULL;
1426 1426
@@ -1588,17 +1588,17 @@ skip:
1588 } 1588 }
1589} 1589}
1590 1590
1591static int fib6_walk(struct fib6_walker *w) 1591static int fib6_walk(struct net *net, struct fib6_walker *w)
1592{ 1592{
1593 int res; 1593 int res;
1594 1594
1595 w->state = FWS_INIT; 1595 w->state = FWS_INIT;
1596 w->node = w->root; 1596 w->node = w->root;
1597 1597
1598 fib6_walker_link(w); 1598 fib6_walker_link(net, w);
1599 res = fib6_walk_continue(w); 1599 res = fib6_walk_continue(w);
1600 if (res <= 0) 1600 if (res <= 0)
1601 fib6_walker_unlink(w); 1601 fib6_walker_unlink(net, w);
1602 return res; 1602 return res;
1603} 1603}
1604 1604
@@ -1668,7 +1668,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
1668 c.arg = arg; 1668 c.arg = arg;
1669 c.net = net; 1669 c.net = net;
1670 1670
1671 fib6_walk(&c.w); 1671 fib6_walk(net, &c.w);
1672} 1672}
1673 1673
1674static void __fib6_clean_all(struct net *net, 1674static void __fib6_clean_all(struct net *net,
@@ -1725,14 +1725,15 @@ static void fib6_flush_trees(struct net *net)
1725 * Garbage collection 1725 * Garbage collection
1726 */ 1726 */
1727 1727
1728static struct fib6_gc_args 1728struct fib6_gc_args
1729{ 1729{
1730 int timeout; 1730 int timeout;
1731 int more; 1731 int more;
1732} gc_args; 1732};
1733 1733
1734static int fib6_age(struct rt6_info *rt, void *arg) 1734static int fib6_age(struct rt6_info *rt, void *arg)
1735{ 1735{
1736 struct fib6_gc_args *gc_args = arg;
1736 unsigned long now = jiffies; 1737 unsigned long now = jiffies;
1737 1738
1738 /* 1739 /*
@@ -1748,10 +1749,10 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1748 RT6_TRACE("expiring %p\n", rt); 1749 RT6_TRACE("expiring %p\n", rt);
1749 return -1; 1750 return -1;
1750 } 1751 }
1751 gc_args.more++; 1752 gc_args->more++;
1752 } else if (rt->rt6i_flags & RTF_CACHE) { 1753 } else if (rt->rt6i_flags & RTF_CACHE) {
1753 if (atomic_read(&rt->dst.__refcnt) == 0 && 1754 if (atomic_read(&rt->dst.__refcnt) == 0 &&
1754 time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { 1755 time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1755 RT6_TRACE("aging clone %p\n", rt); 1756 RT6_TRACE("aging clone %p\n", rt);
1756 return -1; 1757 return -1;
1757 } else if (rt->rt6i_flags & RTF_GATEWAY) { 1758 } else if (rt->rt6i_flags & RTF_GATEWAY) {
@@ -1769,21 +1770,20 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1769 return -1; 1770 return -1;
1770 } 1771 }
1771 } 1772 }
1772 gc_args.more++; 1773 gc_args->more++;
1773 } 1774 }
1774 1775
1775 return 0; 1776 return 0;
1776} 1777}
1777 1778
1778static DEFINE_SPINLOCK(fib6_gc_lock);
1779
1780void fib6_run_gc(unsigned long expires, struct net *net, bool force) 1779void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1781{ 1780{
1781 struct fib6_gc_args gc_args;
1782 unsigned long now; 1782 unsigned long now;
1783 1783
1784 if (force) { 1784 if (force) {
1785 spin_lock_bh(&fib6_gc_lock); 1785 spin_lock_bh(&net->ipv6.fib6_gc_lock);
1786 } else if (!spin_trylock_bh(&fib6_gc_lock)) { 1786 } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
1787 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); 1787 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1788 return; 1788 return;
1789 } 1789 }
@@ -1792,7 +1792,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1792 1792
1793 gc_args.more = icmp6_dst_gc(); 1793 gc_args.more = icmp6_dst_gc();
1794 1794
1795 fib6_clean_all(net, fib6_age, NULL); 1795 fib6_clean_all(net, fib6_age, &gc_args);
1796 now = jiffies; 1796 now = jiffies;
1797 net->ipv6.ip6_rt_last_gc = now; 1797 net->ipv6.ip6_rt_last_gc = now;
1798 1798
@@ -1802,7 +1802,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1802 + net->ipv6.sysctl.ip6_rt_gc_interval)); 1802 + net->ipv6.sysctl.ip6_rt_gc_interval));
1803 else 1803 else
1804 del_timer(&net->ipv6.ip6_fib_timer); 1804 del_timer(&net->ipv6.ip6_fib_timer);
1805 spin_unlock_bh(&fib6_gc_lock); 1805 spin_unlock_bh(&net->ipv6.fib6_gc_lock);
1806} 1806}
1807 1807
1808static void fib6_gc_timer_cb(unsigned long arg) 1808static void fib6_gc_timer_cb(unsigned long arg)
@@ -1814,6 +1814,9 @@ static int __net_init fib6_net_init(struct net *net)
1814{ 1814{
1815 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; 1815 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
1816 1816
1817 spin_lock_init(&net->ipv6.fib6_gc_lock);
1818 rwlock_init(&net->ipv6.fib6_walker_lock);
1819 INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
1817 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); 1820 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
1818 1821
1819 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); 1822 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
@@ -1974,7 +1977,8 @@ static int ipv6_route_yield(struct fib6_walker *w)
1974 return 0; 1977 return 0;
1975} 1978}
1976 1979
1977static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) 1980static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
1981 struct net *net)
1978{ 1982{
1979 memset(&iter->w, 0, sizeof(iter->w)); 1983 memset(&iter->w, 0, sizeof(iter->w));
1980 iter->w.func = ipv6_route_yield; 1984 iter->w.func = ipv6_route_yield;
@@ -1984,7 +1988,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
1984 iter->w.args = iter; 1988 iter->w.args = iter;
1985 iter->sernum = iter->w.root->fn_sernum; 1989 iter->sernum = iter->w.root->fn_sernum;
1986 INIT_LIST_HEAD(&iter->w.lh); 1990 INIT_LIST_HEAD(&iter->w.lh);
1987 fib6_walker_link(&iter->w); 1991 fib6_walker_link(net, &iter->w);
1988} 1992}
1989 1993
1990static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, 1994static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
@@ -2045,16 +2049,16 @@ iter_table:
2045 ++*pos; 2049 ++*pos;
2046 return iter->w.leaf; 2050 return iter->w.leaf;
2047 } else if (r < 0) { 2051 } else if (r < 0) {
2048 fib6_walker_unlink(&iter->w); 2052 fib6_walker_unlink(net, &iter->w);
2049 return NULL; 2053 return NULL;
2050 } 2054 }
2051 fib6_walker_unlink(&iter->w); 2055 fib6_walker_unlink(net, &iter->w);
2052 2056
2053 iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); 2057 iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
2054 if (!iter->tbl) 2058 if (!iter->tbl)
2055 return NULL; 2059 return NULL;
2056 2060
2057 ipv6_route_seq_setup_walk(iter); 2061 ipv6_route_seq_setup_walk(iter, net);
2058 goto iter_table; 2062 goto iter_table;
2059} 2063}
2060 2064
@@ -2069,7 +2073,7 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
2069 iter->skip = *pos; 2073 iter->skip = *pos;
2070 2074
2071 if (iter->tbl) { 2075 if (iter->tbl) {
2072 ipv6_route_seq_setup_walk(iter); 2076 ipv6_route_seq_setup_walk(iter, net);
2073 return ipv6_route_seq_next(seq, NULL, pos); 2077 return ipv6_route_seq_next(seq, NULL, pos);
2074 } else { 2078 } else {
2075 return NULL; 2079 return NULL;
@@ -2085,10 +2089,11 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
2085static void ipv6_route_seq_stop(struct seq_file *seq, void *v) 2089static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
2086 __releases(RCU_BH) 2090 __releases(RCU_BH)
2087{ 2091{
2092 struct net *net = seq_file_net(seq);
2088 struct ipv6_route_iter *iter = seq->private; 2093 struct ipv6_route_iter *iter = seq->private;
2089 2094
2090 if (ipv6_route_iter_active(iter)) 2095 if (ipv6_route_iter_active(iter))
2091 fib6_walker_unlink(&iter->w); 2096 fib6_walker_unlink(net, &iter->w);
2092 2097
2093 rcu_read_unlock_bh(); 2098 rcu_read_unlock_bh();
2094} 2099}
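Two independent cleanups run through the ip6_fib.c hunks: the walker list, walker lock and gc lock become per-netns state initialised in fib6_net_init(), and the gc bookkeeping moves off a file-scope static onto the stack of fib6_run_gc(), reaching fib6_age() through the existing void *arg of fib6_clean_all(). The second pattern in isolation, as a small standalone model (illustrative names):

#include <stdio.h>

struct gc_args { int timeout; int more; };

/* the callback gets the caller's state through the opaque pointer,
 * just as fib6_age() now receives &gc_args from fib6_run_gc() */
static int age_one(int expired, void *arg)
{
        struct gc_args *gc = arg;

        if (expired)
                return -1;      /* ask the walker to delete this entry */
        gc->more++;             /* live entry: gc needs to run again */
        return 0;
}

int main(void)
{
        struct gc_args gc = { .timeout = 30, .more = 0 };
        int expired[] = { 0, 1, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++)
                age_one(expired[i], &gc);
        printf("more=%d\n", gc.more);   /* prints more=2 */
        return 0;
}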
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c0d4dc1c5ea4..4e636e60a360 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -360,7 +360,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
360 struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); 360 struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
361 361
362 ip6gre_tunnel_unlink(ign, t); 362 ip6gre_tunnel_unlink(ign, t);
363 ip6_tnl_dst_reset(t); 363 dst_cache_reset(&t->dst_cache);
364 dev_put(dev); 364 dev_put(dev);
365} 365}
366 366
@@ -633,7 +633,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
633 } 633 }
634 634
635 if (!fl6->flowi6_mark) 635 if (!fl6->flowi6_mark)
636 dst = ip6_tnl_dst_get(tunnel); 636 dst = dst_cache_get(&tunnel->dst_cache);
637 637
638 if (!dst) { 638 if (!dst) {
639 dst = ip6_route_output(net, NULL, fl6); 639 dst = ip6_route_output(net, NULL, fl6);
@@ -702,7 +702,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
702 } 702 }
703 703
704 if (!fl6->flowi6_mark && ndst) 704 if (!fl6->flowi6_mark && ndst)
705 ip6_tnl_dst_set(tunnel, ndst); 705 dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
706 skb_dst_set(skb, dst); 706 skb_dst_set(skb, dst);
707 707
708 proto = NEXTHDR_GRE; 708 proto = NEXTHDR_GRE;
@@ -1011,7 +1011,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
1011 t->parms.o_key = p->o_key; 1011 t->parms.o_key = p->o_key;
1012 t->parms.i_flags = p->i_flags; 1012 t->parms.i_flags = p->i_flags;
1013 t->parms.o_flags = p->o_flags; 1013 t->parms.o_flags = p->o_flags;
1014 ip6_tnl_dst_reset(t); 1014 dst_cache_reset(&t->dst_cache);
1015 ip6gre_tnl_link_config(t, set_mtu); 1015 ip6gre_tnl_link_config(t, set_mtu);
1016 return 0; 1016 return 0;
1017} 1017}
@@ -1221,7 +1221,7 @@ static void ip6gre_dev_free(struct net_device *dev)
1221{ 1221{
1222 struct ip6_tnl *t = netdev_priv(dev); 1222 struct ip6_tnl *t = netdev_priv(dev);
1223 1223
1224 ip6_tnl_dst_destroy(t); 1224 dst_cache_destroy(&t->dst_cache);
1225 free_percpu(dev->tstats); 1225 free_percpu(dev->tstats);
1226 free_netdev(dev); 1226 free_netdev(dev);
1227} 1227}
@@ -1259,7 +1259,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
1259 if (!dev->tstats) 1259 if (!dev->tstats)
1260 return -ENOMEM; 1260 return -ENOMEM;
1261 1261
1262 ret = ip6_tnl_dst_init(tunnel); 1262 ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1263 if (ret) { 1263 if (ret) {
1264 free_percpu(dev->tstats); 1264 free_percpu(dev->tstats);
1265 dev->tstats = NULL; 1265 dev->tstats = NULL;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9075acf081dd..c05c425c2389 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,7 +49,7 @@
49 49
50int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 50int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
51{ 51{
52 if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { 52 if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
53 const struct inet6_protocol *ipprot; 53 const struct inet6_protocol *ipprot;
54 54
55 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); 55 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
@@ -134,6 +134,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
134 IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) 134 IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
135 goto err; 135 goto err;
136 136
137 /* If enabled, drop unicast packets that were encapsulated in link-layer
138 * multicast or broadcast to protect against the so-called "hole-196"
139 * attack in 802.11 wireless.
140 */
141 if (!ipv6_addr_is_multicast(&hdr->daddr) &&
142 (skb->pkt_type == PACKET_BROADCAST ||
143 skb->pkt_type == PACKET_MULTICAST) &&
144 idev->cnf.drop_unicast_in_l2_multicast)
145 goto err;
146
137 /* RFC4291 2.7 147 /* RFC4291 2.7
138 * Nodes must not originate a packet to a multicast address whose scope 148 * Nodes must not originate a packet to a multicast address whose scope
139 * field contains the reserved value 0; if such a packet is received, it 149 * field contains the reserved value 0; if such a packet is received, it
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index eeca943f12dc..82e9f3076028 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -258,6 +258,19 @@ out:
258 return pp; 258 return pp;
259} 259}
260 260
261static struct sk_buff **sit_gro_receive(struct sk_buff **head,
262 struct sk_buff *skb)
263{
264 if (NAPI_GRO_CB(skb)->encap_mark) {
265 NAPI_GRO_CB(skb)->flush = 1;
266 return NULL;
267 }
268
269 NAPI_GRO_CB(skb)->encap_mark = 1;
270
271 return ipv6_gro_receive(head, skb);
272}
273
261static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) 274static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
262{ 275{
263 const struct net_offload *ops; 276 const struct net_offload *ops;
@@ -302,7 +315,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
302static const struct net_offload sit_offload = { 315static const struct net_offload sit_offload = {
303 .callbacks = { 316 .callbacks = {
304 .gso_segment = ipv6_gso_segment, 317 .gso_segment = ipv6_gso_segment,
305 .gro_receive = ipv6_gro_receive, 318 .gro_receive = sit_gro_receive,
306 .gro_complete = sit_gro_complete, 319 .gro_complete = sit_gro_complete,
307 }, 320 },
308}; 321};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a163102f1803..bc972e7152c7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -332,7 +332,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
332static inline int ip6_forward_finish(struct net *net, struct sock *sk, 332static inline int ip6_forward_finish(struct net *net, struct sock *sk,
333 struct sk_buff *skb) 333 struct sk_buff *skb)
334{ 334{
335 skb_sender_cpu_clear(skb);
336 return dst_output(net, sk, skb); 335 return dst_output(net, sk, skb);
337} 336}
338 337
@@ -1091,8 +1090,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1091 int getfrag(void *from, char *to, int offset, int len, 1090 int getfrag(void *from, char *to, int offset, int len,
1092 int odd, struct sk_buff *skb), 1091 int odd, struct sk_buff *skb),
1093 void *from, int length, int hh_len, int fragheaderlen, 1092 void *from, int length, int hh_len, int fragheaderlen,
1094 int transhdrlen, int mtu, unsigned int flags, 1093 int exthdrlen, int transhdrlen, int mtu,
1095 const struct flowi6 *fl6) 1094 unsigned int flags, const struct flowi6 *fl6)
1096 1095
1097{ 1096{
1098 struct sk_buff *skb; 1097 struct sk_buff *skb;
@@ -1117,7 +1116,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1117 skb_put(skb, fragheaderlen + transhdrlen); 1116 skb_put(skb, fragheaderlen + transhdrlen);
1118 1117
1119 /* initialize network header pointer */ 1118 /* initialize network header pointer */
1120 skb_reset_network_header(skb); 1119 skb_set_network_header(skb, exthdrlen);
1121 1120
1122 /* initialize protocol header pointer */ 1121 /* initialize protocol header pointer */
1123 skb->transport_header = skb->network_header + fragheaderlen; 1122 skb->transport_header = skb->network_header + fragheaderlen;
@@ -1359,7 +1358,7 @@ emsgsize:
1359 (rt->dst.dev->features & NETIF_F_UFO) && 1358 (rt->dst.dev->features & NETIF_F_UFO) &&
1360 (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { 1359 (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
1361 err = ip6_ufo_append_data(sk, queue, getfrag, from, length, 1360 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
1362 hh_len, fragheaderlen, 1361 hh_len, fragheaderlen, exthdrlen,
1363 transhdrlen, mtu, flags, fl6); 1362 transhdrlen, mtu, flags, fl6);
1364 if (err) 1363 if (err)
1365 goto error; 1364 goto error;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6c5dfec7a377..1f20345cbc97 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
122 return &dev->stats; 122 return &dev->stats;
123} 123}
124 124
125/*
126 * Locking : hash tables are protected by RCU and RTNL
127 */
128
129static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
130 struct dst_entry *dst)
131{
132 write_seqlock_bh(&idst->lock);
133 dst_release(rcu_dereference_protected(
134 idst->dst,
135 lockdep_is_held(&idst->lock.lock)));
136 if (dst) {
137 dst_hold(dst);
138 idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
139 } else {
140 idst->cookie = 0;
141 }
142 rcu_assign_pointer(idst->dst, dst);
143 write_sequnlock_bh(&idst->lock);
144}
145
146struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
147{
148 struct ip6_tnl_dst *idst;
149 struct dst_entry *dst;
150 unsigned int seq;
151 u32 cookie;
152
153 idst = raw_cpu_ptr(t->dst_cache);
154
155 rcu_read_lock();
156 do {
157 seq = read_seqbegin(&idst->lock);
158 dst = rcu_dereference(idst->dst);
159 cookie = idst->cookie;
160 } while (read_seqretry(&idst->lock, seq));
161
162 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
163 dst = NULL;
164 rcu_read_unlock();
165
166 if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
167 ip6_tnl_per_cpu_dst_set(idst, NULL);
168 dst_release(dst);
169 dst = NULL;
170 }
171 return dst;
172}
173EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
174
175void ip6_tnl_dst_reset(struct ip6_tnl *t)
176{
177 int i;
178
179 for_each_possible_cpu(i)
180 ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
181}
182EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
183
184void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
185{
186 ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
187
188}
189EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
190
191void ip6_tnl_dst_destroy(struct ip6_tnl *t)
192{
193 if (!t->dst_cache)
194 return;
195
196 ip6_tnl_dst_reset(t);
197 free_percpu(t->dst_cache);
198}
199EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
200
201int ip6_tnl_dst_init(struct ip6_tnl *t)
202{
203 int i;
204
205 t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
206 if (!t->dst_cache)
207 return -ENOMEM;
208
209 for_each_possible_cpu(i)
210 seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
211
212 return 0;
213}
214EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
215
216/** 125/**
217 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 126 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
218 * @remote: the address of the tunnel exit-point 127 * @remote: the address of the tunnel exit-point
@@ -329,7 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
329{ 238{
330 struct ip6_tnl *t = netdev_priv(dev); 239 struct ip6_tnl *t = netdev_priv(dev);
331 240
332 ip6_tnl_dst_destroy(t); 241 dst_cache_destroy(&t->dst_cache);
333 free_percpu(dev->tstats); 242 free_percpu(dev->tstats);
334 free_netdev(dev); 243 free_netdev(dev);
335} 244}
@@ -343,12 +252,12 @@ static int ip6_tnl_create2(struct net_device *dev)
343 252
344 t = netdev_priv(dev); 253 t = netdev_priv(dev);
345 254
255 dev->rtnl_link_ops = &ip6_link_ops;
346 err = register_netdevice(dev); 256 err = register_netdevice(dev);
347 if (err < 0) 257 if (err < 0)
348 goto out; 258 goto out;
349 259
350 strcpy(t->parms.name, dev->name); 260 strcpy(t->parms.name, dev->name);
351 dev->rtnl_link_ops = &ip6_link_ops;
352 261
353 dev_hold(dev); 262 dev_hold(dev);
354 ip6_tnl_link(ip6n, t); 263 ip6_tnl_link(ip6n, t);
@@ -462,7 +371,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
462 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); 371 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
463 else 372 else
464 ip6_tnl_unlink(ip6n, t); 373 ip6_tnl_unlink(ip6n, t);
465 ip6_tnl_dst_reset(t); 374 dst_cache_reset(&t->dst_cache);
466 dev_put(dev); 375 dev_put(dev);
467} 376}
468 377
@@ -1069,7 +978,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1069 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); 978 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
1070 neigh_release(neigh); 979 neigh_release(neigh);
1071 } else if (!fl6->flowi6_mark) 980 } else if (!fl6->flowi6_mark)
1072 dst = ip6_tnl_dst_get(t); 981 dst = dst_cache_get(&t->dst_cache);
1073 982
1074 if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) 983 if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
1075 goto tx_err_link_failure; 984 goto tx_err_link_failure;
@@ -1133,7 +1042,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1133 } 1042 }
1134 1043
1135 if (!fl6->flowi6_mark && ndst) 1044 if (!fl6->flowi6_mark && ndst)
1136 ip6_tnl_dst_set(t, ndst); 1045 dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
1137 skb_dst_set(skb, dst); 1046 skb_dst_set(skb, dst);
1138 1047
1139 skb->transport_header = skb->network_header; 1048 skb->transport_header = skb->network_header;
@@ -1368,7 +1277,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1368 t->parms.flowinfo = p->flowinfo; 1277 t->parms.flowinfo = p->flowinfo;
1369 t->parms.link = p->link; 1278 t->parms.link = p->link;
1370 t->parms.proto = p->proto; 1279 t->parms.proto = p->proto;
1371 ip6_tnl_dst_reset(t); 1280 dst_cache_reset(&t->dst_cache);
1372 ip6_tnl_link_config(t); 1281 ip6_tnl_link_config(t);
1373 return 0; 1282 return 0;
1374} 1283}
@@ -1639,7 +1548,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1639 if (!dev->tstats) 1548 if (!dev->tstats)
1640 return -ENOMEM; 1549 return -ENOMEM;
1641 1550
1642 ret = ip6_tnl_dst_init(t); 1551 ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
1643 if (ret) { 1552 if (ret) {
1644 free_percpu(dev->tstats); 1553 free_percpu(dev->tstats);
1645 dev->tstats = NULL; 1554 dev->tstats = NULL;
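The ip6_tunnel.c, ip6_gre.c and ip6_vti.c hunks replace the tunnel driver's open-coded per-cpu dst cache with the generic dst_cache API; the per-cpu storage and cookie validation deleted above now live behind dst_cache_get() and dst_cache_set_ip6(). A sketch of the resulting lifecycle for a hypothetical tunnel private struct (not a buildable module; error handling condensed):

#include <net/dst_cache.h>
#include <net/ip6_route.h>

struct my_tnl { struct dst_cache dst_cache; };    /* hypothetical type */

static int my_tnl_init(struct my_tnl *t)
{
        /* ndo_init time, as in ip6_tnl_dev_init_gen() above */
        return dst_cache_init(&t->dst_cache, GFP_KERNEL);
}

static struct dst_entry *my_tnl_route(struct net *net, struct my_tnl *t,
                                      struct flowi6 *fl6)
{
        /* tx fast path: reuse the cached route while it still validates */
        struct dst_entry *dst = dst_cache_get(&t->dst_cache);

        if (dst)
                return dst;

        dst = ip6_route_output(net, NULL, fl6);
        if (!dst->error)
                dst_cache_set_ip6(&t->dst_cache, dst, &fl6->saddr);
        return dst;
}

static void my_tnl_teardown(struct my_tnl *t)
{
        dst_cache_reset(&t->dst_cache);     /* on param change / unlink */
        dst_cache_destroy(&t->dst_cache);   /* on device free */
}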
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 14dacf1df529..a7520528ecd2 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -73,8 +73,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
73 struct sk_buff *skb, 73 struct sk_buff *skb,
74 struct net_device *dev, struct in6_addr *saddr, 74 struct net_device *dev, struct in6_addr *saddr,
75 struct in6_addr *daddr, 75 struct in6_addr *daddr,
76 __u8 prio, __u8 ttl, __be16 src_port, 76 __u8 prio, __u8 ttl, __be32 label,
77 __be16 dst_port, bool nocheck) 77 __be16 src_port, __be16 dst_port, bool nocheck)
78{ 78{
79 struct udphdr *uh; 79 struct udphdr *uh;
80 struct ipv6hdr *ip6h; 80 struct ipv6hdr *ip6h;
@@ -98,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
98 __skb_push(skb, sizeof(*ip6h)); 98 __skb_push(skb, sizeof(*ip6h));
99 skb_reset_network_header(skb); 99 skb_reset_network_header(skb);
100 ip6h = ipv6_hdr(skb); 100 ip6h = ipv6_hdr(skb);
101 ip6_flow_hdr(ip6h, prio, htonl(0)); 101 ip6_flow_hdr(ip6h, prio, label);
102 ip6h->payload_len = htons(skb->len); 102 ip6h->payload_len = htons(skb->len);
103 ip6h->nexthdr = IPPROTO_UDP; 103 ip6h->nexthdr = IPPROTO_UDP;
104 ip6h->hop_limit = ttl; 104 ip6h->hop_limit = ttl;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 0a8610b33d79..d90a11f14040 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -640,7 +640,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
640 t->parms.i_key = p->i_key; 640 t->parms.i_key = p->i_key;
641 t->parms.o_key = p->o_key; 641 t->parms.o_key = p->o_key;
642 t->parms.proto = p->proto; 642 t->parms.proto = p->proto;
643 ip6_tnl_dst_reset(t); 643 dst_cache_reset(&t->dst_cache);
644 vti6_link_config(t); 644 vti6_link_config(t);
645 return 0; 645 return 0;
646} 646}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 84afb9a77278..c245895a3d41 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -883,6 +883,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
883 offsetof(struct nd_msg, opt)); 883 offsetof(struct nd_msg, opt));
884 struct ndisc_options ndopts; 884 struct ndisc_options ndopts;
885 struct net_device *dev = skb->dev; 885 struct net_device *dev = skb->dev;
886 struct inet6_dev *idev = __in6_dev_get(dev);
886 struct inet6_ifaddr *ifp; 887 struct inet6_ifaddr *ifp;
887 struct neighbour *neigh; 888 struct neighbour *neigh;
888 889
@@ -902,6 +903,14 @@ static void ndisc_recv_na(struct sk_buff *skb)
902 return; 903 return;
903 } 904 }
904 905
906 /* For some 802.11 wireless deployments (and possibly other networks),
907 * there will be an NA proxy and unsolicited packets are attacks
908 * and thus should not be accepted.
909 */
910 if (!msg->icmph.icmp6_solicited && idev &&
911 idev->cnf.drop_unsolicited_na)
912 return;
913
905 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { 914 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
906 ND_PRINTK(2, warn, "NS: invalid ND option\n"); 915 ND_PRINTK(2, warn, "NS: invalid ND option\n");
907 return; 916 return;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 99425cf2819b..86b67b70b626 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -198,11 +198,12 @@ get_entry(const void *base, unsigned int offset)
198 198
199/* All zeroes == unconditional rule. */ 199/* All zeroes == unconditional rule. */
200/* Mildly perf critical (only if packet tracing is on) */ 200/* Mildly perf critical (only if packet tracing is on) */
201static inline bool unconditional(const struct ip6t_ip6 *ipv6) 201static inline bool unconditional(const struct ip6t_entry *e)
202{ 202{
203 static const struct ip6t_ip6 uncond; 203 static const struct ip6t_ip6 uncond;
204 204
205 return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; 205 return e->target_offset == sizeof(struct ip6t_entry) &&
206 memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0;
206} 207}
207 208
208static inline const struct xt_entry_target * 209static inline const struct xt_entry_target *
@@ -258,11 +259,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
258 } else if (s == e) { 259 } else if (s == e) {
259 (*rulenum)++; 260 (*rulenum)++;
260 261
261 if (s->target_offset == sizeof(struct ip6t_entry) && 262 if (unconditional(s) &&
262 strcmp(t->target.u.kernel.target->name, 263 strcmp(t->target.u.kernel.target->name,
263 XT_STANDARD_TARGET) == 0 && 264 XT_STANDARD_TARGET) == 0 &&
264 t->verdict < 0 && 265 t->verdict < 0) {
265 unconditional(&s->ipv6)) {
266 /* Tail of chains: STANDARD target (return/policy) */ 266 /* Tail of chains: STANDARD target (return/policy) */
267 *comment = *chainname == hookname 267 *comment = *chainname == hookname
268 ? comments[NF_IP6_TRACE_COMMENT_POLICY] 268 ? comments[NF_IP6_TRACE_COMMENT_POLICY]
@@ -488,11 +488,10 @@ mark_source_chains(const struct xt_table_info *newinfo,
488 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); 488 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
489 489
490 /* Unconditional return/END. */ 490 /* Unconditional return/END. */
491 if ((e->target_offset == sizeof(struct ip6t_entry) && 491 if ((unconditional(e) &&
492 (strcmp(t->target.u.user.name, 492 (strcmp(t->target.u.user.name,
493 XT_STANDARD_TARGET) == 0) && 493 XT_STANDARD_TARGET) == 0) &&
494 t->verdict < 0 && 494 t->verdict < 0) || visited) {
495 unconditional(&e->ipv6)) || visited) {
496 unsigned int oldpos, size; 495 unsigned int oldpos, size;
497 496
498 if ((strcmp(t->target.u.user.name, 497 if ((strcmp(t->target.u.user.name,
@@ -581,14 +580,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
581} 580}
582 581
583static int 582static int
584check_entry(const struct ip6t_entry *e, const char *name) 583check_entry(const struct ip6t_entry *e)
585{ 584{
586 const struct xt_entry_target *t; 585 const struct xt_entry_target *t;
587 586
588 if (!ip6_checkentry(&e->ipv6)) { 587 if (!ip6_checkentry(&e->ipv6))
589 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
590 return -EINVAL; 588 return -EINVAL;
591 }
592 589
593 if (e->target_offset + sizeof(struct xt_entry_target) > 590 if (e->target_offset + sizeof(struct xt_entry_target) >
594 e->next_offset) 591 e->next_offset)
@@ -679,10 +676,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
679 struct xt_mtchk_param mtpar; 676 struct xt_mtchk_param mtpar;
680 struct xt_entry_match *ematch; 677 struct xt_entry_match *ematch;
681 678
682 ret = check_entry(e, name);
683 if (ret)
684 return ret;
685
686 e->counters.pcnt = xt_percpu_counter_alloc(); 679 e->counters.pcnt = xt_percpu_counter_alloc();
687 if (IS_ERR_VALUE(e->counters.pcnt)) 680 if (IS_ERR_VALUE(e->counters.pcnt))
688 return -ENOMEM; 681 return -ENOMEM;
@@ -733,7 +726,7 @@ static bool check_underflow(const struct ip6t_entry *e)
733 const struct xt_entry_target *t; 726 const struct xt_entry_target *t;
734 unsigned int verdict; 727 unsigned int verdict;
735 728
736 if (!unconditional(&e->ipv6)) 729 if (!unconditional(e))
737 return false; 730 return false;
738 t = ip6t_get_target_c(e); 731 t = ip6t_get_target_c(e);
739 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 732 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -753,9 +746,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
753 unsigned int valid_hooks) 746 unsigned int valid_hooks)
754{ 747{
755 unsigned int h; 748 unsigned int h;
749 int err;
756 750
757 if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || 751 if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
758 (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { 752 (unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
753 (unsigned char *)e + e->next_offset > limit) {
759 duprintf("Bad offset %p\n", e); 754 duprintf("Bad offset %p\n", e);
760 return -EINVAL; 755 return -EINVAL;
761 } 756 }
@@ -767,6 +762,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
767 return -EINVAL; 762 return -EINVAL;
768 } 763 }
769 764
765 err = check_entry(e);
766 if (err)
767 return err;
768
770 /* Check hooks & underflows */ 769 /* Check hooks & underflows */
771 for (h = 0; h < NF_INET_NUMHOOKS; h++) { 770 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
772 if (!(valid_hooks & (1 << h))) 771 if (!(valid_hooks & (1 << h)))
@@ -775,9 +774,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
775 newinfo->hook_entry[h] = hook_entries[h]; 774 newinfo->hook_entry[h] = hook_entries[h];
776 if ((unsigned char *)e - base == underflows[h]) { 775 if ((unsigned char *)e - base == underflows[h]) {
777 if (!check_underflow(e)) { 776 if (!check_underflow(e)) {
778 pr_err("Underflows must be unconditional and " 777 pr_debug("Underflows must be unconditional and "
779 "use the STANDARD target with " 778 "use the STANDARD target with "
780 "ACCEPT/DROP\n"); 779 "ACCEPT/DROP\n");
781 return -EINVAL; 780 return -EINVAL;
782 } 781 }
783 newinfo->underflow[h] = underflows[h]; 782 newinfo->underflow[h] = underflows[h];
@@ -1169,6 +1168,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
1169 *len, sizeof(get) + get.size); 1168 *len, sizeof(get) + get.size);
1170 return -EINVAL; 1169 return -EINVAL;
1171 } 1170 }
1171 get.name[sizeof(get.name) - 1] = '\0';
1172 1172
1173 t = xt_find_table_lock(net, AF_INET6, get.name); 1173 t = xt_find_table_lock(net, AF_INET6, get.name);
1174 if (!IS_ERR_OR_NULL(t)) { 1174 if (!IS_ERR_OR_NULL(t)) {
@@ -1505,7 +1505,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1505 1505
1506 duprintf("check_compat_entry_size_and_hooks %p\n", e); 1506 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1507 if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || 1507 if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
1508 (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { 1508 (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
1509 (unsigned char *)e + e->next_offset > limit) {
1509 duprintf("Bad offset %p, limit = %p\n", e, limit); 1510 duprintf("Bad offset %p, limit = %p\n", e, limit);
1510 return -EINVAL; 1511 return -EINVAL;
1511 } 1512 }
@@ -1518,7 +1519,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1518 } 1519 }
1519 1520
1520 /* For purposes of check_entry casting the compat entry is fine */ 1521 /* For purposes of check_entry casting the compat entry is fine */
1521 ret = check_entry((struct ip6t_entry *)e, name); 1522 ret = check_entry((struct ip6t_entry *)e);
1522 if (ret) 1523 if (ret)
1523 return ret; 1524 return ret;
1524 1525
@@ -1944,6 +1945,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1944 *len, sizeof(get) + get.size); 1945 *len, sizeof(get) + get.size);
1945 return -EINVAL; 1946 return -EINVAL;
1946 } 1947 }
1948 get.name[sizeof(get.name) - 1] = '\0';
1947 1949
1948 xt_compat_lock(AF_INET6); 1950 xt_compat_lock(AF_INET6);
1949 t = xt_find_table_lock(net, AF_INET6, get.name); 1951 t = xt_find_table_lock(net, AF_INET6, get.name);
@@ -2071,9 +2073,28 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2071 return ret; 2073 return ret;
2072} 2074}
2073 2075
2074struct xt_table *ip6t_register_table(struct net *net, 2076static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
2075 const struct xt_table *table, 2077{
2076 const struct ip6t_replace *repl) 2078 struct xt_table_info *private;
2079 void *loc_cpu_entry;
2080 struct module *table_owner = table->me;
2081 struct ip6t_entry *iter;
2082
2083 private = xt_unregister_table(table);
2084
2085 /* Decrease module usage counts and free resources */
2086 loc_cpu_entry = private->entries;
2087 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2088 cleanup_entry(iter, net);
2089 if (private->number > private->initial_entries)
2090 module_put(table_owner);
2091 xt_free_table_info(private);
2092}
2093
2094int ip6t_register_table(struct net *net, const struct xt_table *table,
2095 const struct ip6t_replace *repl,
2096 const struct nf_hook_ops *ops,
2097 struct xt_table **res)
2077{ 2098{
2078 int ret; 2099 int ret;
2079 struct xt_table_info *newinfo; 2100 struct xt_table_info *newinfo;
@@ -2082,10 +2103,8 @@ struct xt_table *ip6t_register_table(struct net *net,
2082 struct xt_table *new_table; 2103 struct xt_table *new_table;
2083 2104
2084 newinfo = xt_alloc_table_info(repl->size); 2105 newinfo = xt_alloc_table_info(repl->size);
2085 if (!newinfo) { 2106 if (!newinfo)
2086 ret = -ENOMEM; 2107 return -ENOMEM;
2087 goto out;
2088 }
2089 2108
2090 loc_cpu_entry = newinfo->entries; 2109 loc_cpu_entry = newinfo->entries;
2091 memcpy(loc_cpu_entry, repl->entries, repl->size); 2110 memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -2099,30 +2118,28 @@ struct xt_table *ip6t_register_table(struct net *net,
2099 ret = PTR_ERR(new_table); 2118 ret = PTR_ERR(new_table);
2100 goto out_free; 2119 goto out_free;
2101 } 2120 }
2102 return new_table; 2121
2122 /* set res now, will see skbs right after nf_register_net_hooks */
2123 WRITE_ONCE(*res, new_table);
2124
2125 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
2126 if (ret != 0) {
2127 __ip6t_unregister_table(net, new_table);
2128 *res = NULL;
2129 }
2130
2131 return ret;
2103 2132
2104out_free: 2133out_free:
2105 xt_free_table_info(newinfo); 2134 xt_free_table_info(newinfo);
2106out: 2135 return ret;
2107 return ERR_PTR(ret);
2108} 2136}
2109 2137
2110void ip6t_unregister_table(struct net *net, struct xt_table *table) 2138void ip6t_unregister_table(struct net *net, struct xt_table *table,
2139 const struct nf_hook_ops *ops)
2111{ 2140{
2112 struct xt_table_info *private; 2141 nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
2113 void *loc_cpu_entry; 2142 __ip6t_unregister_table(net, table);
2114 struct module *table_owner = table->me;
2115 struct ip6t_entry *iter;
2116
2117 private = xt_unregister_table(table);
2118
2119 /* Decrease module usage counts and free resources */
2120 loc_cpu_entry = private->entries;
2121 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2122 cleanup_entry(iter, net);
2123 if (private->number > private->initial_entries)
2124 module_put(table_owner);
2125 xt_free_table_info(private);
2126} 2143}
2127 2144
2128/* Returns 1 if the type and code is matched by the range, 0 otherwise */ 2145/* Returns 1 if the type and code is matched by the range, 0 otherwise */
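
Not part of the patch: the new bound added in check_entry_size_and_hooks() above, "(unsigned char *)e + e->next_offset > limit" (mirrored in the compat path), matters because the ruleset blob copied from userspace is traversed by trusting each entry's next_offset. A minimal sketch of such a walk, with a hypothetical helper name, showing what the check guarantees:

#include <linux/netfilter_ipv6/ip6_tables.h>

/* Hypothetical walk over a user-supplied ip6t ruleset blob of "size"
 * bytes.  Each step advances by e->next_offset, so the new check ensures
 * base + off + e->next_offset never points past base + size before the
 * entry is accepted; a too-small next_offset is rejected elsewhere in
 * this file, so the walk also terminates. */
static void walk_ruleset_sketch(const void *base, unsigned int size)
{
	const struct ip6t_entry *e;
	unsigned int off = 0;

	while (off < size) {
		e = base + off;
		/* ... per-entry validation would happen here ... */
		off += e->next_offset;	/* bounded by the new check */
	}
}
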
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 8b277b983ca5..1343077dde93 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -22,12 +22,15 @@ MODULE_DESCRIPTION("ip6tables filter table");
22 (1 << NF_INET_FORWARD) | \ 22 (1 << NF_INET_FORWARD) | \
23 (1 << NF_INET_LOCAL_OUT)) 23 (1 << NF_INET_LOCAL_OUT))
24 24
25static int __net_init ip6table_filter_table_init(struct net *net);
26
25static const struct xt_table packet_filter = { 27static const struct xt_table packet_filter = {
26 .name = "filter", 28 .name = "filter",
27 .valid_hooks = FILTER_VALID_HOOKS, 29 .valid_hooks = FILTER_VALID_HOOKS,
28 .me = THIS_MODULE, 30 .me = THIS_MODULE,
29 .af = NFPROTO_IPV6, 31 .af = NFPROTO_IPV6,
30 .priority = NF_IP6_PRI_FILTER, 32 .priority = NF_IP6_PRI_FILTER,
33 .table_init = ip6table_filter_table_init,
31}; 34};
32 35
33/* The work comes in here from netfilter.c. */ 36/* The work comes in here from netfilter.c. */
@@ -44,9 +47,13 @@ static struct nf_hook_ops *filter_ops __read_mostly;
44static bool forward = true; 47static bool forward = true;
45module_param(forward, bool, 0000); 48module_param(forward, bool, 0000);
46 49
47static int __net_init ip6table_filter_net_init(struct net *net) 50static int __net_init ip6table_filter_table_init(struct net *net)
48{ 51{
49 struct ip6t_replace *repl; 52 struct ip6t_replace *repl;
53 int err;
54
55 if (net->ipv6.ip6table_filter)
56 return 0;
50 57
51 repl = ip6t_alloc_initial_table(&packet_filter); 58 repl = ip6t_alloc_initial_table(&packet_filter);
52 if (repl == NULL) 59 if (repl == NULL)
@@ -55,15 +62,26 @@ static int __net_init ip6table_filter_net_init(struct net *net)
55 ((struct ip6t_standard *)repl->entries)[1].target.verdict = 62 ((struct ip6t_standard *)repl->entries)[1].target.verdict =
56 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; 63 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
57 64
58 net->ipv6.ip6table_filter = 65 err = ip6t_register_table(net, &packet_filter, repl, filter_ops,
59 ip6t_register_table(net, &packet_filter, repl); 66 &net->ipv6.ip6table_filter);
60 kfree(repl); 67 kfree(repl);
61 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter); 68 return err;
69}
70
71static int __net_init ip6table_filter_net_init(struct net *net)
72{
73 if (net == &init_net || !forward)
74 return ip6table_filter_table_init(net);
75
76 return 0;
62} 77}
63 78
64static void __net_exit ip6table_filter_net_exit(struct net *net) 79static void __net_exit ip6table_filter_net_exit(struct net *net)
65{ 80{
66 ip6t_unregister_table(net, net->ipv6.ip6table_filter); 81 if (!net->ipv6.ip6table_filter)
82 return;
83 ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops);
84 net->ipv6.ip6table_filter = NULL;
67} 85}
68 86
69static struct pernet_operations ip6table_filter_net_ops = { 87static struct pernet_operations ip6table_filter_net_ops = {
@@ -75,28 +93,21 @@ static int __init ip6table_filter_init(void)
75{ 93{
76 int ret; 94 int ret;
77 95
96 filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
97 if (IS_ERR(filter_ops))
98 return PTR_ERR(filter_ops);
99
78 ret = register_pernet_subsys(&ip6table_filter_net_ops); 100 ret = register_pernet_subsys(&ip6table_filter_net_ops);
79 if (ret < 0) 101 if (ret < 0)
80 return ret; 102 kfree(filter_ops);
81
82 /* Register hooks */
83 filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook);
84 if (IS_ERR(filter_ops)) {
85 ret = PTR_ERR(filter_ops);
86 goto cleanup_table;
87 }
88 103
89 return ret; 104 return ret;
90
91 cleanup_table:
92 unregister_pernet_subsys(&ip6table_filter_net_ops);
93 return ret;
94} 105}
95 106
96static void __exit ip6table_filter_fini(void) 107static void __exit ip6table_filter_fini(void)
97{ 108{
98 xt_hook_unlink(&packet_filter, filter_ops);
99 unregister_pernet_subsys(&ip6table_filter_net_ops); 109 unregister_pernet_subsys(&ip6table_filter_net_ops);
110 kfree(filter_ops);
100} 111}
101 112
102module_init(ip6table_filter_init); 113module_init(ip6table_filter_init);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index abe278b07932..cb2b28883252 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -23,12 +23,15 @@ MODULE_DESCRIPTION("ip6tables mangle table");
23 (1 << NF_INET_LOCAL_OUT) | \ 23 (1 << NF_INET_LOCAL_OUT) | \
24 (1 << NF_INET_POST_ROUTING)) 24 (1 << NF_INET_POST_ROUTING))
25 25
26static int __net_init ip6table_mangle_table_init(struct net *net);
27
26static const struct xt_table packet_mangler = { 28static const struct xt_table packet_mangler = {
27 .name = "mangle", 29 .name = "mangle",
28 .valid_hooks = MANGLE_VALID_HOOKS, 30 .valid_hooks = MANGLE_VALID_HOOKS,
29 .me = THIS_MODULE, 31 .me = THIS_MODULE,
30 .af = NFPROTO_IPV6, 32 .af = NFPROTO_IPV6,
31 .priority = NF_IP6_PRI_MANGLE, 33 .priority = NF_IP6_PRI_MANGLE,
34 .table_init = ip6table_mangle_table_init,
32}; 35};
33 36
34static unsigned int 37static unsigned int
@@ -88,26 +91,33 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
88} 91}
89 92
90static struct nf_hook_ops *mangle_ops __read_mostly; 93static struct nf_hook_ops *mangle_ops __read_mostly;
91static int __net_init ip6table_mangle_net_init(struct net *net) 94static int __net_init ip6table_mangle_table_init(struct net *net)
92{ 95{
93 struct ip6t_replace *repl; 96 struct ip6t_replace *repl;
97 int ret;
98
99 if (net->ipv6.ip6table_mangle)
100 return 0;
94 101
95 repl = ip6t_alloc_initial_table(&packet_mangler); 102 repl = ip6t_alloc_initial_table(&packet_mangler);
96 if (repl == NULL) 103 if (repl == NULL)
97 return -ENOMEM; 104 return -ENOMEM;
98 net->ipv6.ip6table_mangle = 105 ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops,
99 ip6t_register_table(net, &packet_mangler, repl); 106 &net->ipv6.ip6table_mangle);
100 kfree(repl); 107 kfree(repl);
101 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle); 108 return ret;
102} 109}
103 110
104static void __net_exit ip6table_mangle_net_exit(struct net *net) 111static void __net_exit ip6table_mangle_net_exit(struct net *net)
105{ 112{
106 ip6t_unregister_table(net, net->ipv6.ip6table_mangle); 113 if (!net->ipv6.ip6table_mangle)
114 return;
115
116 ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops);
117 net->ipv6.ip6table_mangle = NULL;
107} 118}
108 119
109static struct pernet_operations ip6table_mangle_net_ops = { 120static struct pernet_operations ip6table_mangle_net_ops = {
110 .init = ip6table_mangle_net_init,
111 .exit = ip6table_mangle_net_exit, 121 .exit = ip6table_mangle_net_exit,
112}; 122};
113 123
@@ -115,28 +125,28 @@ static int __init ip6table_mangle_init(void)
115{ 125{
116 int ret; 126 int ret;
117 127
128 mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
129 if (IS_ERR(mangle_ops))
130 return PTR_ERR(mangle_ops);
131
118 ret = register_pernet_subsys(&ip6table_mangle_net_ops); 132 ret = register_pernet_subsys(&ip6table_mangle_net_ops);
119 if (ret < 0) 133 if (ret < 0) {
134 kfree(mangle_ops);
120 return ret; 135 return ret;
121
122 /* Register hooks */
123 mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook);
124 if (IS_ERR(mangle_ops)) {
125 ret = PTR_ERR(mangle_ops);
126 goto cleanup_table;
127 } 136 }
128 137
129 return ret; 138 ret = ip6table_mangle_table_init(&init_net);
130 139 if (ret) {
131 cleanup_table: 140 unregister_pernet_subsys(&ip6table_mangle_net_ops);
132 unregister_pernet_subsys(&ip6table_mangle_net_ops); 141 kfree(mangle_ops);
142 }
133 return ret; 143 return ret;
134} 144}
135 145
136static void __exit ip6table_mangle_fini(void) 146static void __exit ip6table_mangle_fini(void)
137{ 147{
138 xt_hook_unlink(&packet_mangler, mangle_ops);
139 unregister_pernet_subsys(&ip6table_mangle_net_ops); 148 unregister_pernet_subsys(&ip6table_mangle_net_ops);
149 kfree(mangle_ops);
140} 150}
141 151
142module_init(ip6table_mangle_init); 152module_init(ip6table_mangle_init);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index de2a10a565f5..7d2bd940291f 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -20,6 +20,8 @@
20#include <net/netfilter/nf_nat_core.h> 20#include <net/netfilter/nf_nat_core.h>
21#include <net/netfilter/nf_nat_l3proto.h> 21#include <net/netfilter/nf_nat_l3proto.h>
22 22
23static int __net_init ip6table_nat_table_init(struct net *net);
24
23static const struct xt_table nf_nat_ipv6_table = { 25static const struct xt_table nf_nat_ipv6_table = {
24 .name = "nat", 26 .name = "nat",
25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) | 27 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -28,6 +30,7 @@ static const struct xt_table nf_nat_ipv6_table = {
28 (1 << NF_INET_LOCAL_IN), 30 (1 << NF_INET_LOCAL_IN),
29 .me = THIS_MODULE, 31 .me = THIS_MODULE,
30 .af = NFPROTO_IPV6, 32 .af = NFPROTO_IPV6,
33 .table_init = ip6table_nat_table_init,
31}; 34};
32 35
33static unsigned int ip6table_nat_do_chain(void *priv, 36static unsigned int ip6table_nat_do_chain(void *priv,
@@ -97,50 +100,50 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
97 }, 100 },
98}; 101};
99 102
100static int __net_init ip6table_nat_net_init(struct net *net) 103static int __net_init ip6table_nat_table_init(struct net *net)
101{ 104{
102 struct ip6t_replace *repl; 105 struct ip6t_replace *repl;
106 int ret;
107
108 if (net->ipv6.ip6table_nat)
109 return 0;
103 110
104 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); 111 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
105 if (repl == NULL) 112 if (repl == NULL)
106 return -ENOMEM; 113 return -ENOMEM;
107 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); 114 ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl,
115 nf_nat_ipv6_ops, &net->ipv6.ip6table_nat);
108 kfree(repl); 116 kfree(repl);
109 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat); 117 return ret;
110} 118}
111 119
112static void __net_exit ip6table_nat_net_exit(struct net *net) 120static void __net_exit ip6table_nat_net_exit(struct net *net)
113{ 121{
114 ip6t_unregister_table(net, net->ipv6.ip6table_nat); 122 if (!net->ipv6.ip6table_nat)
123 return;
124 ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops);
125 net->ipv6.ip6table_nat = NULL;
115} 126}
116 127
117static struct pernet_operations ip6table_nat_net_ops = { 128static struct pernet_operations ip6table_nat_net_ops = {
118 .init = ip6table_nat_net_init,
119 .exit = ip6table_nat_net_exit, 129 .exit = ip6table_nat_net_exit,
120}; 130};
121 131
122static int __init ip6table_nat_init(void) 132static int __init ip6table_nat_init(void)
123{ 133{
124 int err; 134 int ret = register_pernet_subsys(&ip6table_nat_net_ops);
125 135
126 err = register_pernet_subsys(&ip6table_nat_net_ops); 136 if (ret)
127 if (err < 0) 137 return ret;
128 goto err1;
129 138
130 err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); 139 ret = ip6table_nat_table_init(&init_net);
131 if (err < 0) 140 if (ret)
132 goto err2; 141 unregister_pernet_subsys(&ip6table_nat_net_ops);
133 return 0; 142 return ret;
134
135err2:
136 unregister_pernet_subsys(&ip6table_nat_net_ops);
137err1:
138 return err;
139} 143}
140 144
141static void __exit ip6table_nat_exit(void) 145static void __exit ip6table_nat_exit(void)
142{ 146{
143 nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
144 unregister_pernet_subsys(&ip6table_nat_net_ops); 147 unregister_pernet_subsys(&ip6table_nat_net_ops);
145} 148}
146 149
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9021963565c3..d4bc56443dc1 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -9,12 +9,15 @@
9 9
10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
11 11
12static int __net_init ip6table_raw_table_init(struct net *net);
13
12static const struct xt_table packet_raw = { 14static const struct xt_table packet_raw = {
13 .name = "raw", 15 .name = "raw",
14 .valid_hooks = RAW_VALID_HOOKS, 16 .valid_hooks = RAW_VALID_HOOKS,
15 .me = THIS_MODULE, 17 .me = THIS_MODULE,
16 .af = NFPROTO_IPV6, 18 .af = NFPROTO_IPV6,
17 .priority = NF_IP6_PRI_RAW, 19 .priority = NF_IP6_PRI_RAW,
20 .table_init = ip6table_raw_table_init,
18}; 21};
19 22
20/* The work comes in here from netfilter.c. */ 23/* The work comes in here from netfilter.c. */
@@ -27,26 +30,32 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb,
27 30
28static struct nf_hook_ops *rawtable_ops __read_mostly; 31static struct nf_hook_ops *rawtable_ops __read_mostly;
29 32
30static int __net_init ip6table_raw_net_init(struct net *net) 33static int __net_init ip6table_raw_table_init(struct net *net)
31{ 34{
32 struct ip6t_replace *repl; 35 struct ip6t_replace *repl;
36 int ret;
37
38 if (net->ipv6.ip6table_raw)
39 return 0;
33 40
34 repl = ip6t_alloc_initial_table(&packet_raw); 41 repl = ip6t_alloc_initial_table(&packet_raw);
35 if (repl == NULL) 42 if (repl == NULL)
36 return -ENOMEM; 43 return -ENOMEM;
37 net->ipv6.ip6table_raw = 44 ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops,
38 ip6t_register_table(net, &packet_raw, repl); 45 &net->ipv6.ip6table_raw);
39 kfree(repl); 46 kfree(repl);
40 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw); 47 return ret;
41} 48}
42 49
43static void __net_exit ip6table_raw_net_exit(struct net *net) 50static void __net_exit ip6table_raw_net_exit(struct net *net)
44{ 51{
45 ip6t_unregister_table(net, net->ipv6.ip6table_raw); 52 if (!net->ipv6.ip6table_raw)
53 return;
54 ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops);
55 net->ipv6.ip6table_raw = NULL;
46} 56}
47 57
48static struct pernet_operations ip6table_raw_net_ops = { 58static struct pernet_operations ip6table_raw_net_ops = {
49 .init = ip6table_raw_net_init,
50 .exit = ip6table_raw_net_exit, 59 .exit = ip6table_raw_net_exit,
51}; 60};
52 61
@@ -54,28 +63,29 @@ static int __init ip6table_raw_init(void)
54{ 63{
55 int ret; 64 int ret;
56 65
66 /* Register hooks */
67 rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook);
68 if (IS_ERR(rawtable_ops))
69 return PTR_ERR(rawtable_ops);
70
57 ret = register_pernet_subsys(&ip6table_raw_net_ops); 71 ret = register_pernet_subsys(&ip6table_raw_net_ops);
58 if (ret < 0) 72 if (ret < 0) {
73 kfree(rawtable_ops);
59 return ret; 74 return ret;
60
61 /* Register hooks */
62 rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook);
63 if (IS_ERR(rawtable_ops)) {
64 ret = PTR_ERR(rawtable_ops);
65 goto cleanup_table;
66 } 75 }
67 76
68 return ret; 77 ret = ip6table_raw_table_init(&init_net);
69 78 if (ret) {
70 cleanup_table: 79 unregister_pernet_subsys(&ip6table_raw_net_ops);
71 unregister_pernet_subsys(&ip6table_raw_net_ops); 80 kfree(rawtable_ops);
81 }
72 return ret; 82 return ret;
73} 83}
74 84
75static void __exit ip6table_raw_fini(void) 85static void __exit ip6table_raw_fini(void)
76{ 86{
77 xt_hook_unlink(&packet_raw, rawtable_ops);
78 unregister_pernet_subsys(&ip6table_raw_net_ops); 87 unregister_pernet_subsys(&ip6table_raw_net_ops);
88 kfree(rawtable_ops);
79} 89}
80 90
81module_init(ip6table_raw_init); 91module_init(ip6table_raw_init);
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 0d856fedfeb0..cf26ccb04056 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -27,12 +27,15 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
27 (1 << NF_INET_FORWARD) | \ 27 (1 << NF_INET_FORWARD) | \
28 (1 << NF_INET_LOCAL_OUT) 28 (1 << NF_INET_LOCAL_OUT)
29 29
30static int __net_init ip6table_security_table_init(struct net *net);
31
30static const struct xt_table security_table = { 32static const struct xt_table security_table = {
31 .name = "security", 33 .name = "security",
32 .valid_hooks = SECURITY_VALID_HOOKS, 34 .valid_hooks = SECURITY_VALID_HOOKS,
33 .me = THIS_MODULE, 35 .me = THIS_MODULE,
34 .af = NFPROTO_IPV6, 36 .af = NFPROTO_IPV6,
35 .priority = NF_IP6_PRI_SECURITY, 37 .priority = NF_IP6_PRI_SECURITY,
38 .table_init = ip6table_security_table_init,
36}; 39};
37 40
38static unsigned int 41static unsigned int
@@ -44,26 +47,32 @@ ip6table_security_hook(void *priv, struct sk_buff *skb,
44 47
45static struct nf_hook_ops *sectbl_ops __read_mostly; 48static struct nf_hook_ops *sectbl_ops __read_mostly;
46 49
47static int __net_init ip6table_security_net_init(struct net *net) 50static int __net_init ip6table_security_table_init(struct net *net)
48{ 51{
49 struct ip6t_replace *repl; 52 struct ip6t_replace *repl;
53 int ret;
54
55 if (net->ipv6.ip6table_security)
56 return 0;
50 57
51 repl = ip6t_alloc_initial_table(&security_table); 58 repl = ip6t_alloc_initial_table(&security_table);
52 if (repl == NULL) 59 if (repl == NULL)
53 return -ENOMEM; 60 return -ENOMEM;
54 net->ipv6.ip6table_security = 61 ret = ip6t_register_table(net, &security_table, repl, sectbl_ops,
55 ip6t_register_table(net, &security_table, repl); 62 &net->ipv6.ip6table_security);
56 kfree(repl); 63 kfree(repl);
57 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security); 64 return ret;
58} 65}
59 66
60static void __net_exit ip6table_security_net_exit(struct net *net) 67static void __net_exit ip6table_security_net_exit(struct net *net)
61{ 68{
62 ip6t_unregister_table(net, net->ipv6.ip6table_security); 69 if (!net->ipv6.ip6table_security)
70 return;
71 ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops);
72 net->ipv6.ip6table_security = NULL;
63} 73}
64 74
65static struct pernet_operations ip6table_security_net_ops = { 75static struct pernet_operations ip6table_security_net_ops = {
66 .init = ip6table_security_net_init,
67 .exit = ip6table_security_net_exit, 76 .exit = ip6table_security_net_exit,
68}; 77};
69 78
@@ -71,27 +80,28 @@ static int __init ip6table_security_init(void)
71{ 80{
72 int ret; 81 int ret;
73 82
83 sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
84 if (IS_ERR(sectbl_ops))
85 return PTR_ERR(sectbl_ops);
86
74 ret = register_pernet_subsys(&ip6table_security_net_ops); 87 ret = register_pernet_subsys(&ip6table_security_net_ops);
75 if (ret < 0) 88 if (ret < 0) {
89 kfree(sectbl_ops);
76 return ret; 90 return ret;
77
78 sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook);
79 if (IS_ERR(sectbl_ops)) {
80 ret = PTR_ERR(sectbl_ops);
81 goto cleanup_table;
82 } 91 }
83 92
84 return ret; 93 ret = ip6table_security_table_init(&init_net);
85 94 if (ret) {
86cleanup_table: 95 unregister_pernet_subsys(&ip6table_security_net_ops);
87 unregister_pernet_subsys(&ip6table_security_net_ops); 96 kfree(sectbl_ops);
97 }
88 return ret; 98 return ret;
89} 99}
90 100
91static void __exit ip6table_security_fini(void) 101static void __exit ip6table_security_fini(void)
92{ 102{
93 xt_hook_unlink(&security_table, sectbl_ops);
94 unregister_pernet_subsys(&ip6table_security_net_ops); 103 unregister_pernet_subsys(&ip6table_security_net_ops);
104 kfree(sectbl_ops);
95} 105}
96 106
97module_init(ip6table_security_init); 107module_init(ip6table_security_init);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 6ce309928841..e0be97e636a4 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -131,29 +131,15 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
131 u8 proto, void *data, __sum16 *check, 131 u8 proto, void *data, __sum16 *check,
132 int datalen, int oldlen) 132 int datalen, int oldlen)
133{ 133{
134 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
135 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
136
137 if (skb->ip_summed != CHECKSUM_PARTIAL) { 134 if (skb->ip_summed != CHECKSUM_PARTIAL) {
138 if (!(rt->rt6i_flags & RTF_LOCAL) && 135 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
139 (!skb->dev || skb->dev->features & 136
140 (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) { 137 skb->ip_summed = CHECKSUM_PARTIAL;
141 skb->ip_summed = CHECKSUM_PARTIAL; 138 skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
142 skb->csum_start = skb_headroom(skb) + 139 (data - (void *)skb->data);
143 skb_network_offset(skb) + 140 skb->csum_offset = (void *)check - data;
144 (data - (void *)skb->data); 141 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
145 skb->csum_offset = (void *)check - data; 142 datalen, proto, 0);
146 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
147 datalen, proto, 0);
148 } else {
149 *check = 0;
150 *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
151 datalen, proto,
152 csum_partial(data, datalen,
153 0));
154 if (proto == IPPROTO_UDP && !*check)
155 *check = CSUM_MANGLED_0;
156 }
157 } else 143 } else
158 inet_proto_csum_replace2(check, skb, 144 inet_proto_csum_replace2(check, skb,
159 htons(oldlen), htons(datalen), true); 145 htons(oldlen), htons(datalen), true);
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index cd1ac1637a05..9597ffb74077 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -26,7 +26,12 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
26 26
27 memset(&range, 0, sizeof(range)); 27 memset(&range, 0, sizeof(range));
28 range.flags = priv->flags; 28 range.flags = priv->flags;
29 29 if (priv->sreg_proto_min) {
30 range.min_proto.all =
31 *(__be16 *)&regs->data[priv->sreg_proto_min];
32 range.max_proto.all =
33 *(__be16 *)&regs->data[priv->sreg_proto_max];
34 }
30 regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); 35 regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
31} 36}
32 37
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 263a5164a6f5..c382db7a2e73 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -26,35 +26,6 @@
26#include <net/transp_v6.h> 26#include <net/transp_v6.h>
27#include <net/ping.h> 27#include <net/ping.h>
28 28
29struct proto pingv6_prot = {
30 .name = "PINGv6",
31 .owner = THIS_MODULE,
32 .init = ping_init_sock,
33 .close = ping_close,
34 .connect = ip6_datagram_connect_v6_only,
35 .disconnect = udp_disconnect,
36 .setsockopt = ipv6_setsockopt,
37 .getsockopt = ipv6_getsockopt,
38 .sendmsg = ping_v6_sendmsg,
39 .recvmsg = ping_recvmsg,
40 .bind = ping_bind,
41 .backlog_rcv = ping_queue_rcv_skb,
42 .hash = ping_hash,
43 .unhash = ping_unhash,
44 .get_port = ping_get_port,
45 .obj_size = sizeof(struct raw6_sock),
46};
47EXPORT_SYMBOL_GPL(pingv6_prot);
48
49static struct inet_protosw pingv6_protosw = {
50 .type = SOCK_DGRAM,
51 .protocol = IPPROTO_ICMPV6,
52 .prot = &pingv6_prot,
53 .ops = &inet6_dgram_ops,
54 .flags = INET_PROTOSW_REUSE,
55};
56
57
58/* Compatibility glue so we can support IPv6 when it's compiled as a module */ 29/* Compatibility glue so we can support IPv6 when it's compiled as a module */
59static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, 30static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
60 int *addr_len) 31 int *addr_len)
@@ -77,7 +48,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
77 return 0; 48 return 0;
78} 49}
79 50
80int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 51static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
81{ 52{
82 struct inet_sock *inet = inet_sk(sk); 53 struct inet_sock *inet = inet_sk(sk);
83 struct ipv6_pinfo *np = inet6_sk(sk); 54 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -192,6 +163,34 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
192 return len; 163 return len;
193} 164}
194 165
166struct proto pingv6_prot = {
167 .name = "PINGv6",
168 .owner = THIS_MODULE,
169 .init = ping_init_sock,
170 .close = ping_close,
171 .connect = ip6_datagram_connect_v6_only,
172 .disconnect = udp_disconnect,
173 .setsockopt = ipv6_setsockopt,
174 .getsockopt = ipv6_getsockopt,
175 .sendmsg = ping_v6_sendmsg,
176 .recvmsg = ping_recvmsg,
177 .bind = ping_bind,
178 .backlog_rcv = ping_queue_rcv_skb,
179 .hash = ping_hash,
180 .unhash = ping_unhash,
181 .get_port = ping_get_port,
182 .obj_size = sizeof(struct raw6_sock),
183};
184EXPORT_SYMBOL_GPL(pingv6_prot);
185
186static struct inet_protosw pingv6_protosw = {
187 .type = SOCK_DGRAM,
188 .protocol = IPPROTO_ICMPV6,
189 .prot = &pingv6_prot,
190 .ops = &inet6_dgram_ops,
191 .flags = INET_PROTOSW_REUSE,
192};
193
195#ifdef CONFIG_PROC_FS 194#ifdef CONFIG_PROC_FS
196static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) 195static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
197{ 196{
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 18f3498a6c80..e2ea31175ef9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
496 IP6CB(head)->flags |= IP6SKB_FRAGMENTED; 496 IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
497 497
498 /* Yes, and fold redundant checksum back. 8) */ 498 /* Yes, and fold redundant checksum back. 8) */
499 if (head->ip_summed == CHECKSUM_COMPLETE) 499 skb_postpush_rcsum(head, skb_network_header(head),
500 head->csum = csum_partial(skb_network_header(head), 500 skb_network_header_len(head));
501 skb_network_header_len(head),
502 head->csum);
503 501
504 rcu_read_lock(); 502 rcu_read_lock();
505 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); 503 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
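
Not part of the patch: skb_postpush_rcsum() replaces the removed open-coded fold without changing behaviour. A paraphrased sketch of what the helper does (not a verbatim copy of the skbuff.h definition):

#include <linux/skbuff.h>

/* Equivalent to the lines removed above: fold "len" bytes starting at
 * "start" back into skb->csum, but only for CHECKSUM_COMPLETE packets. */
static inline void postpush_rcsum_sketch(struct sk_buff *skb,
					 const void *start, unsigned int len)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_partial(start, len, skb->csum);
}
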
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ed446639219c..6f32944e0223 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -338,9 +338,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
338 return rt; 338 return rt;
339} 339}
340 340
341static struct rt6_info *ip6_dst_alloc(struct net *net, 341struct rt6_info *ip6_dst_alloc(struct net *net,
342 struct net_device *dev, 342 struct net_device *dev,
343 int flags) 343 int flags)
344{ 344{
345 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); 345 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
346 346
@@ -364,6 +364,7 @@ static struct rt6_info *ip6_dst_alloc(struct net *net,
364 364
365 return rt; 365 return rt;
366} 366}
367EXPORT_SYMBOL(ip6_dst_alloc);
367 368
368static void ip6_dst_destroy(struct dst_entry *dst) 369static void ip6_dst_destroy(struct dst_entry *dst)
369{ 370{
@@ -1417,8 +1418,20 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1417 1418
1418void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) 1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1419{ 1420{
1421 struct dst_entry *dst;
1422
1420 ip6_update_pmtu(skb, sock_net(sk), mtu, 1423 ip6_update_pmtu(skb, sock_net(sk), mtu,
1421 sk->sk_bound_dev_if, sk->sk_mark); 1424 sk->sk_bound_dev_if, sk->sk_mark);
1425
1426 dst = __sk_dst_get(sk);
1427 if (!dst || !dst->obsolete ||
1428 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1429 return;
1430
1431 bh_lock_sock(sk);
1432 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1433 ip6_datagram_dst_update(sk, false);
1434 bh_unlock_sock(sk);
1422} 1435}
1423EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 1436EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1424 1437
@@ -1737,6 +1750,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
1737 } else { 1750 } else {
1738 val = nla_get_u32(nla); 1751 val = nla_get_u32(nla);
1739 } 1752 }
1753 if (type == RTAX_HOPLIMIT && val > 255)
1754 val = 255;
1740 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) 1755 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1741 goto err; 1756 goto err;
1742 1757
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 2066d1c25a11..83384308d032 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -475,7 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
475 ipip6_tunnel_unlink(sitn, tunnel); 475 ipip6_tunnel_unlink(sitn, tunnel);
476 ipip6_tunnel_del_prl(tunnel, NULL); 476 ipip6_tunnel_del_prl(tunnel, NULL);
477 } 477 }
478 ip_tunnel_dst_reset_all(tunnel); 478 dst_cache_reset(&tunnel->dst_cache);
479 dev_put(dev); 479 dev_put(dev);
480} 480}
481 481
@@ -681,14 +681,16 @@ static int ipip6_rcv(struct sk_buff *skb)
681 skb->mac_header = skb->network_header; 681 skb->mac_header = skb->network_header;
682 skb_reset_network_header(skb); 682 skb_reset_network_header(skb);
683 IPCB(skb)->flags = 0; 683 IPCB(skb)->flags = 0;
684 skb->protocol = htons(ETH_P_IPV6); 684 skb->dev = tunnel->dev;
685 685
686 if (packet_is_spoofed(skb, iph, tunnel)) { 686 if (packet_is_spoofed(skb, iph, tunnel)) {
687 tunnel->dev->stats.rx_errors++; 687 tunnel->dev->stats.rx_errors++;
688 goto out; 688 goto out;
689 } 689 }
690 690
691 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); 691 if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6),
692 !net_eq(tunnel->net, dev_net(tunnel->dev))))
693 goto out;
692 694
693 err = IP_ECN_decapsulate(iph, skb); 695 err = IP_ECN_decapsulate(iph, skb);
694 if (unlikely(err)) { 696 if (unlikely(err)) {
@@ -740,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb)
740 742
741 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 743 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
742 goto drop; 744 goto drop;
743 if (iptunnel_pull_header(skb, 0, tpi.proto)) 745 if (iptunnel_pull_header(skb, 0, tpi.proto, false))
744 goto drop; 746 goto drop;
745 return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); 747 return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
746 } 748 }
@@ -911,7 +913,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
911 goto tx_error; 913 goto tx_error;
912 } 914 }
913 915
914 skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); 916 skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT);
915 if (IS_ERR(skb)) { 917 if (IS_ERR(skb)) {
916 ip_rt_put(rt); 918 ip_rt_put(rt);
917 goto out; 919 goto out;
@@ -1000,7 +1002,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1000 struct ip_tunnel *tunnel = netdev_priv(dev); 1002 struct ip_tunnel *tunnel = netdev_priv(dev);
1001 const struct iphdr *tiph = &tunnel->parms.iph; 1003 const struct iphdr *tiph = &tunnel->parms.iph;
1002 1004
1003 skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); 1005 skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
1004 if (IS_ERR(skb)) 1006 if (IS_ERR(skb))
1005 goto out; 1007 goto out;
1006 1008
@@ -1093,7 +1095,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
1093 t->parms.link = p->link; 1095 t->parms.link = p->link;
1094 ipip6_tunnel_bind_dev(t->dev); 1096 ipip6_tunnel_bind_dev(t->dev);
1095 } 1097 }
1096 ip_tunnel_dst_reset_all(t); 1098 dst_cache_reset(&t->dst_cache);
1097 netdev_state_change(t->dev); 1099 netdev_state_change(t->dev);
1098} 1100}
1099 1101
@@ -1124,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
1124 t->ip6rd.relay_prefix = relay_prefix; 1126 t->ip6rd.relay_prefix = relay_prefix;
1125 t->ip6rd.prefixlen = ip6rd->prefixlen; 1127 t->ip6rd.prefixlen = ip6rd->prefixlen;
1126 t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; 1128 t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
1127 ip_tunnel_dst_reset_all(t); 1129 dst_cache_reset(&t->dst_cache);
1128 netdev_state_change(t->dev); 1130 netdev_state_change(t->dev);
1129 return 0; 1131 return 0;
1130} 1132}
@@ -1278,7 +1280,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1278 err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); 1280 err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
1279 break; 1281 break;
1280 } 1282 }
1281 ip_tunnel_dst_reset_all(t); 1283 dst_cache_reset(&t->dst_cache);
1282 netdev_state_change(dev); 1284 netdev_state_change(dev);
1283 break; 1285 break;
1284 1286
@@ -1339,7 +1341,7 @@ static void ipip6_dev_free(struct net_device *dev)
1339{ 1341{
1340 struct ip_tunnel *tunnel = netdev_priv(dev); 1342 struct ip_tunnel *tunnel = netdev_priv(dev);
1341 1343
1342 free_percpu(tunnel->dst_cache); 1344 dst_cache_destroy(&tunnel->dst_cache);
1343 free_percpu(dev->tstats); 1345 free_percpu(dev->tstats);
1344 free_netdev(dev); 1346 free_netdev(dev);
1345} 1347}
@@ -1372,6 +1374,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
1372static int ipip6_tunnel_init(struct net_device *dev) 1374static int ipip6_tunnel_init(struct net_device *dev)
1373{ 1375{
1374 struct ip_tunnel *tunnel = netdev_priv(dev); 1376 struct ip_tunnel *tunnel = netdev_priv(dev);
1377 int err;
1375 1378
1376 tunnel->dev = dev; 1379 tunnel->dev = dev;
1377 tunnel->net = dev_net(dev); 1380 tunnel->net = dev_net(dev);
@@ -1382,10 +1385,10 @@ static int ipip6_tunnel_init(struct net_device *dev)
1382 if (!dev->tstats) 1385 if (!dev->tstats)
1383 return -ENOMEM; 1386 return -ENOMEM;
1384 1387
1385 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); 1388 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1386 if (!tunnel->dst_cache) { 1389 if (err) {
1387 free_percpu(dev->tstats); 1390 free_percpu(dev->tstats);
1388 return -ENOMEM; 1391 return err;
1389 } 1392 }
1390 1393
1391 return 0; 1394 return 0;
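
Not part of the patch: the sit.c hunks above swap the tunnel's open-coded per-cpu ip_tunnel_dst cache for the generic dst_cache API. A minimal lifecycle sketch, assuming a hypothetical private struct, using only the calls that appear in the hunks:

#include <net/dst_cache.h>

struct example_tunnel {
	struct dst_cache dst_cache;	/* hypothetical tunnel state */
};

static int example_tunnel_init(struct example_tunnel *t)
{
	/* allocates the per-cpu backing store for cached routes */
	return dst_cache_init(&t->dst_cache, GFP_KERNEL);
}

static void example_tunnel_params_changed(struct example_tunnel *t)
{
	/* invalidate cached routes after a parameter/prl/6rd update */
	dst_cache_reset(&t->dst_cache);
}

static void example_tunnel_destroy(struct example_tunnel *t)
{
	/* frees what dst_cache_init() allocated */
	dst_cache_destroy(&t->dst_cache);
}
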
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2906ef20795e..aab91fa86c5e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -41,8 +41,7 @@ static __u16 const msstab[] = {
41 9000 - 60, 41 9000 - 60,
42}; 42};
43 43
44static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], 44static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch);
45 ipv6_cookie_scratch);
46 45
47static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, 46static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
48 __be16 sport, __be16 dport, u32 count, int c) 47 __be16 sport, __be16 dport, u32 count, int c)
@@ -148,7 +147,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
148 struct dst_entry *dst; 147 struct dst_entry *dst;
149 __u8 rcv_wscale; 148 __u8 rcv_wscale;
150 149
151 if (!sysctl_tcp_syncookies || !th->ack || th->rst) 150 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
152 goto out; 151 goto out;
153 152
154 if (tcp_synq_no_recent_overflow(sk)) 153 if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5c8c84273028..f443c6b0ce16 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -66,7 +66,7 @@
66#include <linux/proc_fs.h> 66#include <linux/proc_fs.h>
67#include <linux/seq_file.h> 67#include <linux/seq_file.h>
68 68
69#include <linux/crypto.h> 69#include <crypto/hash.h>
70#include <linux/scatterlist.h> 70#include <linux/scatterlist.h>
71 71
72static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); 72static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
@@ -541,7 +541,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
541 bp->len = cpu_to_be32(nbytes); 541 bp->len = cpu_to_be32(nbytes);
542 542
543 sg_init_one(&sg, bp, sizeof(*bp)); 543 sg_init_one(&sg, bp, sizeof(*bp));
544 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 544 ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
545 return crypto_ahash_update(hp->md5_req);
545} 546}
546 547
547static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, 548static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
@@ -549,14 +550,14 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
549 const struct tcphdr *th) 550 const struct tcphdr *th)
550{ 551{
551 struct tcp_md5sig_pool *hp; 552 struct tcp_md5sig_pool *hp;
552 struct hash_desc *desc; 553 struct ahash_request *req;
553 554
554 hp = tcp_get_md5sig_pool(); 555 hp = tcp_get_md5sig_pool();
555 if (!hp) 556 if (!hp)
556 goto clear_hash_noput; 557 goto clear_hash_noput;
557 desc = &hp->md5_desc; 558 req = hp->md5_req;
558 559
559 if (crypto_hash_init(desc)) 560 if (crypto_ahash_init(req))
560 goto clear_hash; 561 goto clear_hash;
561 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 562 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
562 goto clear_hash; 563 goto clear_hash;
@@ -564,7 +565,8 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
564 goto clear_hash; 565 goto clear_hash;
565 if (tcp_md5_hash_key(hp, key)) 566 if (tcp_md5_hash_key(hp, key))
566 goto clear_hash; 567 goto clear_hash;
567 if (crypto_hash_final(desc, md5_hash)) 568 ahash_request_set_crypt(req, NULL, md5_hash, 0);
569 if (crypto_ahash_final(req))
568 goto clear_hash; 570 goto clear_hash;
569 571
570 tcp_put_md5sig_pool(); 572 tcp_put_md5sig_pool();
@@ -584,7 +586,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
584{ 586{
585 const struct in6_addr *saddr, *daddr; 587 const struct in6_addr *saddr, *daddr;
586 struct tcp_md5sig_pool *hp; 588 struct tcp_md5sig_pool *hp;
587 struct hash_desc *desc; 589 struct ahash_request *req;
588 const struct tcphdr *th = tcp_hdr(skb); 590 const struct tcphdr *th = tcp_hdr(skb);
589 591
590 if (sk) { /* valid for establish/request sockets */ 592 if (sk) { /* valid for establish/request sockets */
@@ -599,9 +601,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
599 hp = tcp_get_md5sig_pool(); 601 hp = tcp_get_md5sig_pool();
600 if (!hp) 602 if (!hp)
601 goto clear_hash_noput; 603 goto clear_hash_noput;
602 desc = &hp->md5_desc; 604 req = hp->md5_req;
603 605
604 if (crypto_hash_init(desc)) 606 if (crypto_ahash_init(req))
605 goto clear_hash; 607 goto clear_hash;
606 608
607 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 609 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
@@ -612,7 +614,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
612 goto clear_hash; 614 goto clear_hash;
613 if (tcp_md5_hash_key(hp, key)) 615 if (tcp_md5_hash_key(hp, key))
614 goto clear_hash; 616 goto clear_hash;
615 if (crypto_hash_final(desc, md5_hash)) 617 ahash_request_set_crypt(req, NULL, md5_hash, 0);
618 if (crypto_ahash_final(req))
616 goto clear_hash; 619 goto clear_hash;
617 620
618 tcp_put_md5sig_pool(); 621 tcp_put_md5sig_pool();
@@ -807,8 +810,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
807 fl6.flowi6_proto = IPPROTO_TCP; 810 fl6.flowi6_proto = IPPROTO_TCP;
808 if (rt6_need_strict(&fl6.daddr) && !oif) 811 if (rt6_need_strict(&fl6.daddr) && !oif)
809 fl6.flowi6_oif = tcp_v6_iif(skb); 812 fl6.flowi6_oif = tcp_v6_iif(skb);
810 else 813 else {
814 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
815 oif = skb->skb_iif;
816
811 fl6.flowi6_oif = oif; 817 fl6.flowi6_oif = oif;
818 }
819
812 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); 820 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
813 fl6.fl6_dport = t1->dest; 821 fl6.fl6_dport = t1->dest;
814 fl6.fl6_sport = t1->source; 822 fl6.fl6_sport = t1->source;
@@ -867,7 +875,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
867 * no RST generated if md5 hash doesn't match. 875 * no RST generated if md5 hash doesn't match.
868 */ 876 */
869 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), 877 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
870 &tcp_hashinfo, &ipv6h->saddr, 878 &tcp_hashinfo, NULL, 0,
879 &ipv6h->saddr,
871 th->source, &ipv6h->daddr, 880 th->source, &ipv6h->daddr,
872 ntohs(th->source), tcp_v6_iif(skb)); 881 ntohs(th->source), tcp_v6_iif(skb));
873 if (!sk1) 882 if (!sk1)
@@ -1376,8 +1385,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1376 hdr = ipv6_hdr(skb); 1385 hdr = ipv6_hdr(skb);
1377 1386
1378lookup: 1387lookup:
1379 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, 1388 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1380 inet6_iif(skb)); 1389 th->source, th->dest, inet6_iif(skb));
1381 if (!sk) 1390 if (!sk)
1382 goto no_tcp_socket; 1391 goto no_tcp_socket;
1383 1392
@@ -1442,7 +1451,7 @@ process:
1442 sk_incoming_cpu_update(sk); 1451 sk_incoming_cpu_update(sk);
1443 1452
1444 bh_lock_sock_nested(sk); 1453 bh_lock_sock_nested(sk);
1445 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 1454 tcp_segs_in(tcp_sk(sk), skb);
1446 ret = 0; 1455 ret = 0;
1447 if (!sock_owned_by_user(sk)) { 1456 if (!sock_owned_by_user(sk)) {
1448 if (!tcp_prequeue(sk, skb)) 1457 if (!tcp_prequeue(sk, skb))
@@ -1501,6 +1510,7 @@ do_time_wait:
1501 struct sock *sk2; 1510 struct sock *sk2;
1502 1511
1503 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1512 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1513 skb, __tcp_hdrlen(th),
1504 &ipv6_hdr(skb)->saddr, th->source, 1514 &ipv6_hdr(skb)->saddr, th->source,
1505 &ipv6_hdr(skb)->daddr, 1515 &ipv6_hdr(skb)->daddr,
1506 ntohs(th->dest), tcp_v6_iif(skb)); 1516 ntohs(th->dest), tcp_v6_iif(skb));
@@ -1866,7 +1876,7 @@ struct proto tcpv6_prot = {
1866 .sendpage = tcp_sendpage, 1876 .sendpage = tcp_sendpage,
1867 .backlog_rcv = tcp_v6_do_rcv, 1877 .backlog_rcv = tcp_v6_do_rcv,
1868 .release_cb = tcp_release_cb, 1878 .release_cb = tcp_release_cb,
1869 .hash = inet_hash, 1879 .hash = inet6_hash,
1870 .unhash = inet_unhash, 1880 .unhash = inet_unhash,
1871 .get_port = inet_csk_get_port, 1881 .get_port = inet_csk_get_port,
1872 .enter_memory_pressure = tcp_enter_memory_pressure, 1882 .enter_memory_pressure = tcp_enter_memory_pressure,
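
Not part of the patch: the TCP MD5 signature hunks above move from the old crypto_hash_*/hash_desc interface to ahash requests (crypto_ahash_* on an ahash_request). A minimal one-shot sketch of the new calling convention; the per-segment code above keeps a pre-allocated tfm and request in the md5sig pool and drives separate init/update/final steps, whereas this sketch uses a single digest call for brevity. The buffer handed to the scatterlist must be linear, non-stack memory (e.g. kmalloc'ed):

#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int md5_digest_sketch(const void *data, unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	int err;

	/* ask for a synchronous md5 implementation, as the md5sig pool does */
	tfm = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}
	ahash_request_set_callback(req, 0, NULL, NULL);

	sg_init_one(&sg, data, len);
	ahash_request_set_crypt(req, &sg, out, len);
	err = crypto_ahash_digest(req);		/* init + update + final */

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}
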
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 422dd014aa2c..6bc5c664fa46 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -37,6 +37,7 @@
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <asm/uaccess.h> 38#include <asm/uaccess.h>
39 39
40#include <net/addrconf.h>
40#include <net/ndisc.h> 41#include <net/ndisc.h>
41#include <net/protocol.h> 42#include <net/protocol.h>
42#include <net/transp_v6.h> 43#include <net/transp_v6.h>
@@ -77,49 +78,6 @@ static u32 udp6_ehashfn(const struct net *net,
77 udp_ipv6_hash_secret + net_hash_mix(net)); 78 udp_ipv6_hash_secret + net_hash_mix(net));
78} 79}
79 80
80/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
81 * only, and any IPv4 addresses if not IPv6 only
82 * match_wildcard == false: addresses must be exactly the same, i.e.
83 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
84 * and 0.0.0.0 equals to 0.0.0.0 only
85 */
86int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
87 bool match_wildcard)
88{
89 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
90 int sk2_ipv6only = inet_v6_ipv6only(sk2);
91 int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
92 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
93
94 /* if both are mapped, treat as IPv4 */
95 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
96 if (!sk2_ipv6only) {
97 if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
98 return 1;
99 if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
100 return match_wildcard;
101 }
102 return 0;
103 }
104
105 if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
106 return 1;
107
108 if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
109 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
110 return 1;
111
112 if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
113 !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
114 return 1;
115
116 if (sk2_rcv_saddr6 &&
117 ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
118 return 1;
119
120 return 0;
121}
122
123static u32 udp6_portaddr_hash(const struct net *net, 81static u32 udp6_portaddr_hash(const struct net *net,
124 const struct in6_addr *addr6, 82 const struct in6_addr *addr6,
125 unsigned int port) 83 unsigned int port)
@@ -590,6 +548,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
590 const struct in6_addr *daddr = &hdr->daddr; 548 const struct in6_addr *daddr = &hdr->daddr;
591 struct udphdr *uh = (struct udphdr *)(skb->data+offset); 549 struct udphdr *uh = (struct udphdr *)(skb->data+offset);
592 struct sock *sk; 550 struct sock *sk;
551 int harderr;
593 int err; 552 int err;
594 struct net *net = dev_net(skb->dev); 553 struct net *net = dev_net(skb->dev);
595 554
@@ -601,26 +560,27 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
601 return; 560 return;
602 } 561 }
603 562
563 harderr = icmpv6_err_convert(type, code, &err);
564 np = inet6_sk(sk);
565
604 if (type == ICMPV6_PKT_TOOBIG) { 566 if (type == ICMPV6_PKT_TOOBIG) {
605 if (!ip6_sk_accept_pmtu(sk)) 567 if (!ip6_sk_accept_pmtu(sk))
606 goto out; 568 goto out;
607 ip6_sk_update_pmtu(skb, sk, info); 569 ip6_sk_update_pmtu(skb, sk, info);
570 if (np->pmtudisc != IPV6_PMTUDISC_DONT)
571 harderr = 1;
608 } 572 }
609 if (type == NDISC_REDIRECT) { 573 if (type == NDISC_REDIRECT) {
610 ip6_sk_redirect(skb, sk); 574 ip6_sk_redirect(skb, sk);
611 goto out; 575 goto out;
612 } 576 }
613 577
614 np = inet6_sk(sk); 578 if (!np->recverr) {
615 579 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
616 if (!icmpv6_err_convert(type, code, &err) && !np->recverr) 580 goto out;
617 goto out; 581 } else {
618
619 if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
620 goto out;
621
622 if (np->recverr)
623 ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); 582 ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
583 }
624 584
625 sk->sk_err = err; 585 sk->sk_err = err;
626 sk->sk_error_report(sk); 586 sk->sk_error_report(sk);
@@ -883,8 +843,8 @@ start_lookup:
883 flush_stack(stack, count, skb, count - 1); 843 flush_stack(stack, count, skb, count - 1);
884 } else { 844 } else {
885 if (!inner_flushed) 845 if (!inner_flushed)
886 UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, 846 UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
887 proto == IPPROTO_UDPLITE); 847 proto == IPPROTO_UDPLITE);
888 consume_skb(skb); 848 consume_skb(skb);
889 } 849 }
890 return 0; 850 return 0;
@@ -1579,6 +1539,7 @@ struct proto udpv6_prot = {
1579 .sendmsg = udpv6_sendmsg, 1539 .sendmsg = udpv6_sendmsg,
1580 .recvmsg = udpv6_recvmsg, 1540 .recvmsg = udpv6_recvmsg,
1581 .backlog_rcv = __udpv6_queue_rcv_skb, 1541 .backlog_rcv = __udpv6_queue_rcv_skb,
1542 .release_cb = ip6_datagram_release_cb,
1582 .hash = udp_lib_hash, 1543 .hash = udp_lib_hash,
1583 .unhash = udp_lib_unhash, 1544 .unhash = udp_lib_unhash,
1584 .rehash = udp_v6_rehash, 1545 .rehash = udp_v6_rehash,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 7441e1e63893..2b0fbe6929e8 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -81,12 +81,18 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
81 csum = skb_checksum(skb, 0, skb->len, 0); 81 csum = skb_checksum(skb, 0, skb->len, 0);
82 uh->check = udp_v6_check(skb->len, &ipv6h->saddr, 82 uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
83 &ipv6h->daddr, csum); 83 &ipv6h->daddr, csum);
84
85 if (uh->check == 0) 84 if (uh->check == 0)
86 uh->check = CSUM_MANGLED_0; 85 uh->check = CSUM_MANGLED_0;
87 86
88 skb->ip_summed = CHECKSUM_NONE; 87 skb->ip_summed = CHECKSUM_NONE;
89 88
89 /* If there is no outer header we can fake a checksum offload
90 * due to the fact that we have already done the checksum in
91 * software prior to segmenting the frame.
92 */
93 if (!skb->encap_hdr_csum)
94 features |= NETIF_F_HW_CSUM;
95
90 /* Check if there is enough headroom to insert fragment header. */ 96 /* Check if there is enough headroom to insert fragment header. */
91 tnl_hlen = skb_tnl_header_len(skb); 97 tnl_hlen = skb_tnl_header_len(skb);
92 if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { 98 if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index a4237707f79d..da126ee6d218 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -287,14 +287,14 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
287 287
288 if (filp->f_flags & O_NONBLOCK) { 288 if (filp->f_flags & O_NONBLOCK) {
289 /* nonblock mode is set */ 289 /* nonblock mode is set */
290 if (tty->termios.c_cflag & CBAUD) 290 if (C_BAUD(tty))
291 tty_port_raise_dtr_rts(port); 291 tty_port_raise_dtr_rts(port);
292 port->flags |= ASYNC_NORMAL_ACTIVE; 292 port->flags |= ASYNC_NORMAL_ACTIVE;
293 pr_debug("%s(), O_NONBLOCK requested!\n", __func__); 293 pr_debug("%s(), O_NONBLOCK requested!\n", __func__);
294 return 0; 294 return 0;
295 } 295 }
296 296
297 if (tty->termios.c_cflag & CLOCAL) { 297 if (C_CLOCAL(tty)) {
298 pr_debug("%s(), doing CLOCAL!\n", __func__); 298 pr_debug("%s(), doing CLOCAL!\n", __func__);
299 do_clocal = 1; 299 do_clocal = 1;
300 } 300 }
@@ -806,7 +806,7 @@ static void ircomm_tty_throttle(struct tty_struct *tty)
806 ircomm_tty_send_xchar(tty, STOP_CHAR(tty)); 806 ircomm_tty_send_xchar(tty, STOP_CHAR(tty));
807 807
808 /* Hardware flow control? */ 808 /* Hardware flow control? */
809 if (tty->termios.c_cflag & CRTSCTS) { 809 if (C_CRTSCTS(tty)) {
810 self->settings.dte &= ~IRCOMM_RTS; 810 self->settings.dte &= ~IRCOMM_RTS;
811 self->settings.dte |= IRCOMM_DELTA_RTS; 811 self->settings.dte |= IRCOMM_DELTA_RTS;
812 812
@@ -831,12 +831,11 @@ static void ircomm_tty_unthrottle(struct tty_struct *tty)
831 IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); 831 IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
832 832
833 /* Using software flow control? */ 833 /* Using software flow control? */
834 if (I_IXOFF(tty)) { 834 if (I_IXOFF(tty))
835 ircomm_tty_send_xchar(tty, START_CHAR(tty)); 835 ircomm_tty_send_xchar(tty, START_CHAR(tty));
836 }
837 836
838 /* Using hardware flow control? */ 837 /* Using hardware flow control? */
839 if (tty->termios.c_cflag & CRTSCTS) { 838 if (C_CRTSCTS(tty)) {
840 self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS); 839 self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS);
841 840
842 ircomm_param_request(self, IRCOMM_DTE, TRUE); 841 ircomm_param_request(self, IRCOMM_DTE, TRUE);
@@ -1268,10 +1267,6 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
1268 seq_printf(m, "%cASYNC_LOW_LATENCY", sep); 1267 seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
1269 sep = '|'; 1268 sep = '|';
1270 } 1269 }
1271 if (self->port.flags & ASYNC_CLOSING) {
1272 seq_printf(m, "%cASYNC_CLOSING", sep);
1273 sep = '|';
1274 }
1275 if (self->port.flags & ASYNC_NORMAL_ACTIVE) { 1270 if (self->port.flags & ASYNC_NORMAL_ACTIVE) {
1276 seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep); 1271 seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
1277 sep = '|'; 1272 sep = '|';
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 75ccdbd0728e..d3687aaa23de 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -158,26 +158,21 @@ void ircomm_tty_set_termios(struct tty_struct *tty,
158 ircomm_tty_change_speed(self, tty); 158 ircomm_tty_change_speed(self, tty);
159 159
160 /* Handle transition to B0 status */ 160 /* Handle transition to B0 status */
161 if ((old_termios->c_cflag & CBAUD) && 161 if ((old_termios->c_cflag & CBAUD) && !(cflag & CBAUD)) {
162 !(cflag & CBAUD)) {
163 self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS); 162 self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS);
164 ircomm_param_request(self, IRCOMM_DTE, TRUE); 163 ircomm_param_request(self, IRCOMM_DTE, TRUE);
165 } 164 }
166 165
167 /* Handle transition away from B0 status */ 166 /* Handle transition away from B0 status */
168 if (!(old_termios->c_cflag & CBAUD) && 167 if (!(old_termios->c_cflag & CBAUD) && (cflag & CBAUD)) {
169 (cflag & CBAUD)) {
170 self->settings.dte |= IRCOMM_DTR; 168 self->settings.dte |= IRCOMM_DTR;
171 if (!(tty->termios.c_cflag & CRTSCTS) || 169 if (!C_CRTSCTS(tty) || !test_bit(TTY_THROTTLED, &tty->flags))
172 !test_bit(TTY_THROTTLED, &tty->flags)) {
173 self->settings.dte |= IRCOMM_RTS; 170 self->settings.dte |= IRCOMM_RTS;
174 }
175 ircomm_param_request(self, IRCOMM_DTE, TRUE); 171 ircomm_param_request(self, IRCOMM_DTE, TRUE);
176 } 172 }
177 173
178 /* Handle turning off CRTSCTS */ 174 /* Handle turning off CRTSCTS */
179 if ((old_termios->c_cflag & CRTSCTS) && 175 if ((old_termios->c_cflag & CRTSCTS) && !C_CRTSCTS(tty))
180 !(tty->termios.c_cflag & CRTSCTS))
181 { 176 {
182 tty->hw_stopped = 0; 177 tty->hw_stopped = 0;
183 ircomm_tty_start(tty); 178 ircomm_tty_start(tty);
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
new file mode 100644
index 000000000000..5db94d940ecc
--- /dev/null
+++ b/net/kcm/Kconfig
@@ -0,0 +1,10 @@
1
2config AF_KCM
3 tristate "KCM sockets"
4 depends on INET
5 select BPF_SYSCALL
6 ---help---
7 KCM (Kernel Connection Multiplexor) sockets provide a method
8 for multiplexing messages of a message based application
9	  protocol over kernel connections (e.g. TCP connections).
10
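For context on the help text above, the intended userspace flow is roughly: create an AF_KCM socket, then attach one or more connected TCP sockets together with a BPF program that parses the message framing. A minimal sketch under those assumptions follows (AF_KCM, KCMPROTO_CONNECTED, SIOCKCMATTACH and struct kcm_attach come from the UAPI added by this series; tcp_fd and bpf_fd are hypothetical, the latter from a prior bpf(2) program load; error handling is omitted):

```c
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcm.h>

#ifndef AF_KCM
#define AF_KCM 41	/* assumption: may not be in older libc headers yet */
#endif

/* Sketch only: multiplex whole messages over an already connected TCP
 * socket (tcp_fd) using a length-parsing BPF program (bpf_fd).
 */
static int kcm_setup(int tcp_fd, int bpf_fd)
{
	struct kcm_attach attach = {
		.fd = tcp_fd,		/* transport socket to multiplex over */
		.bpf_fd = bpf_fd,	/* program that returns the message length */
	};
	int kcm_fd;

	kcm_fd = socket(AF_KCM, SOCK_SEQPACKET, KCMPROTO_CONNECTED);
	if (kcm_fd < 0)
		return -1;

	if (ioctl(kcm_fd, SIOCKCMATTACH, &attach) < 0)
		return -1;

	return kcm_fd;	/* sendmsg()/recvmsg() now work message-at-a-time */
}
```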
diff --git a/net/kcm/Makefile b/net/kcm/Makefile
new file mode 100644
index 000000000000..71256133e677
--- /dev/null
+++ b/net/kcm/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_AF_KCM) += kcm.o
2
3kcm-y := kcmsock.o kcmproc.o
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
new file mode 100644
index 000000000000..738008726cc6
--- /dev/null
+++ b/net/kcm/kcmproc.c
@@ -0,0 +1,426 @@
1#include <linux/in.h>
2#include <linux/inet.h>
3#include <linux/list.h>
4#include <linux/module.h>
5#include <linux/net.h>
6#include <linux/proc_fs.h>
7#include <linux/rculist.h>
8#include <linux/seq_file.h>
9#include <linux/socket.h>
10#include <net/inet_sock.h>
11#include <net/kcm.h>
12#include <net/net_namespace.h>
13#include <net/netns/generic.h>
14#include <net/tcp.h>
15
16#ifdef CONFIG_PROC_FS
17struct kcm_seq_muxinfo {
18 char *name;
19 const struct file_operations *seq_fops;
20 const struct seq_operations seq_ops;
21};
22
23static struct kcm_mux *kcm_get_first(struct seq_file *seq)
24{
25 struct net *net = seq_file_net(seq);
26 struct kcm_net *knet = net_generic(net, kcm_net_id);
27
28 return list_first_or_null_rcu(&knet->mux_list,
29 struct kcm_mux, kcm_mux_list);
30}
31
32static struct kcm_mux *kcm_get_next(struct kcm_mux *mux)
33{
34 struct kcm_net *knet = mux->knet;
35
36 return list_next_or_null_rcu(&knet->mux_list, &mux->kcm_mux_list,
37 struct kcm_mux, kcm_mux_list);
38}
39
40static struct kcm_mux *kcm_get_idx(struct seq_file *seq, loff_t pos)
41{
42 struct net *net = seq_file_net(seq);
43 struct kcm_net *knet = net_generic(net, kcm_net_id);
44 struct kcm_mux *m;
45
46 list_for_each_entry_rcu(m, &knet->mux_list, kcm_mux_list) {
47 if (!pos)
48 return m;
49 --pos;
50 }
51 return NULL;
52}
53
54static void *kcm_seq_next(struct seq_file *seq, void *v, loff_t *pos)
55{
56 void *p;
57
58 if (v == SEQ_START_TOKEN)
59 p = kcm_get_first(seq);
60 else
61 p = kcm_get_next(v);
62 ++*pos;
63 return p;
64}
65
66static void *kcm_seq_start(struct seq_file *seq, loff_t *pos)
67 __acquires(rcu)
68{
69 rcu_read_lock();
70
71 if (!*pos)
72 return SEQ_START_TOKEN;
73 else
74 return kcm_get_idx(seq, *pos - 1);
75}
76
77static void kcm_seq_stop(struct seq_file *seq, void *v)
78 __releases(rcu)
79{
80 rcu_read_unlock();
81}
82
83struct kcm_proc_mux_state {
84 struct seq_net_private p;
85 int idx;
86};
87
88static int kcm_seq_open(struct inode *inode, struct file *file)
89{
90 struct kcm_seq_muxinfo *muxinfo = PDE_DATA(inode);
91 int err;
92
93 err = seq_open_net(inode, file, &muxinfo->seq_ops,
94 sizeof(struct kcm_proc_mux_state));
95 if (err < 0)
96 return err;
97 return err;
98}
99
100static void kcm_format_mux_header(struct seq_file *seq)
101{
102 struct net *net = seq_file_net(seq);
103 struct kcm_net *knet = net_generic(net, kcm_net_id);
104
105 seq_printf(seq,
106 "*** KCM statistics (%d MUX) ****\n",
107 knet->count);
108
109 seq_printf(seq,
110 "%-14s %-10s %-16s %-10s %-16s %-8s %-8s %-8s %-8s %s",
111 "Object",
112 "RX-Msgs",
113 "RX-Bytes",
114 "TX-Msgs",
115 "TX-Bytes",
116 "Recv-Q",
117 "Rmem",
118 "Send-Q",
119 "Smem",
120 "Status");
121
122 /* XXX: pdsts header stuff here */
123 seq_puts(seq, "\n");
124}
125
126static void kcm_format_sock(struct kcm_sock *kcm, struct seq_file *seq,
127 int i, int *len)
128{
129 seq_printf(seq,
130 " kcm-%-7u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8s ",
131 kcm->index,
132 kcm->stats.rx_msgs,
133 kcm->stats.rx_bytes,
134 kcm->stats.tx_msgs,
135 kcm->stats.tx_bytes,
136 kcm->sk.sk_receive_queue.qlen,
137 sk_rmem_alloc_get(&kcm->sk),
138 kcm->sk.sk_write_queue.qlen,
139 "-");
140
141 if (kcm->tx_psock)
142 seq_printf(seq, "Psck-%u ", kcm->tx_psock->index);
143
144 if (kcm->tx_wait)
145 seq_puts(seq, "TxWait ");
146
147 if (kcm->tx_wait_more)
148 seq_puts(seq, "WMore ");
149
150 if (kcm->rx_wait)
151 seq_puts(seq, "RxWait ");
152
153 seq_puts(seq, "\n");
154}
155
156static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
157 int i, int *len)
158{
159 seq_printf(seq,
160 " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
161 psock->index,
162 psock->stats.rx_msgs,
163 psock->stats.rx_bytes,
164 psock->stats.tx_msgs,
165 psock->stats.tx_bytes,
166 psock->sk->sk_receive_queue.qlen,
167 atomic_read(&psock->sk->sk_rmem_alloc),
168 psock->sk->sk_write_queue.qlen,
169 atomic_read(&psock->sk->sk_wmem_alloc));
170
171 if (psock->done)
172 seq_puts(seq, "Done ");
173
174 if (psock->tx_stopped)
175 seq_puts(seq, "TxStop ");
176
177 if (psock->rx_stopped)
178 seq_puts(seq, "RxStop ");
179
180 if (psock->tx_kcm)
181 seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
182
183 if (psock->ready_rx_msg)
184 seq_puts(seq, "RdyRx ");
185
186 seq_puts(seq, "\n");
187}
188
189static void
190kcm_format_mux(struct kcm_mux *mux, loff_t idx, struct seq_file *seq)
191{
192 int i, len;
193 struct kcm_sock *kcm;
194 struct kcm_psock *psock;
195
196 /* mux information */
197 seq_printf(seq,
198 "%-6s%-8s %-10llu %-16llu %-10llu %-16llu %-8s %-8s %-8s %-8s ",
199 "mux", "",
200 mux->stats.rx_msgs,
201 mux->stats.rx_bytes,
202 mux->stats.tx_msgs,
203 mux->stats.tx_bytes,
204 "-", "-", "-", "-");
205
206 seq_printf(seq, "KCMs: %d, Psocks %d\n",
207 mux->kcm_socks_cnt, mux->psocks_cnt);
208
209 /* kcm sock information */
210 i = 0;
211 spin_lock_bh(&mux->lock);
212 list_for_each_entry(kcm, &mux->kcm_socks, kcm_sock_list) {
213 kcm_format_sock(kcm, seq, i, &len);
214 i++;
215 }
216 i = 0;
217 list_for_each_entry(psock, &mux->psocks, psock_list) {
218 kcm_format_psock(psock, seq, i, &len);
219 i++;
220 }
221 spin_unlock_bh(&mux->lock);
222}
223
224static int kcm_seq_show(struct seq_file *seq, void *v)
225{
226 struct kcm_proc_mux_state *mux_state;
227
228 mux_state = seq->private;
229 if (v == SEQ_START_TOKEN) {
230 mux_state->idx = 0;
231 kcm_format_mux_header(seq);
232 } else {
233 kcm_format_mux(v, mux_state->idx, seq);
234 mux_state->idx++;
235 }
236 return 0;
237}
238
239static const struct file_operations kcm_seq_fops = {
240 .owner = THIS_MODULE,
241 .open = kcm_seq_open,
242 .read = seq_read,
243 .llseek = seq_lseek,
244};
245
246static struct kcm_seq_muxinfo kcm_seq_muxinfo = {
247 .name = "kcm",
248 .seq_fops = &kcm_seq_fops,
249 .seq_ops = {
250 .show = kcm_seq_show,
251 .start = kcm_seq_start,
252 .next = kcm_seq_next,
253 .stop = kcm_seq_stop,
254 }
255};
256
257static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo)
258{
259 struct proc_dir_entry *p;
260 int rc = 0;
261
262 p = proc_create_data(muxinfo->name, S_IRUGO, net->proc_net,
263 muxinfo->seq_fops, muxinfo);
264 if (!p)
265 rc = -ENOMEM;
266 return rc;
267}
268EXPORT_SYMBOL(kcm_proc_register);
269
270static void kcm_proc_unregister(struct net *net,
271 struct kcm_seq_muxinfo *muxinfo)
272{
273 remove_proc_entry(muxinfo->name, net->proc_net);
274}
275EXPORT_SYMBOL(kcm_proc_unregister);
276
277static int kcm_stats_seq_show(struct seq_file *seq, void *v)
278{
279 struct kcm_psock_stats psock_stats;
280 struct kcm_mux_stats mux_stats;
281 struct kcm_mux *mux;
282 struct kcm_psock *psock;
283 struct net *net = seq->private;
284 struct kcm_net *knet = net_generic(net, kcm_net_id);
285
286 memset(&mux_stats, 0, sizeof(mux_stats));
287 memset(&psock_stats, 0, sizeof(psock_stats));
288
289 mutex_lock(&knet->mutex);
290
291 aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats);
292 aggregate_psock_stats(&knet->aggregate_psock_stats,
293 &psock_stats);
294
295 list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) {
296 spin_lock_bh(&mux->lock);
297 aggregate_mux_stats(&mux->stats, &mux_stats);
298 aggregate_psock_stats(&mux->aggregate_psock_stats,
299 &psock_stats);
300 list_for_each_entry(psock, &mux->psocks, psock_list)
301 aggregate_psock_stats(&psock->stats, &psock_stats);
302 spin_unlock_bh(&mux->lock);
303 }
304
305 mutex_unlock(&knet->mutex);
306
307 seq_printf(seq,
308 "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s\n",
309 "MUX",
310 "RX-Msgs",
311 "RX-Bytes",
312 "TX-Msgs",
313 "TX-Bytes",
314 "TX-Retries",
315 "Attach",
316 "Unattach",
317 "UnattchRsvd",
318 "RX-RdyDrops");
319
320 seq_printf(seq,
321 "%-8s %-10llu %-16llu %-10llu %-16llu %-10u %-10u %-10u %-10u %-10u\n",
322 "",
323 mux_stats.rx_msgs,
324 mux_stats.rx_bytes,
325 mux_stats.tx_msgs,
326 mux_stats.tx_bytes,
327 mux_stats.tx_retries,
328 mux_stats.psock_attach,
329 mux_stats.psock_unattach_rsvd,
330 mux_stats.psock_unattach,
331 mux_stats.rx_ready_drops);
332
333 seq_printf(seq,
334 "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
335 "Psock",
336 "RX-Msgs",
337 "RX-Bytes",
338 "TX-Msgs",
339 "TX-Bytes",
340 "Reserved",
341 "Unreserved",
342 "RX-Aborts",
343 "RX-MemFail",
344 "RX-NeedMor",
345 "RX-BadLen",
346 "RX-TooBig",
347 "RX-Timeout",
348 "TX-Aborts");
349
350 seq_printf(seq,
351 "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
352 "",
353 psock_stats.rx_msgs,
354 psock_stats.rx_bytes,
355 psock_stats.tx_msgs,
356 psock_stats.tx_bytes,
357 psock_stats.reserved,
358 psock_stats.unreserved,
359 psock_stats.rx_aborts,
360 psock_stats.rx_mem_fail,
361 psock_stats.rx_need_more_hdr,
362 psock_stats.rx_bad_hdr_len,
363 psock_stats.rx_msg_too_big,
364 psock_stats.rx_msg_timeouts,
365 psock_stats.tx_aborts);
366
367 return 0;
368}
369
370static int kcm_stats_seq_open(struct inode *inode, struct file *file)
371{
372 return single_open_net(inode, file, kcm_stats_seq_show);
373}
374
375static const struct file_operations kcm_stats_seq_fops = {
376 .owner = THIS_MODULE,
377 .open = kcm_stats_seq_open,
378 .read = seq_read,
379 .llseek = seq_lseek,
380 .release = single_release_net,
381};
382
383static int kcm_proc_init_net(struct net *net)
384{
385 int err;
386
387 if (!proc_create("kcm_stats", S_IRUGO, net->proc_net,
388 &kcm_stats_seq_fops)) {
389 err = -ENOMEM;
390 goto out_kcm_stats;
391 }
392
393 err = kcm_proc_register(net, &kcm_seq_muxinfo);
394 if (err)
395 goto out_kcm;
396
397 return 0;
398
399out_kcm:
400 remove_proc_entry("kcm_stats", net->proc_net);
401out_kcm_stats:
402 return err;
403}
404
405static void kcm_proc_exit_net(struct net *net)
406{
407 kcm_proc_unregister(net, &kcm_seq_muxinfo);
408 remove_proc_entry("kcm_stats", net->proc_net);
409}
410
411static struct pernet_operations kcm_net_ops = {
412 .init = kcm_proc_init_net,
413 .exit = kcm_proc_exit_net,
414};
415
416int __init kcm_proc_init(void)
417{
418 return register_pernet_subsys(&kcm_net_ops);
419}
420
421void __exit kcm_proc_exit(void)
422{
423 unregister_pernet_subsys(&kcm_net_ops);
424}
425
426#endif /* CONFIG_PROC_FS */
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
new file mode 100644
index 000000000000..40662d73204f
--- /dev/null
+++ b/net/kcm/kcmsock.c
@@ -0,0 +1,2409 @@
1#include <linux/bpf.h>
2#include <linux/errno.h>
3#include <linux/errqueue.h>
4#include <linux/file.h>
5#include <linux/in.h>
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/net.h>
9#include <linux/netdevice.h>
10#include <linux/poll.h>
11#include <linux/rculist.h>
12#include <linux/skbuff.h>
13#include <linux/socket.h>
14#include <linux/uaccess.h>
15#include <linux/workqueue.h>
16#include <net/kcm.h>
17#include <net/netns/generic.h>
18#include <net/sock.h>
19#include <net/tcp.h>
20#include <uapi/linux/kcm.h>
21
22unsigned int kcm_net_id;
23
24static struct kmem_cache *kcm_psockp __read_mostly;
25static struct kmem_cache *kcm_muxp __read_mostly;
26static struct workqueue_struct *kcm_wq;
27
28static inline struct kcm_sock *kcm_sk(const struct sock *sk)
29{
30 return (struct kcm_sock *)sk;
31}
32
33static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
34{
35 return (struct kcm_tx_msg *)skb->cb;
36}
37
38static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
39{
40 return (struct kcm_rx_msg *)((void *)skb->cb +
41 offsetof(struct qdisc_skb_cb, data));
42}
43
44static void report_csk_error(struct sock *csk, int err)
45{
46 csk->sk_err = EPIPE;
47 csk->sk_error_report(csk);
48}
49
50/* Callback lock held */
51static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
52 struct sk_buff *skb)
53{
54 struct sock *csk = psock->sk;
55
56 /* Unrecoverable error in receive */
57
58 del_timer(&psock->rx_msg_timer);
59
60 if (psock->rx_stopped)
61 return;
62
63 psock->rx_stopped = 1;
64 KCM_STATS_INCR(psock->stats.rx_aborts);
65
66 /* Report an error on the lower socket */
67 report_csk_error(csk, err);
68}
69
70static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
71 bool wakeup_kcm)
72{
73 struct sock *csk = psock->sk;
74 struct kcm_mux *mux = psock->mux;
75
76 /* Unrecoverable error in transmit */
77
78 spin_lock_bh(&mux->lock);
79
80 if (psock->tx_stopped) {
81 spin_unlock_bh(&mux->lock);
82 return;
83 }
84
85 psock->tx_stopped = 1;
86 KCM_STATS_INCR(psock->stats.tx_aborts);
87
88 if (!psock->tx_kcm) {
89 /* Take off psocks_avail list */
90 list_del(&psock->psock_avail_list);
91 } else if (wakeup_kcm) {
92 /* In this case psock is being aborted while outside of
93 * write_msgs and psock is reserved. Schedule tx_work
94 * to handle the failure there. Need to commit tx_stopped
95 * before queuing work.
96 */
97 smp_mb();
98
99 queue_work(kcm_wq, &psock->tx_kcm->tx_work);
100 }
101
102 spin_unlock_bh(&mux->lock);
103
104 /* Report error on lower socket */
105 report_csk_error(csk, err);
106}
107
108/* RX mux lock held. */
109static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
110 struct kcm_psock *psock)
111{
112 KCM_STATS_ADD(mux->stats.rx_bytes,
113 psock->stats.rx_bytes - psock->saved_rx_bytes);
114 mux->stats.rx_msgs +=
115 psock->stats.rx_msgs - psock->saved_rx_msgs;
116 psock->saved_rx_msgs = psock->stats.rx_msgs;
117 psock->saved_rx_bytes = psock->stats.rx_bytes;
118}
119
120static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
121 struct kcm_psock *psock)
122{
123 KCM_STATS_ADD(mux->stats.tx_bytes,
124 psock->stats.tx_bytes - psock->saved_tx_bytes);
125 mux->stats.tx_msgs +=
126 psock->stats.tx_msgs - psock->saved_tx_msgs;
127 psock->saved_tx_msgs = psock->stats.tx_msgs;
128 psock->saved_tx_bytes = psock->stats.tx_bytes;
129}
130
131static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
132
133/* KCM is ready to receive messages on its queue-- either the KCM is new or
134 * has become unblocked after being blocked on full socket buffer. Queue any
135 * pending ready messages on a psock. RX mux lock held.
136 */
137static void kcm_rcv_ready(struct kcm_sock *kcm)
138{
139 struct kcm_mux *mux = kcm->mux;
140 struct kcm_psock *psock;
141 struct sk_buff *skb;
142
143 if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled))
144 return;
145
146 while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) {
147 if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
148 /* Assuming buffer limit has been reached */
149 skb_queue_head(&mux->rx_hold_queue, skb);
150 WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
151 return;
152 }
153 }
154
155 while (!list_empty(&mux->psocks_ready)) {
156 psock = list_first_entry(&mux->psocks_ready, struct kcm_psock,
157 psock_ready_list);
158
159 if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) {
160 /* Assuming buffer limit has been reached */
161 WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
162 return;
163 }
164
165 /* Consumed the ready message on the psock. Schedule rx_work to
166 * get more messages.
167 */
168 list_del(&psock->psock_ready_list);
169 psock->ready_rx_msg = NULL;
170
171 /* Commit clearing of ready_rx_msg for queuing work */
172 smp_mb();
173
174 queue_work(kcm_wq, &psock->rx_work);
175 }
176
177 /* Buffer limit is okay now, add to ready list */
178 list_add_tail(&kcm->wait_rx_list,
179 &kcm->mux->kcm_rx_waiters);
180 kcm->rx_wait = true;
181}
182
183static void kcm_rfree(struct sk_buff *skb)
184{
185 struct sock *sk = skb->sk;
186 struct kcm_sock *kcm = kcm_sk(sk);
187 struct kcm_mux *mux = kcm->mux;
188 unsigned int len = skb->truesize;
189
190 sk_mem_uncharge(sk, len);
191 atomic_sub(len, &sk->sk_rmem_alloc);
192
193 /* For reading rx_wait and rx_psock without holding lock */
194 smp_mb__after_atomic();
195
196 if (!kcm->rx_wait && !kcm->rx_psock &&
197 sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
198 spin_lock_bh(&mux->rx_lock);
199 kcm_rcv_ready(kcm);
200 spin_unlock_bh(&mux->rx_lock);
201 }
202}
203
204static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
205{
206 struct sk_buff_head *list = &sk->sk_receive_queue;
207
208 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
209 return -ENOMEM;
210
211 if (!sk_rmem_schedule(sk, skb, skb->truesize))
212 return -ENOBUFS;
213
214 skb->dev = NULL;
215
216 skb_orphan(skb);
217 skb->sk = sk;
218 skb->destructor = kcm_rfree;
219 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
220 sk_mem_charge(sk, skb->truesize);
221
222 skb_queue_tail(list, skb);
223
224 if (!sock_flag(sk, SOCK_DEAD))
225 sk->sk_data_ready(sk);
226
227 return 0;
228}
229
230/* Requeue received messages for a kcm socket to other kcm sockets. This is
231 * called when a kcm socket is receive disabled.
232 * RX mux lock held.
233 */
234static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
235{
236 struct sk_buff *skb;
237 struct kcm_sock *kcm;
238
239 while ((skb = __skb_dequeue(head))) {
240 /* Reset destructor to avoid calling kcm_rcv_ready */
241 skb->destructor = sock_rfree;
242 skb_orphan(skb);
243try_again:
244 if (list_empty(&mux->kcm_rx_waiters)) {
245 skb_queue_tail(&mux->rx_hold_queue, skb);
246 continue;
247 }
248
249 kcm = list_first_entry(&mux->kcm_rx_waiters,
250 struct kcm_sock, wait_rx_list);
251
252 if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
253 /* Should mean socket buffer full */
254 list_del(&kcm->wait_rx_list);
255 kcm->rx_wait = false;
256
257 /* Commit rx_wait to read in kcm_free */
258 smp_wmb();
259
260 goto try_again;
261 }
262 }
263}
264
265/* Lower sock lock held */
266static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
267 struct sk_buff *head)
268{
269 struct kcm_mux *mux = psock->mux;
270 struct kcm_sock *kcm;
271
272 WARN_ON(psock->ready_rx_msg);
273
274 if (psock->rx_kcm)
275 return psock->rx_kcm;
276
277 spin_lock_bh(&mux->rx_lock);
278
279 if (psock->rx_kcm) {
280 spin_unlock_bh(&mux->rx_lock);
281 return psock->rx_kcm;
282 }
283
284 kcm_update_rx_mux_stats(mux, psock);
285
286 if (list_empty(&mux->kcm_rx_waiters)) {
287 psock->ready_rx_msg = head;
288 list_add_tail(&psock->psock_ready_list,
289 &mux->psocks_ready);
290 spin_unlock_bh(&mux->rx_lock);
291 return NULL;
292 }
293
294 kcm = list_first_entry(&mux->kcm_rx_waiters,
295 struct kcm_sock, wait_rx_list);
296 list_del(&kcm->wait_rx_list);
297 kcm->rx_wait = false;
298
299 psock->rx_kcm = kcm;
300 kcm->rx_psock = psock;
301
302 spin_unlock_bh(&mux->rx_lock);
303
304 return kcm;
305}
306
307static void kcm_done(struct kcm_sock *kcm);
308
309static void kcm_done_work(struct work_struct *w)
310{
311 kcm_done(container_of(w, struct kcm_sock, done_work));
312}
313
314/* Lower sock held */
315static void unreserve_rx_kcm(struct kcm_psock *psock,
316 bool rcv_ready)
317{
318 struct kcm_sock *kcm = psock->rx_kcm;
319 struct kcm_mux *mux = psock->mux;
320
321 if (!kcm)
322 return;
323
324 spin_lock_bh(&mux->rx_lock);
325
326 psock->rx_kcm = NULL;
327 kcm->rx_psock = NULL;
328
329 /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
330 * kcm_rfree
331 */
332 smp_mb();
333
334 if (unlikely(kcm->done)) {
335 spin_unlock_bh(&mux->rx_lock);
336
337 /* Need to run kcm_done in a task since we need to acquire
338 * callback locks which may already be held here.
339 */
340 INIT_WORK(&kcm->done_work, kcm_done_work);
341 schedule_work(&kcm->done_work);
342 return;
343 }
344
345 if (unlikely(kcm->rx_disabled)) {
346 requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
347 } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) {
348 /* Check for degenerative race with rx_wait that all
349 * data was dequeued (accounted for in kcm_rfree).
350 */
351 kcm_rcv_ready(kcm);
352 }
353 spin_unlock_bh(&mux->rx_lock);
354}
355
356static void kcm_start_rx_timer(struct kcm_psock *psock)
357{
358 if (psock->sk->sk_rcvtimeo)
359 mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
360}
361
362/* Macro to invoke filter function. */
363#define KCM_RUN_FILTER(prog, ctx) \
364 (*prog->bpf_func)(ctx, prog->insnsi)
365
366/* Lower socket lock held */
367static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
368 unsigned int orig_offset, size_t orig_len)
369{
370 struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
371 struct kcm_rx_msg *rxm;
372 struct kcm_sock *kcm;
373 struct sk_buff *head, *skb;
374 size_t eaten = 0, cand_len;
375 ssize_t extra;
376 int err;
377 bool cloned_orig = false;
378
379 if (psock->ready_rx_msg)
380 return 0;
381
382 head = psock->rx_skb_head;
383 if (head) {
384 /* Message already in progress */
385
386 rxm = kcm_rx_msg(head);
387 if (unlikely(rxm->early_eaten)) {
388 /* Already some number of bytes on the receive sock
389 * data saved in rx_skb_head, just indicate they
390 * are consumed.
391 */
392 eaten = orig_len <= rxm->early_eaten ?
393 orig_len : rxm->early_eaten;
394 rxm->early_eaten -= eaten;
395
396 return eaten;
397 }
398
399 if (unlikely(orig_offset)) {
400 /* Getting data with a non-zero offset when a message is
401 * in progress is not expected. If it does happen, we
402 * need to clone and pull since we can't deal with
403 * offsets in the skbs for a message except in the head.
404 */
405 orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
406 if (!orig_skb) {
407 KCM_STATS_INCR(psock->stats.rx_mem_fail);
408 desc->error = -ENOMEM;
409 return 0;
410 }
411 if (!pskb_pull(orig_skb, orig_offset)) {
412 KCM_STATS_INCR(psock->stats.rx_mem_fail);
413 kfree_skb(orig_skb);
414 desc->error = -ENOMEM;
415 return 0;
416 }
417 cloned_orig = true;
418 orig_offset = 0;
419 }
420
421 if (!psock->rx_skb_nextp) {
422 /* We are going to append to the frags_list of head.
423 * Need to unshare the frag_list.
424 */
425 err = skb_unclone(head, GFP_ATOMIC);
426 if (err) {
427 KCM_STATS_INCR(psock->stats.rx_mem_fail);
428 desc->error = err;
429 return 0;
430 }
431
432 if (unlikely(skb_shinfo(head)->frag_list)) {
433 /* We can't append to an sk_buff that already
434 * has a frag_list. We create a new head, point
435 * the frag_list of that to the old head, and
436 * then are able to use the old head->next for
437 * appending to the message.
438 */
439 if (WARN_ON(head->next)) {
440 desc->error = -EINVAL;
441 return 0;
442 }
443
444 skb = alloc_skb(0, GFP_ATOMIC);
445 if (!skb) {
446 KCM_STATS_INCR(psock->stats.rx_mem_fail);
447 desc->error = -ENOMEM;
448 return 0;
449 }
450 skb->len = head->len;
451 skb->data_len = head->len;
452 skb->truesize = head->truesize;
453 *kcm_rx_msg(skb) = *kcm_rx_msg(head);
454 psock->rx_skb_nextp = &head->next;
455 skb_shinfo(skb)->frag_list = head;
456 psock->rx_skb_head = skb;
457 head = skb;
458 } else {
459 psock->rx_skb_nextp =
460 &skb_shinfo(head)->frag_list;
461 }
462 }
463 }
464
465 while (eaten < orig_len) {
466 /* Always clone since we will consume something */
467 skb = skb_clone(orig_skb, GFP_ATOMIC);
468 if (!skb) {
469 KCM_STATS_INCR(psock->stats.rx_mem_fail);
470 desc->error = -ENOMEM;
471 break;
472 }
473
474 cand_len = orig_len - eaten;
475
476 head = psock->rx_skb_head;
477 if (!head) {
478 head = skb;
479 psock->rx_skb_head = head;
480 /* Will set rx_skb_nextp on next packet if needed */
481 psock->rx_skb_nextp = NULL;
482 rxm = kcm_rx_msg(head);
483 memset(rxm, 0, sizeof(*rxm));
484 rxm->offset = orig_offset + eaten;
485 } else {
486 /* Unclone since we may be appending to an skb that we
487 * already share a frag_list with.
488 */
489 err = skb_unclone(skb, GFP_ATOMIC);
490 if (err) {
491 KCM_STATS_INCR(psock->stats.rx_mem_fail);
492 desc->error = err;
493 break;
494 }
495
496 rxm = kcm_rx_msg(head);
497 *psock->rx_skb_nextp = skb;
498 psock->rx_skb_nextp = &skb->next;
499 head->data_len += skb->len;
500 head->len += skb->len;
501 head->truesize += skb->truesize;
502 }
503
504 if (!rxm->full_len) {
505 ssize_t len;
506
507 len = KCM_RUN_FILTER(psock->bpf_prog, head);
508
509 if (!len) {
510 /* Need more header to determine length */
511 if (!rxm->accum_len) {
512 /* Start RX timer for new message */
513 kcm_start_rx_timer(psock);
514 }
515 rxm->accum_len += cand_len;
516 eaten += cand_len;
517 KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
518 WARN_ON(eaten != orig_len);
519 break;
520 } else if (len > psock->sk->sk_rcvbuf) {
521 /* Message length exceeds maximum allowed */
522 KCM_STATS_INCR(psock->stats.rx_msg_too_big);
523 desc->error = -EMSGSIZE;
524 psock->rx_skb_head = NULL;
525 kcm_abort_rx_psock(psock, EMSGSIZE, head);
526 break;
527 } else if (len <= (ssize_t)head->len -
528 skb->len - rxm->offset) {
529 /* Length must be into new skb (and also
530 * greater than zero)
531 */
532 KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
533 desc->error = -EPROTO;
534 psock->rx_skb_head = NULL;
535 kcm_abort_rx_psock(psock, EPROTO, head);
536 break;
537 }
538
539 rxm->full_len = len;
540 }
541
542 extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
543
544 if (extra < 0) {
545 /* Message not complete yet. */
546 if (rxm->full_len - rxm->accum_len >
547 tcp_inq(psock->sk)) {
548 /* Don't have the whole message in the socket
549 * buffer. Set psock->rx_need_bytes to wait for
550 * the rest of the message. Also, set "early
551 * eaten" since we've already buffered the skb
552 * but don't consume yet per tcp_read_sock.
553 */
554
555 if (!rxm->accum_len) {
556 /* Start RX timer for new message */
557 kcm_start_rx_timer(psock);
558 }
559
560 psock->rx_need_bytes = rxm->full_len -
561 rxm->accum_len;
562 rxm->accum_len += cand_len;
563 rxm->early_eaten = cand_len;
564 KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
565 desc->count = 0; /* Stop reading socket */
566 break;
567 }
568 rxm->accum_len += cand_len;
569 eaten += cand_len;
570 WARN_ON(eaten != orig_len);
571 break;
572 }
573
574 /* Positive extra indicates more bytes than needed for the
575 * message
576 */
577
578 WARN_ON(extra > cand_len);
579
580 eaten += (cand_len - extra);
581
582 /* Hurray, we have a new message! */
583 del_timer(&psock->rx_msg_timer);
584 psock->rx_skb_head = NULL;
585 KCM_STATS_INCR(psock->stats.rx_msgs);
586
587try_queue:
588 kcm = reserve_rx_kcm(psock, head);
589 if (!kcm) {
590 /* Unable to reserve a KCM, message is held in psock. */
591 break;
592 }
593
594 if (kcm_queue_rcv_skb(&kcm->sk, head)) {
595 /* Should mean socket buffer full */
596 unreserve_rx_kcm(psock, false);
597 goto try_queue;
598 }
599 }
600
601 if (cloned_orig)
602 kfree_skb(orig_skb);
603
604 KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
605
606 return eaten;
607}
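A side note on the receive path that ends here: KCM_RUN_FILTER hands the head skb to the attached BPF_PROG_TYPE_SOCKET_FILTER program and interprets its return value as the total length of the message (0 meaning more header bytes are still needed, per the rx_need_more_hdr branch above). An illustrative program for a hypothetical framing of a 4-byte big-endian length prefix, written as restricted C for clang -target bpf, might look like the sketch below; the helper header paths are assumptions about the build environment, not part of this patch:

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>	/* SEC(), helper declarations (assumed paths) */
#include <bpf/bpf_endian.h>	/* bpf_ntohl() */

SEC("socket")
int kcm_parse(struct __sk_buff *skb)
{
	__u32 len;

	/* Read the hypothetical 4-byte length header at the start of the
	 * message; if it cannot be read yet, return 0 so KCM keeps
	 * accumulating header bytes.
	 */
	if (bpf_skb_load_bytes(skb, 0, &len, sizeof(len)) < 0)
		return 0;

	/* Return value is the total message length: header plus payload. */
	return sizeof(len) + bpf_ntohl(len);
}

char _license[] SEC("license") = "GPL";
```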
608
609/* Called with lock held on lower socket */
610static int psock_tcp_read_sock(struct kcm_psock *psock)
611{
612 read_descriptor_t desc;
613
614 desc.arg.data = psock;
615 desc.error = 0;
616 desc.count = 1; /* give more than one skb per call */
617
618 /* sk should be locked here, so okay to do tcp_read_sock */
619 tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
620
621 unreserve_rx_kcm(psock, true);
622
623 return desc.error;
624}
625
626/* Lower sock lock held */
627static void psock_tcp_data_ready(struct sock *sk)
628{
629 struct kcm_psock *psock;
630
631 read_lock_bh(&sk->sk_callback_lock);
632
633 psock = (struct kcm_psock *)sk->sk_user_data;
634 if (unlikely(!psock || psock->rx_stopped))
635 goto out;
636
637 if (psock->ready_rx_msg)
638 goto out;
639
640 if (psock->rx_need_bytes) {
641 if (tcp_inq(sk) >= psock->rx_need_bytes)
642 psock->rx_need_bytes = 0;
643 else
644 goto out;
645 }
646
647 if (psock_tcp_read_sock(psock) == -ENOMEM)
648 queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
649
650out:
651 read_unlock_bh(&sk->sk_callback_lock);
652}
653
654static void do_psock_rx_work(struct kcm_psock *psock)
655{
656 read_descriptor_t rd_desc;
657 struct sock *csk = psock->sk;
658
659 /* We need the read lock to synchronize with psock_tcp_data_ready. We
660 * need the socket lock for calling tcp_read_sock.
661 */
662 lock_sock(csk);
663 read_lock_bh(&csk->sk_callback_lock);
664
665 if (unlikely(csk->sk_user_data != psock))
666 goto out;
667
668 if (unlikely(psock->rx_stopped))
669 goto out;
670
671 if (psock->ready_rx_msg)
672 goto out;
673
674 rd_desc.arg.data = psock;
675
676 if (psock_tcp_read_sock(psock) == -ENOMEM)
677 queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
678
679out:
680 read_unlock_bh(&csk->sk_callback_lock);
681 release_sock(csk);
682}
683
684static void psock_rx_work(struct work_struct *w)
685{
686 do_psock_rx_work(container_of(w, struct kcm_psock, rx_work));
687}
688
689static void psock_rx_delayed_work(struct work_struct *w)
690{
691 do_psock_rx_work(container_of(w, struct kcm_psock,
692 rx_delayed_work.work));
693}
694
695static void psock_tcp_state_change(struct sock *sk)
696{
697 /* TCP only does a POLLIN for a half close. Do a POLLHUP here
698 * since application will normally not poll with POLLIN
699 * on the TCP sockets.
700 */
701
702 report_csk_error(sk, EPIPE);
703}
704
705static void psock_tcp_write_space(struct sock *sk)
706{
707 struct kcm_psock *psock;
708 struct kcm_mux *mux;
709 struct kcm_sock *kcm;
710
711 read_lock_bh(&sk->sk_callback_lock);
712
713 psock = (struct kcm_psock *)sk->sk_user_data;
714 if (unlikely(!psock))
715 goto out;
716
717 mux = psock->mux;
718
719 spin_lock_bh(&mux->lock);
720
721 /* Check if the psock is reserved, i.e. a KCM socket is waiting to send. */
722 kcm = psock->tx_kcm;
723 if (kcm)
724 queue_work(kcm_wq, &kcm->tx_work);
725
726 spin_unlock_bh(&mux->lock);
727out:
728 read_unlock_bh(&sk->sk_callback_lock);
729}
730
731static void unreserve_psock(struct kcm_sock *kcm);
732
733/* kcm sock is locked. */
734static struct kcm_psock *reserve_psock(struct kcm_sock *kcm)
735{
736 struct kcm_mux *mux = kcm->mux;
737 struct kcm_psock *psock;
738
739 psock = kcm->tx_psock;
740
741 smp_rmb(); /* Must read tx_psock before tx_wait */
742
743 if (psock) {
744 WARN_ON(kcm->tx_wait);
745 if (unlikely(psock->tx_stopped))
746 unreserve_psock(kcm);
747 else
748 return kcm->tx_psock;
749 }
750
751 spin_lock_bh(&mux->lock);
752
753 /* Check again under lock to see if psock was reserved for this
754 * psock via psock_unreserve.
755 */
756 psock = kcm->tx_psock;
757 if (unlikely(psock)) {
758 WARN_ON(kcm->tx_wait);
759 spin_unlock_bh(&mux->lock);
760 return kcm->tx_psock;
761 }
762
763 if (!list_empty(&mux->psocks_avail)) {
764 psock = list_first_entry(&mux->psocks_avail,
765 struct kcm_psock,
766 psock_avail_list);
767 list_del(&psock->psock_avail_list);
768 if (kcm->tx_wait) {
769 list_del(&kcm->wait_psock_list);
770 kcm->tx_wait = false;
771 }
772 kcm->tx_psock = psock;
773 psock->tx_kcm = kcm;
774 KCM_STATS_INCR(psock->stats.reserved);
775 } else if (!kcm->tx_wait) {
776 list_add_tail(&kcm->wait_psock_list,
777 &mux->kcm_tx_waiters);
778 kcm->tx_wait = true;
779 }
780
781 spin_unlock_bh(&mux->lock);
782
783 return psock;
784}
785
786/* mux lock held */
787static void psock_now_avail(struct kcm_psock *psock)
788{
789 struct kcm_mux *mux = psock->mux;
790 struct kcm_sock *kcm;
791
792 if (list_empty(&mux->kcm_tx_waiters)) {
793 list_add_tail(&psock->psock_avail_list,
794 &mux->psocks_avail);
795 } else {
796 kcm = list_first_entry(&mux->kcm_tx_waiters,
797 struct kcm_sock,
798 wait_psock_list);
799 list_del(&kcm->wait_psock_list);
800 kcm->tx_wait = false;
801 psock->tx_kcm = kcm;
802
803 /* Commit before changing tx_psock since that is read in
804 * reserve_psock before queuing work.
805 */
806 smp_mb();
807
808 kcm->tx_psock = psock;
809 KCM_STATS_INCR(psock->stats.reserved);
810 queue_work(kcm_wq, &kcm->tx_work);
811 }
812}
813
814/* kcm sock is locked. */
815static void unreserve_psock(struct kcm_sock *kcm)
816{
817 struct kcm_psock *psock;
818 struct kcm_mux *mux = kcm->mux;
819
820 spin_lock_bh(&mux->lock);
821
822 psock = kcm->tx_psock;
823
824 if (WARN_ON(!psock)) {
825 spin_unlock_bh(&mux->lock);
826 return;
827 }
828
829 smp_rmb(); /* Read tx_psock before tx_wait */
830
831 kcm_update_tx_mux_stats(mux, psock);
832
833 WARN_ON(kcm->tx_wait);
834
835 kcm->tx_psock = NULL;
836 psock->tx_kcm = NULL;
837 KCM_STATS_INCR(psock->stats.unreserved);
838
839 if (unlikely(psock->tx_stopped)) {
840 if (psock->done) {
841 /* Deferred free */
842 list_del(&psock->psock_list);
843 mux->psocks_cnt--;
844 sock_put(psock->sk);
845 fput(psock->sk->sk_socket->file);
846 kmem_cache_free(kcm_psockp, psock);
847 }
848
849 /* Don't put back on available list */
850
851 spin_unlock_bh(&mux->lock);
852
853 return;
854 }
855
856 psock_now_avail(psock);
857
858 spin_unlock_bh(&mux->lock);
859}
860
861static void kcm_report_tx_retry(struct kcm_sock *kcm)
862{
863 struct kcm_mux *mux = kcm->mux;
864
865 spin_lock_bh(&mux->lock);
866 KCM_STATS_INCR(mux->stats.tx_retries);
867 spin_unlock_bh(&mux->lock);
868}
869
870/* Write any messages ready on the kcm socket. Called with kcm sock lock
871 * held. Return bytes actually sent or error.
872 */
873static int kcm_write_msgs(struct kcm_sock *kcm)
874{
875 struct sock *sk = &kcm->sk;
876 struct kcm_psock *psock;
877 struct sk_buff *skb, *head;
878 struct kcm_tx_msg *txm;
879 unsigned short fragidx, frag_offset;
880 unsigned int sent, total_sent = 0;
881 int ret = 0;
882
883 kcm->tx_wait_more = false;
884 psock = kcm->tx_psock;
885 if (unlikely(psock && psock->tx_stopped)) {
886 /* A reserved psock was aborted asynchronously. Unreserve
887 * it and we'll retry the message.
888 */
889 unreserve_psock(kcm);
890 kcm_report_tx_retry(kcm);
891 if (skb_queue_empty(&sk->sk_write_queue))
892 return 0;
893
894 kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
895
896 } else if (skb_queue_empty(&sk->sk_write_queue)) {
897 return 0;
898 }
899
900 head = skb_peek(&sk->sk_write_queue);
901 txm = kcm_tx_msg(head);
902
903 if (txm->sent) {
904 /* Send of first skbuff in queue already in progress */
905 if (WARN_ON(!psock)) {
906 ret = -EINVAL;
907 goto out;
908 }
909 sent = txm->sent;
910 frag_offset = txm->frag_offset;
911 fragidx = txm->fragidx;
912 skb = txm->frag_skb;
913
914 goto do_frag;
915 }
916
917try_again:
918 psock = reserve_psock(kcm);
919 if (!psock)
920 goto out;
921
922 do {
923 skb = head;
924 txm = kcm_tx_msg(head);
925 sent = 0;
926
927do_frag_list:
928 if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
929 ret = -EINVAL;
930 goto out;
931 }
932
933 for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
934 fragidx++) {
935 skb_frag_t *frag;
936
937 frag_offset = 0;
938do_frag:
939 frag = &skb_shinfo(skb)->frags[fragidx];
940 if (WARN_ON(!frag->size)) {
941 ret = -EINVAL;
942 goto out;
943 }
944
945 ret = kernel_sendpage(psock->sk->sk_socket,
946 frag->page.p,
947 frag->page_offset + frag_offset,
948 frag->size - frag_offset,
949 MSG_DONTWAIT);
950 if (ret <= 0) {
951 if (ret == -EAGAIN) {
952 /* Save state to try again when there's
953 * write space on the socket
954 */
955 txm->sent = sent;
956 txm->frag_offset = frag_offset;
957 txm->fragidx = fragidx;
958 txm->frag_skb = skb;
959
960 ret = 0;
961 goto out;
962 }
963
964 /* Hard failure in sending message, abort this
965 * psock since it has lost framing
966 * synchronization and retry sending the
967 * message from the beginning.
968 */
969 kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
970 true);
971 unreserve_psock(kcm);
972
973 txm->sent = 0;
974 kcm_report_tx_retry(kcm);
975 ret = 0;
976
977 goto try_again;
978 }
979
980 sent += ret;
981 frag_offset += ret;
982 KCM_STATS_ADD(psock->stats.tx_bytes, ret);
983 if (frag_offset < frag->size) {
984 /* Not finished with this frag */
985 goto do_frag;
986 }
987 }
988
989 if (skb == head) {
990 if (skb_has_frag_list(skb)) {
991 skb = skb_shinfo(skb)->frag_list;
992 goto do_frag_list;
993 }
994 } else if (skb->next) {
995 skb = skb->next;
996 goto do_frag_list;
997 }
998
999 /* Successfully sent the whole packet, account for it. */
1000 skb_dequeue(&sk->sk_write_queue);
1001 kfree_skb(head);
1002 sk->sk_wmem_queued -= sent;
1003 total_sent += sent;
1004 KCM_STATS_INCR(psock->stats.tx_msgs);
1005 } while ((head = skb_peek(&sk->sk_write_queue)));
1006out:
1007 if (!head) {
1008 /* Done with all queued messages. */
1009 WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
1010 unreserve_psock(kcm);
1011 }
1012
1013 /* Check if write space is available */
1014 sk->sk_write_space(sk);
1015
1016 return total_sent ? : ret;
1017}
1018
1019static void kcm_tx_work(struct work_struct *w)
1020{
1021 struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work);
1022 struct sock *sk = &kcm->sk;
1023 int err;
1024
1025 lock_sock(sk);
1026
1027 /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
1028 * aborts
1029 */
1030 err = kcm_write_msgs(kcm);
1031 if (err < 0) {
1032 /* Hard failure in write, report error on KCM socket */
1033 pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
1034 report_csk_error(&kcm->sk, -err);
1035 goto out;
1036 }
1037
1038 /* Primarily for SOCK_SEQPACKET sockets */
1039 if (likely(sk->sk_socket) &&
1040 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1041 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1042 sk->sk_write_space(sk);
1043 }
1044
1045out:
1046 release_sock(sk);
1047}
1048
1049static void kcm_push(struct kcm_sock *kcm)
1050{
1051 if (kcm->tx_wait_more)
1052 kcm_write_msgs(kcm);
1053}
1054
1055static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
1056 int offset, size_t size, int flags)
1057
1058{
1059 struct sock *sk = sock->sk;
1060 struct kcm_sock *kcm = kcm_sk(sk);
1061 struct sk_buff *skb = NULL, *head = NULL;
1062 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1063 bool eor;
1064 int err = 0;
1065 int i;
1066
1067 if (flags & MSG_SENDPAGE_NOTLAST)
1068 flags |= MSG_MORE;
1069
1070 /* No MSG_EOR from splice, only look at MSG_MORE */
1071 eor = !(flags & MSG_MORE);
1072
1073 lock_sock(sk);
1074
1075 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1076
1077 err = -EPIPE;
1078 if (sk->sk_err)
1079 goto out_error;
1080
1081 if (kcm->seq_skb) {
1082 /* Previously opened message */
1083 head = kcm->seq_skb;
1084 skb = kcm_tx_msg(head)->last_skb;
1085 i = skb_shinfo(skb)->nr_frags;
1086
1087 if (skb_can_coalesce(skb, i, page, offset)) {
1088 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
1089 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
1090 goto coalesced;
1091 }
1092
1093 if (i >= MAX_SKB_FRAGS) {
1094 struct sk_buff *tskb;
1095
1096 tskb = alloc_skb(0, sk->sk_allocation);
1097 while (!tskb) {
1098 kcm_push(kcm);
1099 err = sk_stream_wait_memory(sk, &timeo);
1100 if (err)
1101 goto out_error;
1102 }
1103
1104 if (head == skb)
1105 skb_shinfo(head)->frag_list = tskb;
1106 else
1107 skb->next = tskb;
1108
1109 skb = tskb;
1110 skb->ip_summed = CHECKSUM_UNNECESSARY;
1111 i = 0;
1112 }
1113 } else {
1114 /* Call the sk_stream functions to manage the sndbuf mem. */
1115 if (!sk_stream_memory_free(sk)) {
1116 kcm_push(kcm);
1117 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1118 err = sk_stream_wait_memory(sk, &timeo);
1119 if (err)
1120 goto out_error;
1121 }
1122
1123 head = alloc_skb(0, sk->sk_allocation);
1124 while (!head) {
1125 kcm_push(kcm);
1126 err = sk_stream_wait_memory(sk, &timeo);
1127 if (err)
1128 goto out_error;
1129 }
1130
1131 skb = head;
1132 i = 0;
1133 }
1134
1135 get_page(page);
1136 skb_fill_page_desc(skb, i, page, offset, size);
1137 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
1138
1139coalesced:
1140 skb->len += size;
1141 skb->data_len += size;
1142 skb->truesize += size;
1143 sk->sk_wmem_queued += size;
1144 sk_mem_charge(sk, size);
1145
1146 if (head != skb) {
1147 head->len += size;
1148 head->data_len += size;
1149 head->truesize += size;
1150 }
1151
1152 if (eor) {
1153 bool not_busy = skb_queue_empty(&sk->sk_write_queue);
1154
1155 /* Message complete, queue it on send buffer */
1156 __skb_queue_tail(&sk->sk_write_queue, head);
1157 kcm->seq_skb = NULL;
1158 KCM_STATS_INCR(kcm->stats.tx_msgs);
1159
1160 if (flags & MSG_BATCH) {
1161 kcm->tx_wait_more = true;
1162 } else if (kcm->tx_wait_more || not_busy) {
1163 err = kcm_write_msgs(kcm);
1164 if (err < 0) {
1165 /* We got a hard error in write_msgs but have
1166 * already queued this message. Report an error
1167 * in the socket, but don't affect return value
1168 * from sendmsg
1169 */
1170 pr_warn("KCM: Hard failure on kcm_write_msgs\n");
1171 report_csk_error(&kcm->sk, -err);
1172 }
1173 }
1174 } else {
1175 /* Message not complete, save state */
1176 kcm->seq_skb = head;
1177 kcm_tx_msg(head)->last_skb = skb;
1178 }
1179
1180 KCM_STATS_ADD(kcm->stats.tx_bytes, size);
1181
1182 release_sock(sk);
1183 return size;
1184
1185out_error:
1186 kcm_push(kcm);
1187
1188 err = sk_stream_error(sk, flags, err);
1189
1190 /* make sure we wake any epoll edge trigger waiter */
1191 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
1192 sk->sk_write_space(sk);
1193
1194 release_sock(sk);
1195 return err;
1196}
1197
1198static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1199{
1200 struct sock *sk = sock->sk;
1201 struct kcm_sock *kcm = kcm_sk(sk);
1202 struct sk_buff *skb = NULL, *head = NULL;
1203 size_t copy, copied = 0;
1204 long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1205 int eor = (sock->type == SOCK_DGRAM) ?
1206 !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
1207 int err = -EPIPE;
1208
1209 lock_sock(sk);
1210
1211 /* Per tcp_sendmsg this should be in poll */
1212 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1213
1214 if (sk->sk_err)
1215 goto out_error;
1216
1217 if (kcm->seq_skb) {
1218 /* Previously opened message */
1219 head = kcm->seq_skb;
1220 skb = kcm_tx_msg(head)->last_skb;
1221 goto start;
1222 }
1223
1224 /* Call the sk_stream functions to manage the sndbuf mem. */
1225 if (!sk_stream_memory_free(sk)) {
1226 kcm_push(kcm);
1227 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1228 err = sk_stream_wait_memory(sk, &timeo);
1229 if (err)
1230 goto out_error;
1231 }
1232
1233 /* New message, alloc head skb */
1234 head = alloc_skb(0, sk->sk_allocation);
1235 while (!head) {
1236 kcm_push(kcm);
1237 err = sk_stream_wait_memory(sk, &timeo);
1238 if (err)
1239 goto out_error;
1240
1241 head = alloc_skb(0, sk->sk_allocation);
1242 }
1243
1244 skb = head;
1245
1246 /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
1247 * csum_and_copy_from_iter from skb_do_copy_data_nocache.
1248 */
1249 skb->ip_summed = CHECKSUM_UNNECESSARY;
1250
1251start:
1252 while (msg_data_left(msg)) {
1253 bool merge = true;
1254 int i = skb_shinfo(skb)->nr_frags;
1255 struct page_frag *pfrag = sk_page_frag(sk);
1256
1257 if (!sk_page_frag_refill(sk, pfrag))
1258 goto wait_for_memory;
1259
1260 if (!skb_can_coalesce(skb, i, pfrag->page,
1261 pfrag->offset)) {
1262 if (i == MAX_SKB_FRAGS) {
1263 struct sk_buff *tskb;
1264
1265 tskb = alloc_skb(0, sk->sk_allocation);
1266 if (!tskb)
1267 goto wait_for_memory;
1268
1269 if (head == skb)
1270 skb_shinfo(head)->frag_list = tskb;
1271 else
1272 skb->next = tskb;
1273
1274 skb = tskb;
1275 skb->ip_summed = CHECKSUM_UNNECESSARY;
1276 continue;
1277 }
1278 merge = false;
1279 }
1280
1281 copy = min_t(int, msg_data_left(msg),
1282 pfrag->size - pfrag->offset);
1283
1284 if (!sk_wmem_schedule(sk, copy))
1285 goto wait_for_memory;
1286
1287 err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
1288 pfrag->page,
1289 pfrag->offset,
1290 copy);
1291 if (err)
1292 goto out_error;
1293
1294 /* Update the skb. */
1295 if (merge) {
1296 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1297 } else {
1298 skb_fill_page_desc(skb, i, pfrag->page,
1299 pfrag->offset, copy);
1300 get_page(pfrag->page);
1301 }
1302
1303 pfrag->offset += copy;
1304 copied += copy;
1305 if (head != skb) {
1306 head->len += copy;
1307 head->data_len += copy;
1308 }
1309
1310 continue;
1311
1312wait_for_memory:
1313 kcm_push(kcm);
1314 err = sk_stream_wait_memory(sk, &timeo);
1315 if (err)
1316 goto out_error;
1317 }
1318
1319 if (eor) {
1320 bool not_busy = skb_queue_empty(&sk->sk_write_queue);
1321
1322 /* Message complete, queue it on send buffer */
1323 __skb_queue_tail(&sk->sk_write_queue, head);
1324 kcm->seq_skb = NULL;
1325 KCM_STATS_INCR(kcm->stats.tx_msgs);
1326
1327 if (msg->msg_flags & MSG_BATCH) {
1328 kcm->tx_wait_more = true;
1329 } else if (kcm->tx_wait_more || not_busy) {
1330 err = kcm_write_msgs(kcm);
1331 if (err < 0) {
1332 /* We got a hard error in write_msgs but have
1333 * already queued this message. Report an error
1334 * in the socket, but don't affect return value
1335 * from sendmsg
1336 */
1337 pr_warn("KCM: Hard failure on kcm_write_msgs\n");
1338 report_csk_error(&kcm->sk, -err);
1339 }
1340 }
1341 } else {
1342 /* Message not complete, save state */
1343partial_message:
1344 kcm->seq_skb = head;
1345 kcm_tx_msg(head)->last_skb = skb;
1346 }
1347
1348 KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
1349
1350 release_sock(sk);
1351 return copied;
1352
1353out_error:
1354 kcm_push(kcm);
1355
1356 if (copied && sock->type == SOCK_SEQPACKET) {
1357 /* Wrote some bytes before encountering an
1358 * error, return partial success.
1359 */
1360 goto partial_message;
1361 }
1362
1363 if (head != kcm->seq_skb)
1364 kfree_skb(head);
1365
1366 err = sk_stream_error(sk, msg->msg_flags, err);
1367
1368 /* make sure we wake any epoll edge trigger waiter */
1369 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
1370 sk->sk_write_space(sk);
1371
1372 release_sock(sk);
1373 return err;
1374}
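The eor/MSG_BATCH handling in kcm_sendmsg() above is the transmit-side batching knob: on a SOCK_SEQPACKET KCM socket a message is completed with MSG_EOR, and MSG_BATCH queues it while deferring the actual kcm_write_msgs() flush to a later send. A hedged userspace sketch, reusing the hypothetical kcm_fd from the earlier example (MSG_BATCH comes from linux/socket.h and may be missing from older libc headers):

```c
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#ifndef MSG_BATCH
#define MSG_BATCH 0x40000	/* value from linux/socket.h (assumption: not yet in libc) */
#endif

/* Sketch: queue two messages with MSG_BATCH, then let the third send
 * flush all three to the transport in one pass.
 */
static void send_batch(int kcm_fd)
{
	const char *msgs[] = { "one", "two", "three" };
	int i;

	for (i = 0; i < 3; i++) {
		struct iovec iov = {
			.iov_base = (void *)msgs[i],
			.iov_len = strlen(msgs[i]),
		};
		struct msghdr mh = {
			.msg_iov = &iov,
			.msg_iovlen = 1,
		};
		/* MSG_EOR completes the message; MSG_BATCH defers the flush */
		int flags = MSG_EOR | (i < 2 ? MSG_BATCH : 0);

		sendmsg(kcm_fd, &mh, flags);	/* error handling omitted */
	}
}
```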
1375
1376static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
1377 long timeo, int *err)
1378{
1379 struct sk_buff *skb;
1380
1381 while (!(skb = skb_peek(&sk->sk_receive_queue))) {
1382 if (sk->sk_err) {
1383 *err = sock_error(sk);
1384 return NULL;
1385 }
1386
1387 if (sock_flag(sk, SOCK_DONE))
1388 return NULL;
1389
1390 if ((flags & MSG_DONTWAIT) || !timeo) {
1391 *err = -EAGAIN;
1392 return NULL;
1393 }
1394
1395 sk_wait_data(sk, &timeo, NULL);
1396
1397 /* Handle signals */
1398 if (signal_pending(current)) {
1399 *err = sock_intr_errno(timeo);
1400 return NULL;
1401 }
1402 }
1403
1404 return skb;
1405}
1406
1407static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
1408 size_t len, int flags)
1409{
1410 struct sock *sk = sock->sk;
1411 struct kcm_sock *kcm = kcm_sk(sk);
1412 int err = 0;
1413 long timeo;
1414 struct kcm_rx_msg *rxm;
1415 int copied = 0;
1416 struct sk_buff *skb;
1417
1418 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1419
1420 lock_sock(sk);
1421
1422 skb = kcm_wait_data(sk, flags, timeo, &err);
1423 if (!skb)
1424 goto out;
1425
1426 /* Okay, have a message on the receive queue */
1427
1428 rxm = kcm_rx_msg(skb);
1429
1430 if (len > rxm->full_len)
1431 len = rxm->full_len;
1432
1433 err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
1434 if (err < 0)
1435 goto out;
1436
1437 copied = len;
1438 if (likely(!(flags & MSG_PEEK))) {
1439 KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
1440 if (copied < rxm->full_len) {
1441 if (sock->type == SOCK_DGRAM) {
1442 /* Truncated message */
1443 msg->msg_flags |= MSG_TRUNC;
1444 goto msg_finished;
1445 }
1446 rxm->offset += copied;
1447 rxm->full_len -= copied;
1448 } else {
1449msg_finished:
1450 /* Finished with message */
1451 msg->msg_flags |= MSG_EOR;
1452 KCM_STATS_INCR(kcm->stats.rx_msgs);
1453 skb_unlink(skb, &sk->sk_receive_queue);
1454 kfree_skb(skb);
1455 }
1456 }
1457
1458out:
1459 release_sock(sk);
1460
1461 return copied ? : err;
1462}
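On the receive side, kcm_recvmsg() above sets MSG_EOR once the whole message has been consumed; on SOCK_SEQPACKET a short read leaves the unread tail queued (rxm->offset and full_len are advanced), while SOCK_DGRAM reports MSG_TRUNC and discards it. A sketch of reading one full message under those semantics, again using the hypothetical kcm_fd:

```c
#include <sys/socket.h>
#include <sys/uio.h>

/* Sketch: read one complete message from a SOCK_SEQPACKET KCM socket,
 * continuing until the kernel reports MSG_EOR for it.
 */
static ssize_t recv_message(int kcm_fd, char *buf, size_t size)
{
	size_t off = 0;

	for (;;) {
		struct iovec iov = {
			.iov_base = buf + off,
			.iov_len = size - off,
		};
		struct msghdr mh = {
			.msg_iov = &iov,
			.msg_iovlen = 1,
		};
		ssize_t n = recvmsg(kcm_fd, &mh, 0);

		if (n <= 0)
			return n;		/* error handling omitted */
		off += n;
		if (mh.msg_flags & MSG_EOR)
			return off;		/* whole message delivered */
		if (off == size)
			return off;		/* buffer full; tail stays queued */
	}
}
```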
1463
1464static ssize_t kcm_sock_splice(struct sock *sk,
1465 struct pipe_inode_info *pipe,
1466 struct splice_pipe_desc *spd)
1467{
1468 int ret;
1469
1470 release_sock(sk);
1471 ret = splice_to_pipe(pipe, spd);
1472 lock_sock(sk);
1473
1474 return ret;
1475}
1476
1477static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
1478 struct pipe_inode_info *pipe, size_t len,
1479 unsigned int flags)
1480{
1481 struct sock *sk = sock->sk;
1482 struct kcm_sock *kcm = kcm_sk(sk);
1483 long timeo;
1484 struct kcm_rx_msg *rxm;
1485 int err = 0;
1486 size_t copied;
1487 struct sk_buff *skb;
1488
1489 /* Only support splice for SOCK_SEQPACKET */
1490
1491 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1492
1493 lock_sock(sk);
1494
1495 skb = kcm_wait_data(sk, flags, timeo, &err);
1496 if (!skb)
1497 goto err_out;
1498
1499 /* Okay, have a message on the receive queue */
1500
1501 rxm = kcm_rx_msg(skb);
1502
1503 if (len > rxm->full_len)
1504 len = rxm->full_len;
1505
1506 copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags,
1507 kcm_sock_splice);
1508 if (copied < 0) {
1509 err = copied;
1510 goto err_out;
1511 }
1512
1513 KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
1514
1515 rxm->offset += copied;
1516 rxm->full_len -= copied;
1517
1518 /* We have no way to return MSG_EOR. If all the bytes have been
1519 * read we still leave the message in the receive socket buffer.
1520 * A subsequent recvmsg needs to be done to return MSG_EOR and
1521 * finish reading the message.
1522 */
1523
1524 release_sock(sk);
1525
1526 return copied;
1527
1528err_out:
1529 release_sock(sk);
1530
1531 return err;
1532}
1533
1534/* kcm sock lock held */
1535static void kcm_recv_disable(struct kcm_sock *kcm)
1536{
1537 struct kcm_mux *mux = kcm->mux;
1538
1539 if (kcm->rx_disabled)
1540 return;
1541
1542 spin_lock_bh(&mux->rx_lock);
1543
1544 kcm->rx_disabled = 1;
1545
1546 /* If a psock is reserved we'll do cleanup in unreserve */
1547 if (!kcm->rx_psock) {
1548 if (kcm->rx_wait) {
1549 list_del(&kcm->wait_rx_list);
1550 kcm->rx_wait = false;
1551 }
1552
1553 requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
1554 }
1555
1556 spin_unlock_bh(&mux->rx_lock);
1557}
1558
1559/* kcm sock lock held */
1560static void kcm_recv_enable(struct kcm_sock *kcm)
1561{
1562 struct kcm_mux *mux = kcm->mux;
1563
1564 if (!kcm->rx_disabled)
1565 return;
1566
1567 spin_lock_bh(&mux->rx_lock);
1568
1569 kcm->rx_disabled = 0;
1570 kcm_rcv_ready(kcm);
1571
1572 spin_unlock_bh(&mux->rx_lock);
1573}
1574
1575static int kcm_setsockopt(struct socket *sock, int level, int optname,
1576 char __user *optval, unsigned int optlen)
1577{
1578 struct kcm_sock *kcm = kcm_sk(sock->sk);
1579 int val, valbool;
1580 int err = 0;
1581
1582 if (level != SOL_KCM)
1583 return -ENOPROTOOPT;
1584
1585 if (optlen < sizeof(int))
1586 return -EINVAL;
1587
1588 if (get_user(val, (int __user *)optval))
1589 return -EINVAL;
1590
1591 valbool = val ? 1 : 0;
1592
1593 switch (optname) {
1594 case KCM_RECV_DISABLE:
1595 lock_sock(&kcm->sk);
1596 if (valbool)
1597 kcm_recv_disable(kcm);
1598 else
1599 kcm_recv_enable(kcm);
1600 release_sock(&kcm->sk);
1601 break;
1602 default:
1603 err = -ENOPROTOOPT;
1604 }
1605
1606 return err;
1607}
1608
1609static int kcm_getsockopt(struct socket *sock, int level, int optname,
1610 char __user *optval, int __user *optlen)
1611{
1612 struct kcm_sock *kcm = kcm_sk(sock->sk);
1613 int val, len;
1614
1615 if (level != SOL_KCM)
1616 return -ENOPROTOOPT;
1617
1618 if (get_user(len, optlen))
1619 return -EFAULT;
1620
1621 len = min_t(unsigned int, len, sizeof(int));
1622 if (len < 0)
1623 return -EINVAL;
1624
1625 switch (optname) {
1626 case KCM_RECV_DISABLE:
1627 val = kcm->rx_disabled;
1628 break;
1629 default:
1630 return -ENOPROTOOPT;
1631 }
1632
1633 if (put_user(len, optlen))
1634 return -EFAULT;
1635 if (copy_to_user(optval, &val, len))
1636 return -EFAULT;
1637 return 0;
1638}
1639
1640static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
1641{
1642 struct kcm_sock *tkcm;
1643 struct list_head *head;
1644 int index = 0;
1645
1646 /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
1647 * we set sk_state, otherwise epoll_wait always returns right away with
1648 * POLLHUP
1649 */
1650 kcm->sk.sk_state = TCP_ESTABLISHED;
1651
1652 /* Add to mux's kcm sockets list */
1653 kcm->mux = mux;
1654 spin_lock_bh(&mux->lock);
1655
1656 head = &mux->kcm_socks;
1657 list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) {
1658 if (tkcm->index != index)
1659 break;
1660 head = &tkcm->kcm_sock_list;
1661 index++;
1662 }
1663
1664 list_add(&kcm->kcm_sock_list, head);
1665 kcm->index = index;
1666
1667 mux->kcm_socks_cnt++;
1668 spin_unlock_bh(&mux->lock);
1669
1670 INIT_WORK(&kcm->tx_work, kcm_tx_work);
1671
1672 spin_lock_bh(&mux->rx_lock);
1673 kcm_rcv_ready(kcm);
1674 spin_unlock_bh(&mux->rx_lock);
1675}
1676
1677static void kcm_rx_msg_timeout(unsigned long arg)
1678{
1679 struct kcm_psock *psock = (struct kcm_psock *)arg;
1680
1681 /* Message assembly timed out */
1682 KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
1683 kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
1684}
1685
1686static int kcm_attach(struct socket *sock, struct socket *csock,
1687 struct bpf_prog *prog)
1688{
1689 struct kcm_sock *kcm = kcm_sk(sock->sk);
1690 struct kcm_mux *mux = kcm->mux;
1691 struct sock *csk;
1692 struct kcm_psock *psock = NULL, *tpsock;
1693 struct list_head *head;
1694 int index = 0;
1695
1696 if (csock->ops->family != PF_INET &&
1697 csock->ops->family != PF_INET6)
1698 return -EINVAL;
1699
1700 csk = csock->sk;
1701 if (!csk)
1702 return -EINVAL;
1703
1704 /* Only support TCP for now */
1705 if (csk->sk_protocol != IPPROTO_TCP)
1706 return -EINVAL;
1707
1708 psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
1709 if (!psock)
1710 return -ENOMEM;
1711
1712 psock->mux = mux;
1713 psock->sk = csk;
1714 psock->bpf_prog = prog;
1715
1716 setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
1717 (unsigned long)psock);
1718
1719 INIT_WORK(&psock->rx_work, psock_rx_work);
1720 INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work);
1721
1722 sock_hold(csk);
1723
1724 write_lock_bh(&csk->sk_callback_lock);
1725 psock->save_data_ready = csk->sk_data_ready;
1726 psock->save_write_space = csk->sk_write_space;
1727 psock->save_state_change = csk->sk_state_change;
1728 csk->sk_user_data = psock;
1729 csk->sk_data_ready = psock_tcp_data_ready;
1730 csk->sk_write_space = psock_tcp_write_space;
1731 csk->sk_state_change = psock_tcp_state_change;
1732 write_unlock_bh(&csk->sk_callback_lock);
1733
1734 /* Finished initialization, now add the psock to the MUX. */
1735 spin_lock_bh(&mux->lock);
1736 head = &mux->psocks;
1737 list_for_each_entry(tpsock, &mux->psocks, psock_list) {
1738 if (tpsock->index != index)
1739 break;
1740 head = &tpsock->psock_list;
1741 index++;
1742 }
1743
1744 list_add(&psock->psock_list, head);
1745 psock->index = index;
1746
1747 KCM_STATS_INCR(mux->stats.psock_attach);
1748 mux->psocks_cnt++;
1749 psock_now_avail(psock);
1750 spin_unlock_bh(&mux->lock);
1751
1752 /* Schedule RX work in case there are already bytes queued */
1753 queue_work(kcm_wq, &psock->rx_work);
1754
1755 return 0;
1756}
1757
1758static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
1759{
1760 struct socket *csock;
1761 struct bpf_prog *prog;
1762 int err;
1763
1764 csock = sockfd_lookup(info->fd, &err);
1765 if (!csock)
1766 return -ENOENT;
1767
1768 prog = bpf_prog_get(info->bpf_fd);
1769 if (IS_ERR(prog)) {
1770 err = PTR_ERR(prog);
1771 goto out;
1772 }
1773
1774 if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
1775 bpf_prog_put(prog);
1776 err = -EINVAL;
1777 goto out;
1778 }
1779
1780 err = kcm_attach(sock, csock, prog);
1781 if (err) {
1782 bpf_prog_put(prog);
1783 goto out;
1784 }
1785
1786 /* Keep reference on file also */
1787
1788 return 0;
1789out:
1790 fput(csock->file);
1791 return err;
1792}
1793
1794static void kcm_unattach(struct kcm_psock *psock)
1795{
1796 struct sock *csk = psock->sk;
1797 struct kcm_mux *mux = psock->mux;
1798
1799 /* Stop getting callbacks from TCP socket. After this there should
1800 * be no way to reserve a kcm for this psock.
1801 */
1802 write_lock_bh(&csk->sk_callback_lock);
1803 csk->sk_user_data = NULL;
1804 csk->sk_data_ready = psock->save_data_ready;
1805 csk->sk_write_space = psock->save_write_space;
1806 csk->sk_state_change = psock->save_state_change;
1807 psock->rx_stopped = 1;
1808
1809 if (WARN_ON(psock->rx_kcm)) {
1810 write_unlock_bh(&csk->sk_callback_lock);
1811 return;
1812 }
1813
1814 spin_lock_bh(&mux->rx_lock);
1815
1816 /* Stop receiver activities. After this point psock should not be
1817 * able to get onto ready list either through callbacks or work.
1818 */
1819 if (psock->ready_rx_msg) {
1820 list_del(&psock->psock_ready_list);
1821 kfree_skb(psock->ready_rx_msg);
1822 psock->ready_rx_msg = NULL;
1823 KCM_STATS_INCR(mux->stats.rx_ready_drops);
1824 }
1825
1826 spin_unlock_bh(&mux->rx_lock);
1827
1828 write_unlock_bh(&csk->sk_callback_lock);
1829
1830 del_timer_sync(&psock->rx_msg_timer);
1831 cancel_work_sync(&psock->rx_work);
1832 cancel_delayed_work_sync(&psock->rx_delayed_work);
1833
1834 bpf_prog_put(psock->bpf_prog);
1835
1836 kfree_skb(psock->rx_skb_head);
1837 psock->rx_skb_head = NULL;
1838
1839 spin_lock_bh(&mux->lock);
1840
1841 aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
1842
1843 KCM_STATS_INCR(mux->stats.psock_unattach);
1844
1845 if (psock->tx_kcm) {
1846 /* psock was reserved. Just mark it finished and we will clean
1847 * up in the kcm paths, we need kcm lock which can not be
1848 * acquired here.
1849 */
1850 KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
1851 spin_unlock_bh(&mux->lock);
1852
1853 /* We are unattaching a socket that is reserved. Abort the
1854 * socket since we may be out of sync in sending on it. We need
1855 * to do this without the mux lock.
1856 */
1857 kcm_abort_tx_psock(psock, EPIPE, false);
1858
1859 spin_lock_bh(&mux->lock);
1860 if (!psock->tx_kcm) {
1861			/* psock was unreserved in the window in which the mux was unlocked */
1862 goto no_reserved;
1863 }
1864 psock->done = 1;
1865
1866 /* Commit done before queuing work to process it */
1867 smp_mb();
1868
1869 /* Queue tx work to make sure psock->done is handled */
1870 queue_work(kcm_wq, &psock->tx_kcm->tx_work);
1871 spin_unlock_bh(&mux->lock);
1872 } else {
1873no_reserved:
1874 if (!psock->tx_stopped)
1875 list_del(&psock->psock_avail_list);
1876 list_del(&psock->psock_list);
1877 mux->psocks_cnt--;
1878 spin_unlock_bh(&mux->lock);
1879
1880 sock_put(csk);
1881 fput(csk->sk_socket->file);
1882 kmem_cache_free(kcm_psockp, psock);
1883 }
1884}
1885
1886static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
1887{
1888 struct kcm_sock *kcm = kcm_sk(sock->sk);
1889 struct kcm_mux *mux = kcm->mux;
1890 struct kcm_psock *psock;
1891 struct socket *csock;
1892 struct sock *csk;
1893 int err;
1894
1895 csock = sockfd_lookup(info->fd, &err);
1896 if (!csock)
1897 return -ENOENT;
1898
1899 csk = csock->sk;
1900 if (!csk) {
1901 err = -EINVAL;
1902 goto out;
1903 }
1904
1905 err = -ENOENT;
1906
1907 spin_lock_bh(&mux->lock);
1908
1909 list_for_each_entry(psock, &mux->psocks, psock_list) {
1910 if (psock->sk != csk)
1911 continue;
1912
1913 /* Found the matching psock */
1914
1915 if (psock->unattaching || WARN_ON(psock->done)) {
1916 err = -EALREADY;
1917 break;
1918 }
1919
1920 psock->unattaching = 1;
1921
1922 spin_unlock_bh(&mux->lock);
1923
1924 kcm_unattach(psock);
1925
1926 err = 0;
1927 goto out;
1928 }
1929
1930 spin_unlock_bh(&mux->lock);
1931
1932out:
1933 fput(csock->file);
1934 return err;
1935}
1936
1937static struct proto kcm_proto = {
1938 .name = "KCM",
1939 .owner = THIS_MODULE,
1940 .obj_size = sizeof(struct kcm_sock),
1941};
1942
1943/* Clone a kcm socket. */
1944static int kcm_clone(struct socket *osock, struct kcm_clone *info,
1945 struct socket **newsockp)
1946{
1947 struct socket *newsock;
1948 struct sock *newsk;
1949 struct file *newfile;
1950 int err, newfd;
1951
1952 err = -ENFILE;
1953 newsock = sock_alloc();
1954 if (!newsock)
1955 goto out;
1956
1957 newsock->type = osock->type;
1958 newsock->ops = osock->ops;
1959
1960 __module_get(newsock->ops->owner);
1961
1962 newfd = get_unused_fd_flags(0);
1963 if (unlikely(newfd < 0)) {
1964 err = newfd;
1965 goto out_fd_fail;
1966 }
1967
1968 newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
1969 if (unlikely(IS_ERR(newfile))) {
1970 err = PTR_ERR(newfile);
1971 goto out_sock_alloc_fail;
1972 }
1973
1974 newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
1975 &kcm_proto, true);
1976 if (!newsk) {
1977 err = -ENOMEM;
1978 goto out_sk_alloc_fail;
1979 }
1980
1981 sock_init_data(newsock, newsk);
1982 init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
1983
1984 fd_install(newfd, newfile);
1985 *newsockp = newsock;
1986 info->fd = newfd;
1987
1988 return 0;
1989
1990out_sk_alloc_fail:
1991 fput(newfile);
1992out_sock_alloc_fail:
1993 put_unused_fd(newfd);
1994out_fd_fail:
1995 sock_release(newsock);
1996out:
1997 return err;
1998}
1999
2000static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2001{
2002 int err;
2003
2004 switch (cmd) {
2005 case SIOCKCMATTACH: {
2006 struct kcm_attach info;
2007
2008 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
2009			return -EFAULT;
2010
2011 err = kcm_attach_ioctl(sock, &info);
2012
2013 break;
2014 }
2015 case SIOCKCMUNATTACH: {
2016 struct kcm_unattach info;
2017
2018 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
2019			return -EFAULT;
2020
2021 err = kcm_unattach_ioctl(sock, &info);
2022
2023 break;
2024 }
2025 case SIOCKCMCLONE: {
2026 struct kcm_clone info;
2027 struct socket *newsock = NULL;
2028
2029 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
2030			return -EFAULT;
2031
2032 err = kcm_clone(sock, &info, &newsock);
2033
2034 if (!err) {
2035 if (copy_to_user((void __user *)arg, &info,
2036 sizeof(info))) {
2037 err = -EFAULT;
2038 sock_release(newsock);
2039 }
2040 }
2041
2042 break;
2043 }
2044 default:
2045 err = -ENOIOCTLCMD;
2046 break;
2047 }
2048
2049 return err;
2050}
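/* Usage sketch (illustrative only; kcmfd, tcpfd and bpffd are placeholder
 * descriptors): the ioctls handled above are driven with struct kcm_attach,
 * struct kcm_unattach and struct kcm_clone from <linux/kcm.h>, where bpffd
 * refers to a loaded BPF_PROG_TYPE_SOCKET_FILTER program that returns the
 * framed message length, and on success SIOCKCMCLONE fills cln.fd with a
 * new KCM socket on the same MUX:
 *
 *	struct kcm_attach att = { .fd = tcpfd, .bpf_fd = bpffd };
 *	struct kcm_clone cln = { };
 *	struct kcm_unattach det = { .fd = tcpfd };
 *
 *	ioctl(kcmfd, SIOCKCMATTACH, &att);
 *	ioctl(kcmfd, SIOCKCMCLONE, &cln);
 *	ioctl(kcmfd, SIOCKCMUNATTACH, &det);
 */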
2051
2052static void free_mux(struct rcu_head *rcu)
2053{
2054 struct kcm_mux *mux = container_of(rcu,
2055 struct kcm_mux, rcu);
2056
2057 kmem_cache_free(kcm_muxp, mux);
2058}
2059
2060static void release_mux(struct kcm_mux *mux)
2061{
2062 struct kcm_net *knet = mux->knet;
2063 struct kcm_psock *psock, *tmp_psock;
2064
2065 /* Release psocks */
2066 list_for_each_entry_safe(psock, tmp_psock,
2067 &mux->psocks, psock_list) {
2068 if (!WARN_ON(psock->unattaching))
2069 kcm_unattach(psock);
2070 }
2071
2072 if (WARN_ON(mux->psocks_cnt))
2073 return;
2074
2075 __skb_queue_purge(&mux->rx_hold_queue);
2076
2077 mutex_lock(&knet->mutex);
2078 aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
2079 aggregate_psock_stats(&mux->aggregate_psock_stats,
2080 &knet->aggregate_psock_stats);
2081 list_del_rcu(&mux->kcm_mux_list);
2082 knet->count--;
2083 mutex_unlock(&knet->mutex);
2084
2085 call_rcu(&mux->rcu, free_mux);
2086}
2087
2088static void kcm_done(struct kcm_sock *kcm)
2089{
2090 struct kcm_mux *mux = kcm->mux;
2091 struct sock *sk = &kcm->sk;
2092 int socks_cnt;
2093
2094 spin_lock_bh(&mux->rx_lock);
2095 if (kcm->rx_psock) {
2096 /* Cleanup in unreserve_rx_kcm */
2097 WARN_ON(kcm->done);
2098 kcm->rx_disabled = 1;
2099 kcm->done = 1;
2100 spin_unlock_bh(&mux->rx_lock);
2101 return;
2102 }
2103
2104 if (kcm->rx_wait) {
2105 list_del(&kcm->wait_rx_list);
2106 kcm->rx_wait = false;
2107 }
2108 /* Move any pending receive messages to other kcm sockets */
2109 requeue_rx_msgs(mux, &sk->sk_receive_queue);
2110
2111 spin_unlock_bh(&mux->rx_lock);
2112
2113 if (WARN_ON(sk_rmem_alloc_get(sk)))
2114 return;
2115
2116 /* Detach from MUX */
2117 spin_lock_bh(&mux->lock);
2118
2119 list_del(&kcm->kcm_sock_list);
2120 mux->kcm_socks_cnt--;
2121 socks_cnt = mux->kcm_socks_cnt;
2122
2123 spin_unlock_bh(&mux->lock);
2124
2125 if (!socks_cnt) {
2126 /* We are done with the mux now. */
2127 release_mux(mux);
2128 }
2129
2130 WARN_ON(kcm->rx_wait);
2131
2132 sock_put(&kcm->sk);
2133}
2134
2135/* Close a KCM socket (the proto_ops release handler).
2136 * If this is the last KCM socket on the MUX, destroy the MUX.
2137 */
2138static int kcm_release(struct socket *sock)
2139{
2140 struct sock *sk = sock->sk;
2141 struct kcm_sock *kcm;
2142 struct kcm_mux *mux;
2143 struct kcm_psock *psock;
2144
2145 if (!sk)
2146 return 0;
2147
2148 kcm = kcm_sk(sk);
2149 mux = kcm->mux;
2150
2151 sock_orphan(sk);
2152 kfree_skb(kcm->seq_skb);
2153
2154 lock_sock(sk);
2155 /* Purge queue under lock to avoid race condition with tx_work trying
2156 * to act when queue is nonempty. If tx_work runs after this point
2157 * it will just return.
2158 */
2159 __skb_queue_purge(&sk->sk_write_queue);
2160 release_sock(sk);
2161
2162 spin_lock_bh(&mux->lock);
2163 if (kcm->tx_wait) {
2164		/* Take kcm off the tx_wait list; after this point there should be no way
2165 * that a psock will be assigned to this kcm.
2166 */
2167 list_del(&kcm->wait_psock_list);
2168 kcm->tx_wait = false;
2169 }
2170 spin_unlock_bh(&mux->lock);
2171
2172 /* Cancel work. After this point there should be no outside references
2173 * to the kcm socket.
2174 */
2175 cancel_work_sync(&kcm->tx_work);
2176
2177 lock_sock(sk);
2178 psock = kcm->tx_psock;
2179 if (psock) {
2180 /* A psock was reserved, so we need to kill it since it
2181 * may already have some bytes queued from a message. We
2182 * need to do this after removing kcm from tx_wait list.
2183 */
2184 kcm_abort_tx_psock(psock, EPIPE, false);
2185 unreserve_psock(kcm);
2186 }
2187 release_sock(sk);
2188
2189 WARN_ON(kcm->tx_wait);
2190 WARN_ON(kcm->tx_psock);
2191
2192 sock->sk = NULL;
2193
2194 kcm_done(kcm);
2195
2196 return 0;
2197}
2198
2199static const struct proto_ops kcm_dgram_ops = {
2200 .family = PF_KCM,
2201 .owner = THIS_MODULE,
2202 .release = kcm_release,
2203 .bind = sock_no_bind,
2204 .connect = sock_no_connect,
2205 .socketpair = sock_no_socketpair,
2206 .accept = sock_no_accept,
2207 .getname = sock_no_getname,
2208 .poll = datagram_poll,
2209 .ioctl = kcm_ioctl,
2210 .listen = sock_no_listen,
2211 .shutdown = sock_no_shutdown,
2212 .setsockopt = kcm_setsockopt,
2213 .getsockopt = kcm_getsockopt,
2214 .sendmsg = kcm_sendmsg,
2215 .recvmsg = kcm_recvmsg,
2216 .mmap = sock_no_mmap,
2217 .sendpage = kcm_sendpage,
2218};
2219
2220static const struct proto_ops kcm_seqpacket_ops = {
2221 .family = PF_KCM,
2222 .owner = THIS_MODULE,
2223 .release = kcm_release,
2224 .bind = sock_no_bind,
2225 .connect = sock_no_connect,
2226 .socketpair = sock_no_socketpair,
2227 .accept = sock_no_accept,
2228 .getname = sock_no_getname,
2229 .poll = datagram_poll,
2230 .ioctl = kcm_ioctl,
2231 .listen = sock_no_listen,
2232 .shutdown = sock_no_shutdown,
2233 .setsockopt = kcm_setsockopt,
2234 .getsockopt = kcm_getsockopt,
2235 .sendmsg = kcm_sendmsg,
2236 .recvmsg = kcm_recvmsg,
2237 .mmap = sock_no_mmap,
2238 .sendpage = kcm_sendpage,
2239 .splice_read = kcm_splice_read,
2240};
2241
2242/* Create a KCM socket; each new socket starts on its own freshly allocated MUX */
2243static int kcm_create(struct net *net, struct socket *sock,
2244 int protocol, int kern)
2245{
2246 struct kcm_net *knet = net_generic(net, kcm_net_id);
2247 struct sock *sk;
2248 struct kcm_mux *mux;
2249
2250 switch (sock->type) {
2251 case SOCK_DGRAM:
2252 sock->ops = &kcm_dgram_ops;
2253 break;
2254 case SOCK_SEQPACKET:
2255 sock->ops = &kcm_seqpacket_ops;
2256 break;
2257 default:
2258 return -ESOCKTNOSUPPORT;
2259 }
2260
2261 if (protocol != KCMPROTO_CONNECTED)
2262 return -EPROTONOSUPPORT;
2263
2264 sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern);
2265 if (!sk)
2266 return -ENOMEM;
2267
2268 /* Allocate a kcm mux, shared between KCM sockets */
2269 mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL);
2270 if (!mux) {
2271 sk_free(sk);
2272 return -ENOMEM;
2273 }
2274
2275 spin_lock_init(&mux->lock);
2276 spin_lock_init(&mux->rx_lock);
2277 INIT_LIST_HEAD(&mux->kcm_socks);
2278 INIT_LIST_HEAD(&mux->kcm_rx_waiters);
2279 INIT_LIST_HEAD(&mux->kcm_tx_waiters);
2280
2281 INIT_LIST_HEAD(&mux->psocks);
2282 INIT_LIST_HEAD(&mux->psocks_ready);
2283 INIT_LIST_HEAD(&mux->psocks_avail);
2284
2285 mux->knet = knet;
2286
2287 /* Add new MUX to list */
2288 mutex_lock(&knet->mutex);
2289 list_add_rcu(&mux->kcm_mux_list, &knet->mux_list);
2290 knet->count++;
2291 mutex_unlock(&knet->mutex);
2292
2293 skb_queue_head_init(&mux->rx_hold_queue);
2294
2295 /* Init KCM socket */
2296 sock_init_data(sock, sk);
2297 init_kcm_sock(kcm_sk(sk), mux);
2298
2299 return 0;
2300}
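/* Usage sketch (illustrative only): kcm_create() above runs when userspace
 * opens a socket in the KCM family, e.g.
 *
 *	int kcmfd = socket(AF_KCM, SOCK_SEQPACKET, KCMPROTO_CONNECTED);
 *
 * (SOCK_DGRAM is accepted as well). Each such call allocates a fresh MUX;
 * further KCM sockets join an existing MUX only via SIOCKCMCLONE.
 */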
2301
2302static struct net_proto_family kcm_family_ops = {
2303 .family = PF_KCM,
2304 .create = kcm_create,
2305 .owner = THIS_MODULE,
2306};
2307
2308static __net_init int kcm_init_net(struct net *net)
2309{
2310 struct kcm_net *knet = net_generic(net, kcm_net_id);
2311
2312 INIT_LIST_HEAD_RCU(&knet->mux_list);
2313 mutex_init(&knet->mutex);
2314
2315 return 0;
2316}
2317
2318static __net_exit void kcm_exit_net(struct net *net)
2319{
2320 struct kcm_net *knet = net_generic(net, kcm_net_id);
2321
2322 /* All KCM sockets should be closed at this point, which should mean
2323 * that all multiplexors and psocks have been destroyed.
2324 */
2325 WARN_ON(!list_empty(&knet->mux_list));
2326}
2327
2328static struct pernet_operations kcm_net_ops = {
2329 .init = kcm_init_net,
2330 .exit = kcm_exit_net,
2331 .id = &kcm_net_id,
2332 .size = sizeof(struct kcm_net),
2333};
2334
2335static int __init kcm_init(void)
2336{
2337 int err = -ENOMEM;
2338
2339 kcm_muxp = kmem_cache_create("kcm_mux_cache",
2340 sizeof(struct kcm_mux), 0,
2341 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2342 if (!kcm_muxp)
2343 goto fail;
2344
2345 kcm_psockp = kmem_cache_create("kcm_psock_cache",
2346 sizeof(struct kcm_psock), 0,
2347 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2348 if (!kcm_psockp)
2349 goto fail;
2350
2351 kcm_wq = create_singlethread_workqueue("kkcmd");
2352 if (!kcm_wq)
2353 goto fail;
2354
2355 err = proto_register(&kcm_proto, 1);
2356 if (err)
2357 goto fail;
2358
2359 err = sock_register(&kcm_family_ops);
2360 if (err)
2361 goto sock_register_fail;
2362
2363 err = register_pernet_device(&kcm_net_ops);
2364 if (err)
2365 goto net_ops_fail;
2366
2367 err = kcm_proc_init();
2368 if (err)
2369 goto proc_init_fail;
2370
2371 return 0;
2372
2373proc_init_fail:
2374 unregister_pernet_device(&kcm_net_ops);
2375
2376net_ops_fail:
2377 sock_unregister(PF_KCM);
2378
2379sock_register_fail:
2380 proto_unregister(&kcm_proto);
2381
2382fail:
2383 kmem_cache_destroy(kcm_muxp);
2384 kmem_cache_destroy(kcm_psockp);
2385
2386 if (kcm_wq)
2387 destroy_workqueue(kcm_wq);
2388
2389 return err;
2390}
2391
2392static void __exit kcm_exit(void)
2393{
2394 kcm_proc_exit();
2395 unregister_pernet_device(&kcm_net_ops);
2396 sock_unregister(PF_KCM);
2397 proto_unregister(&kcm_proto);
2398 destroy_workqueue(kcm_wq);
2399
2400 kmem_cache_destroy(kcm_muxp);
2401 kmem_cache_destroy(kcm_psockp);
2402}
2403
2404module_init(kcm_init);
2405module_exit(kcm_exit);
2406
2407MODULE_LICENSE("GPL");
2408MODULE_ALIAS_NETPROTO(PF_KCM);
2409
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index afca2eb4dfa7..6edfa9980314 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1376,9 +1376,9 @@ static int l2tp_tunnel_sock_create(struct net *net,
1376 memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, 1376 memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
1377 sizeof(udp_conf.peer_ip6)); 1377 sizeof(udp_conf.peer_ip6));
1378 udp_conf.use_udp6_tx_checksums = 1378 udp_conf.use_udp6_tx_checksums =
1379 cfg->udp6_zero_tx_checksums; 1379 ! cfg->udp6_zero_tx_checksums;
1380 udp_conf.use_udp6_rx_checksums = 1380 udp_conf.use_udp6_rx_checksums =
1381 cfg->udp6_zero_rx_checksums; 1381 ! cfg->udp6_zero_rx_checksums;
1382 } else 1382 } else
1383#endif 1383#endif
1384 { 1384 {
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index ec22078b0914..42de4ccd159f 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb)
123 struct l2tp_tunnel *tunnel = NULL; 123 struct l2tp_tunnel *tunnel = NULL;
124 int length; 124 int length;
125 125
126 /* Point to L2TP header */
127 optr = ptr = skb->data;
128
129 if (!pskb_may_pull(skb, 4)) 126 if (!pskb_may_pull(skb, 4))
130 goto discard; 127 goto discard;
131 128
129 /* Point to L2TP header */
130 optr = ptr = skb->data;
132 session_id = ntohl(*((__be32 *) ptr)); 131 session_id = ntohl(*((__be32 *) ptr));
133 ptr += 4; 132 ptr += 4;
134 133
@@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
156 if (!pskb_may_pull(skb, length)) 155 if (!pskb_may_pull(skb, length))
157 goto discard; 156 goto discard;
158 157
158 /* Point to L2TP header */
159 optr = ptr = skb->data;
160 ptr += 4;
159 pr_debug("%s: ip recv\n", tunnel->name); 161 pr_debug("%s: ip recv\n", tunnel->name);
160 print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); 162 print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
161 } 163 }
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index a2c8747d2936..cd479903d943 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -25,6 +25,7 @@
25#include <net/udp.h> 25#include <net/udp.h>
26#include <net/inet_common.h> 26#include <net/inet_common.h>
27#include <net/inet_hashtables.h> 27#include <net/inet_hashtables.h>
28#include <net/inet6_hashtables.h>
28#include <net/tcp_states.h> 29#include <net/tcp_states.h>
29#include <net/protocol.h> 30#include <net/protocol.h>
30#include <net/xfrm.h> 31#include <net/xfrm.h>
@@ -135,12 +136,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
135 struct l2tp_tunnel *tunnel = NULL; 136 struct l2tp_tunnel *tunnel = NULL;
136 int length; 137 int length;
137 138
138 /* Point to L2TP header */
139 optr = ptr = skb->data;
140
141 if (!pskb_may_pull(skb, 4)) 139 if (!pskb_may_pull(skb, 4))
142 goto discard; 140 goto discard;
143 141
142 /* Point to L2TP header */
143 optr = ptr = skb->data;
144 session_id = ntohl(*((__be32 *) ptr)); 144 session_id = ntohl(*((__be32 *) ptr));
145 ptr += 4; 145 ptr += 4;
146 146
@@ -168,6 +168,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
168 if (!pskb_may_pull(skb, length)) 168 if (!pskb_may_pull(skb, length))
169 goto discard; 169 goto discard;
170 170
171 /* Point to L2TP header */
172 optr = ptr = skb->data;
173 ptr += 4;
171 pr_debug("%s: ip recv\n", tunnel->name); 174 pr_debug("%s: ip recv\n", tunnel->name);
172 print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); 175 print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
173 } 176 }
@@ -718,7 +721,7 @@ static struct proto l2tp_ip6_prot = {
718 .sendmsg = l2tp_ip6_sendmsg, 721 .sendmsg = l2tp_ip6_sendmsg,
719 .recvmsg = l2tp_ip6_recvmsg, 722 .recvmsg = l2tp_ip6_recvmsg,
720 .backlog_rcv = l2tp_ip6_backlog_recv, 723 .backlog_rcv = l2tp_ip6_backlog_recv,
721 .hash = inet_hash, 724 .hash = inet6_hash,
722 .unhash = inet_unhash, 725 .unhash = inet_unhash,
723 .obj_size = sizeof(struct l2tp_ip6_sock), 726 .obj_size = sizeof(struct l2tp_ip6_sock),
724#ifdef CONFIG_COMPAT 727#ifdef CONFIG_COMPAT
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8e5ead366e7f..e925037fa0df 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -17,7 +17,7 @@
17 * @dev: targeted interface 17 * @dev: targeted interface
18 */ 18 */
19 19
20int l3mdev_master_ifindex_rcu(struct net_device *dev) 20int l3mdev_master_ifindex_rcu(const struct net_device *dev)
21{ 21{
22 int ifindex = 0; 22 int ifindex = 0;
23 23
@@ -28,8 +28,15 @@ int l3mdev_master_ifindex_rcu(struct net_device *dev)
28 ifindex = dev->ifindex; 28 ifindex = dev->ifindex;
29 } else if (netif_is_l3_slave(dev)) { 29 } else if (netif_is_l3_slave(dev)) {
30 struct net_device *master; 30 struct net_device *master;
31 struct net_device *_dev = (struct net_device *)dev;
31 32
32 master = netdev_master_upper_dev_get_rcu(dev); 33 /* netdev_master_upper_dev_get_rcu calls
34 * list_first_or_null_rcu to walk the upper dev list.
35 * list_first_or_null_rcu does not handle a const arg. We aren't
36 * making changes, just want the master device from that list so
37 * typecast to remove the const
38 */
39 master = netdev_master_upper_dev_get_rcu(_dev);
33 if (master) 40 if (master)
34 ifindex = master->ifindex; 41 ifindex = master->ifindex;
35 } 42 }
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8dab4e569571..8ae3ed97d95c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -38,7 +38,7 @@ static u16 llc_ui_sap_link_no_max[256];
38static struct sockaddr_llc llc_ui_addrnull; 38static struct sockaddr_llc llc_ui_addrnull;
39static const struct proto_ops llc_ui_ops; 39static const struct proto_ops llc_ui_ops;
40 40
41static int llc_ui_wait_for_conn(struct sock *sk, long timeout); 41static long llc_ui_wait_for_conn(struct sock *sk, long timeout);
42static int llc_ui_wait_for_disc(struct sock *sk, long timeout); 42static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
43static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); 43static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout);
44 44
@@ -551,7 +551,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
551 return rc; 551 return rc;
552} 552}
553 553
554static int llc_ui_wait_for_conn(struct sock *sk, long timeout) 554static long llc_ui_wait_for_conn(struct sock *sk, long timeout)
555{ 555{
556 DEFINE_WAIT(wait); 556 DEFINE_WAIT(wait);
557 557
@@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
626 if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { 626 if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
627 struct llc_pktinfo info; 627 struct llc_pktinfo info;
628 628
629 memset(&info, 0, sizeof(info));
629 info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; 630 info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
630 llc_pdu_decode_dsap(skb, &info.lpi_sap); 631 llc_pdu_decode_dsap(skb, &info.lpi_sap);
631 llc_pdu_decode_da(skb, info.lpi_mac); 632 llc_pdu_decode_da(skb, info.lpi_mac);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 367784be5df2..3a8f881b22f1 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -7,6 +7,7 @@
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2010, Intel Corporation 9 * Copyright 2007-2010, Intel Corporation
10 * Copyright(c) 2015 Intel Deutschland GmbH
10 * 11 *
11 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -61,16 +62,25 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
61{ 62{
62 struct ieee80211_local *local = sta->local; 63 struct ieee80211_local *local = sta->local;
63 struct tid_ampdu_rx *tid_rx; 64 struct tid_ampdu_rx *tid_rx;
65 struct ieee80211_ampdu_params params = {
66 .sta = &sta->sta,
67 .action = IEEE80211_AMPDU_RX_STOP,
68 .tid = tid,
69 .amsdu = false,
70 .timeout = 0,
71 .ssn = 0,
72 };
64 73
65 lockdep_assert_held(&sta->ampdu_mlme.mtx); 74 lockdep_assert_held(&sta->ampdu_mlme.mtx);
66 75
67 tid_rx = rcu_dereference_protected(sta->ampdu_mlme.tid_rx[tid], 76 tid_rx = rcu_dereference_protected(sta->ampdu_mlme.tid_rx[tid],
68 lockdep_is_held(&sta->ampdu_mlme.mtx)); 77 lockdep_is_held(&sta->ampdu_mlme.mtx));
69 78
70 if (!tid_rx) 79 if (!test_bit(tid, sta->ampdu_mlme.agg_session_valid))
71 return; 80 return;
72 81
73 RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); 82 RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL);
83 __clear_bit(tid, sta->ampdu_mlme.agg_session_valid);
74 84
75 ht_dbg(sta->sdata, 85 ht_dbg(sta->sdata,
76 "Rx BA session stop requested for %pM tid %u %s reason: %d\n", 86 "Rx BA session stop requested for %pM tid %u %s reason: %d\n",
@@ -78,8 +88,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
78 initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", 88 initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator",
79 (int)reason); 89 (int)reason);
80 90
81 if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, 91 if (drv_ampdu_action(local, sta->sdata, &params))
82 &sta->sta, tid, NULL, 0, false))
83 sdata_info(sta->sdata, 92 sdata_info(sta->sdata,
84 "HW problem - can not stop rx aggregation for %pM tid %d\n", 93 "HW problem - can not stop rx aggregation for %pM tid %d\n",
85 sta->sta.addr, tid); 94 sta->sta.addr, tid);
@@ -89,6 +98,13 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
89 ieee80211_send_delba(sta->sdata, sta->sta.addr, 98 ieee80211_send_delba(sta->sdata, sta->sta.addr,
90 tid, WLAN_BACK_RECIPIENT, reason); 99 tid, WLAN_BACK_RECIPIENT, reason);
91 100
101 /*
102 * return here in case tid_rx is not assigned - which will happen if
103 * IEEE80211_HW_SUPPORTS_REORDERING_BUFFER is set.
104 */
105 if (!tid_rx)
106 return;
107
92 del_timer_sync(&tid_rx->session_timer); 108 del_timer_sync(&tid_rx->session_timer);
93 109
94 /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */ 110 /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */
@@ -237,6 +253,15 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
237{ 253{
238 struct ieee80211_local *local = sta->sdata->local; 254 struct ieee80211_local *local = sta->sdata->local;
239 struct tid_ampdu_rx *tid_agg_rx; 255 struct tid_ampdu_rx *tid_agg_rx;
256 struct ieee80211_ampdu_params params = {
257 .sta = &sta->sta,
258 .action = IEEE80211_AMPDU_RX_START,
259 .tid = tid,
260 .amsdu = false,
261 .timeout = timeout,
262 .ssn = start_seq_num,
263 };
264
240 int i, ret = -EOPNOTSUPP; 265 int i, ret = -EOPNOTSUPP;
241 u16 status = WLAN_STATUS_REQUEST_DECLINED; 266 u16 status = WLAN_STATUS_REQUEST_DECLINED;
242 267
@@ -275,11 +300,12 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
275 /* make sure the size doesn't exceed the maximum supported by the hw */ 300 /* make sure the size doesn't exceed the maximum supported by the hw */
276 if (buf_size > local->hw.max_rx_aggregation_subframes) 301 if (buf_size > local->hw.max_rx_aggregation_subframes)
277 buf_size = local->hw.max_rx_aggregation_subframes; 302 buf_size = local->hw.max_rx_aggregation_subframes;
303 params.buf_size = buf_size;
278 304
279 /* examine state machine */ 305 /* examine state machine */
280 mutex_lock(&sta->ampdu_mlme.mtx); 306 mutex_lock(&sta->ampdu_mlme.mtx);
281 307
282 if (sta->ampdu_mlme.tid_rx[tid]) { 308 if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
283 ht_dbg_ratelimited(sta->sdata, 309 ht_dbg_ratelimited(sta->sdata,
284 "unexpected AddBA Req from %pM on tid %u\n", 310 "unexpected AddBA Req from %pM on tid %u\n",
285 sta->sta.addr, tid); 311 sta->sta.addr, tid);
@@ -290,6 +316,16 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
290 false); 316 false);
291 } 317 }
292 318
319 if (ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) {
320 ret = drv_ampdu_action(local, sta->sdata, &params);
321 ht_dbg(sta->sdata,
322 "Rx A-MPDU request on %pM tid %d result %d\n",
323 sta->sta.addr, tid, ret);
324 if (!ret)
325 status = WLAN_STATUS_SUCCESS;
326 goto end;
327 }
328
293 /* prepare A-MPDU MLME for Rx aggregation */ 329 /* prepare A-MPDU MLME for Rx aggregation */
294 tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL); 330 tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
295 if (!tid_agg_rx) 331 if (!tid_agg_rx)
@@ -322,8 +358,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
322 for (i = 0; i < buf_size; i++) 358 for (i = 0; i < buf_size; i++)
323 __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]); 359 __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]);
324 360
325 ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, 361 ret = drv_ampdu_action(local, sta->sdata, &params);
326 &sta->sta, tid, &start_seq_num, 0, false);
327 ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", 362 ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n",
328 sta->sta.addr, tid, ret); 363 sta->sta.addr, tid, ret);
329 if (ret) { 364 if (ret) {
@@ -341,6 +376,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
341 tid_agg_rx->timeout = timeout; 376 tid_agg_rx->timeout = timeout;
342 tid_agg_rx->stored_mpdu_num = 0; 377 tid_agg_rx->stored_mpdu_num = 0;
343 tid_agg_rx->auto_seq = auto_seq; 378 tid_agg_rx->auto_seq = auto_seq;
379 tid_agg_rx->reorder_buf_filtered = 0;
344 status = WLAN_STATUS_SUCCESS; 380 status = WLAN_STATUS_SUCCESS;
345 381
346 /* activate it for RX */ 382 /* activate it for RX */
@@ -352,6 +388,8 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
352 } 388 }
353 389
354end: 390end:
391 if (status == WLAN_STATUS_SUCCESS)
392 __set_bit(tid, sta->ampdu_mlme.agg_session_valid);
355 mutex_unlock(&sta->ampdu_mlme.mtx); 393 mutex_unlock(&sta->ampdu_mlme.mtx);
356 394
357end_no_lock: 395end_no_lock:
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index ff757181b0a8..4932e9f243a2 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -7,6 +7,7 @@
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2010, Intel Corporation 9 * Copyright 2007-2010, Intel Corporation
10 * Copyright(c) 2015 Intel Deutschland GmbH
10 * 11 *
11 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -295,7 +296,14 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
295{ 296{
296 struct ieee80211_local *local = sta->local; 297 struct ieee80211_local *local = sta->local;
297 struct tid_ampdu_tx *tid_tx; 298 struct tid_ampdu_tx *tid_tx;
298 enum ieee80211_ampdu_mlme_action action; 299 struct ieee80211_ampdu_params params = {
300 .sta = &sta->sta,
301 .tid = tid,
302 .buf_size = 0,
303 .amsdu = false,
304 .timeout = 0,
305 .ssn = 0,
306 };
299 int ret; 307 int ret;
300 308
301 lockdep_assert_held(&sta->ampdu_mlme.mtx); 309 lockdep_assert_held(&sta->ampdu_mlme.mtx);
@@ -304,10 +312,10 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
304 case AGG_STOP_DECLINED: 312 case AGG_STOP_DECLINED:
305 case AGG_STOP_LOCAL_REQUEST: 313 case AGG_STOP_LOCAL_REQUEST:
306 case AGG_STOP_PEER_REQUEST: 314 case AGG_STOP_PEER_REQUEST:
307 action = IEEE80211_AMPDU_TX_STOP_CONT; 315 params.action = IEEE80211_AMPDU_TX_STOP_CONT;
308 break; 316 break;
309 case AGG_STOP_DESTROY_STA: 317 case AGG_STOP_DESTROY_STA:
310 action = IEEE80211_AMPDU_TX_STOP_FLUSH; 318 params.action = IEEE80211_AMPDU_TX_STOP_FLUSH;
311 break; 319 break;
312 default: 320 default:
313 WARN_ON_ONCE(1); 321 WARN_ON_ONCE(1);
@@ -330,9 +338,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
330 spin_unlock_bh(&sta->lock); 338 spin_unlock_bh(&sta->lock);
331 if (reason != AGG_STOP_DESTROY_STA) 339 if (reason != AGG_STOP_DESTROY_STA)
332 return -EALREADY; 340 return -EALREADY;
333 ret = drv_ampdu_action(local, sta->sdata, 341 params.action = IEEE80211_AMPDU_TX_STOP_FLUSH_CONT;
334 IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, 342 ret = drv_ampdu_action(local, sta->sdata, &params);
335 &sta->sta, tid, NULL, 0, false);
336 WARN_ON_ONCE(ret); 343 WARN_ON_ONCE(ret);
337 return 0; 344 return 0;
338 } 345 }
@@ -381,8 +388,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
381 WLAN_BACK_INITIATOR; 388 WLAN_BACK_INITIATOR;
382 tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; 389 tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST;
383 390
384 ret = drv_ampdu_action(local, sta->sdata, action, 391 ret = drv_ampdu_action(local, sta->sdata, &params);
385 &sta->sta, tid, NULL, 0, false);
386 392
387 /* HW shall not deny going back to legacy */ 393 /* HW shall not deny going back to legacy */
388 if (WARN_ON(ret)) { 394 if (WARN_ON(ret)) {
@@ -445,7 +451,14 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
445 struct tid_ampdu_tx *tid_tx; 451 struct tid_ampdu_tx *tid_tx;
446 struct ieee80211_local *local = sta->local; 452 struct ieee80211_local *local = sta->local;
447 struct ieee80211_sub_if_data *sdata = sta->sdata; 453 struct ieee80211_sub_if_data *sdata = sta->sdata;
448 u16 start_seq_num; 454 struct ieee80211_ampdu_params params = {
455 .sta = &sta->sta,
456 .action = IEEE80211_AMPDU_TX_START,
457 .tid = tid,
458 .buf_size = 0,
459 .amsdu = false,
460 .timeout = 0,
461 };
449 int ret; 462 int ret;
450 463
451 tid_tx = rcu_dereference_protected_tid_tx(sta, tid); 464 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
@@ -467,10 +480,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
467 */ 480 */
468 synchronize_net(); 481 synchronize_net();
469 482
470 start_seq_num = sta->tid_seq[tid] >> 4; 483 params.ssn = sta->tid_seq[tid] >> 4;
471 484 ret = drv_ampdu_action(local, sdata, &params);
472 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
473 &sta->sta, tid, &start_seq_num, 0, false);
474 if (ret) { 485 if (ret) {
475 ht_dbg(sdata, 486 ht_dbg(sdata,
476 "BA request denied - HW unavailable for %pM tid %d\n", 487 "BA request denied - HW unavailable for %pM tid %d\n",
@@ -499,7 +510,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
499 510
500 /* send AddBA request */ 511 /* send AddBA request */
501 ieee80211_send_addba_request(sdata, sta->sta.addr, tid, 512 ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
502 tid_tx->dialog_token, start_seq_num, 513 tid_tx->dialog_token, params.ssn,
503 IEEE80211_MAX_AMPDU_BUF, 514 IEEE80211_MAX_AMPDU_BUF,
504 tid_tx->timeout); 515 tid_tx->timeout);
505} 516}
@@ -684,18 +695,24 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
684 struct sta_info *sta, u16 tid) 695 struct sta_info *sta, u16 tid)
685{ 696{
686 struct tid_ampdu_tx *tid_tx; 697 struct tid_ampdu_tx *tid_tx;
698 struct ieee80211_ampdu_params params = {
699 .sta = &sta->sta,
700 .action = IEEE80211_AMPDU_TX_OPERATIONAL,
701 .tid = tid,
702 .timeout = 0,
703 .ssn = 0,
704 };
687 705
688 lockdep_assert_held(&sta->ampdu_mlme.mtx); 706 lockdep_assert_held(&sta->ampdu_mlme.mtx);
689 707
690 tid_tx = rcu_dereference_protected_tid_tx(sta, tid); 708 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
709 params.buf_size = tid_tx->buf_size;
710 params.amsdu = tid_tx->amsdu;
691 711
692 ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n", 712 ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n",
693 sta->sta.addr, tid); 713 sta->sta.addr, tid);
694 714
695 drv_ampdu_action(local, sta->sdata, 715 drv_ampdu_action(local, sta->sdata, &params);
696 IEEE80211_AMPDU_TX_OPERATIONAL,
697 &sta->sta, tid, NULL, tid_tx->buf_size,
698 tid_tx->amsdu);
699 716
700 /* 717 /*
701 * synchronize with TX path, while splicing the TX path 718 * synchronize with TX path, while splicing the TX path
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 166a29fe6c35..fe1704c4e8fb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -339,8 +339,9 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
339 339
340 switch (key->conf.cipher) { 340 switch (key->conf.cipher) {
341 case WLAN_CIPHER_SUITE_TKIP: 341 case WLAN_CIPHER_SUITE_TKIP:
342 iv32 = key->u.tkip.tx.iv32; 342 pn64 = atomic64_read(&key->conf.tx_pn);
343 iv16 = key->u.tkip.tx.iv16; 343 iv32 = TKIP_PN_TO_IV32(pn64);
344 iv16 = TKIP_PN_TO_IV16(pn64);
344 345
345 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && 346 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
346 !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { 347 !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
@@ -1131,6 +1132,34 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1131 sta->sta.max_sp = params->max_sp; 1132 sta->sta.max_sp = params->max_sp;
1132 } 1133 }
1133 1134
1135 /* The sender might not have sent the last bit, consider it to be 0 */
1136 if (params->ext_capab_len >= 8) {
1137 u8 val = (params->ext_capab[7] &
1138 WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB) >> 7;
1139
1140 /* we did get all the bits, take the MSB as well */
1141 if (params->ext_capab_len >= 9) {
1142 u8 val_msb = params->ext_capab[8] &
1143 WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB;
1144 val_msb <<= 1;
1145 val |= val_msb;
1146 }
1147
1148 switch (val) {
1149 case 1:
1150 sta->sta.max_amsdu_subframes = 32;
1151 break;
1152 case 2:
1153 sta->sta.max_amsdu_subframes = 16;
1154 break;
1155 case 3:
1156 sta->sta.max_amsdu_subframes = 8;
1157 break;
1158 default:
1159 sta->sta.max_amsdu_subframes = 0;
1160 }
1161 }
1162
1134 /* 1163 /*
1135 * cfg80211 validates this (1-2007) and allows setting the AID 1164 * cfg80211 validates this (1-2007) and allows setting the AID
1136 * only when creating a new station entry 1165 * only when creating a new station entry
@@ -1160,6 +1189,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1160 ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, 1189 ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
1161 params->ht_capa, sta); 1190 params->ht_capa, sta);
1162 1191
1192 /* VHT can override some HT caps such as the A-MSDU max length */
1163 if (params->vht_capa) 1193 if (params->vht_capa)
1164 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, 1194 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
1165 params->vht_capa, sta); 1195 params->vht_capa, sta);
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 1d1b9b7bdefe..74142d07ad31 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -231,7 +231,7 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata)
231 !(sta->sdata->bss && sta->sdata->bss == sdata->bss)) 231 !(sta->sdata->bss && sta->sdata->bss == sdata->bss))
232 continue; 232 continue;
233 233
234 if (!sta->uploaded) 234 if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_ASSOC))
235 continue; 235 continue;
236 236
237 max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta)); 237 max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta));
@@ -343,8 +343,10 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
343 struct ieee80211_chanctx *ctx, 343 struct ieee80211_chanctx *ctx,
344 const struct cfg80211_chan_def *chandef) 344 const struct cfg80211_chan_def *chandef)
345{ 345{
346 if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) 346 if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
347 ieee80211_recalc_chanctx_min_def(local, ctx);
347 return; 348 return;
349 }
348 350
349 WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); 351 WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
350 352
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 3e24d0ddb51b..4ab5c522ceee 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -126,6 +126,7 @@ static const char *hw_flag_names[] = {
126 FLAG(SUPPORTS_AMSDU_IN_AMPDU), 126 FLAG(SUPPORTS_AMSDU_IN_AMPDU),
127 FLAG(BEACON_TX_STATUS), 127 FLAG(BEACON_TX_STATUS),
128 FLAG(NEEDS_UNIQUE_STA_ADDR), 128 FLAG(NEEDS_UNIQUE_STA_ADDR),
129 FLAG(SUPPORTS_REORDERING_BUFFER),
129#undef FLAG 130#undef FLAG
130}; 131};
131 132
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 7961e7d0b61e..a2ef95f16f11 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -132,9 +132,10 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
132 len = scnprintf(buf, sizeof(buf), "\n"); 132 len = scnprintf(buf, sizeof(buf), "\n");
133 break; 133 break;
134 case WLAN_CIPHER_SUITE_TKIP: 134 case WLAN_CIPHER_SUITE_TKIP:
135 pn = atomic64_read(&key->conf.tx_pn);
135 len = scnprintf(buf, sizeof(buf), "%08x %04x\n", 136 len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
136 key->u.tkip.tx.iv32, 137 TKIP_PN_TO_IV32(pn),
137 key->u.tkip.tx.iv16); 138 TKIP_PN_TO_IV16(pn));
138 break; 139 break;
139 case WLAN_CIPHER_SUITE_CCMP: 140 case WLAN_CIPHER_SUITE_CCMP:
140 case WLAN_CIPHER_SUITE_CCMP_256: 141 case WLAN_CIPHER_SUITE_CCMP_256:
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index ca1fe5576103..c258f1041d33 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -284,9 +284,7 @@ int drv_switch_vif_chanctx(struct ieee80211_local *local,
284 284
285int drv_ampdu_action(struct ieee80211_local *local, 285int drv_ampdu_action(struct ieee80211_local *local,
286 struct ieee80211_sub_if_data *sdata, 286 struct ieee80211_sub_if_data *sdata,
287 enum ieee80211_ampdu_mlme_action action, 287 struct ieee80211_ampdu_params *params)
288 struct ieee80211_sta *sta, u16 tid,
289 u16 *ssn, u8 buf_size, bool amsdu)
290{ 288{
291 int ret = -EOPNOTSUPP; 289 int ret = -EOPNOTSUPP;
292 290
@@ -296,12 +294,10 @@ int drv_ampdu_action(struct ieee80211_local *local,
296 if (!check_sdata_in_driver(sdata)) 294 if (!check_sdata_in_driver(sdata))
297 return -EIO; 295 return -EIO;
298 296
299 trace_drv_ampdu_action(local, sdata, action, sta, tid, 297 trace_drv_ampdu_action(local, sdata, params);
300 ssn, buf_size, amsdu);
301 298
302 if (local->ops->ampdu_action) 299 if (local->ops->ampdu_action)
303 ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, 300 ret = local->ops->ampdu_action(&local->hw, &sdata->vif, params);
304 sta, tid, ssn, buf_size, amsdu);
305 301
306 trace_drv_return_int(local, ret); 302 trace_drv_return_int(local, ret);
307 303
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 154ce4b13406..18b0d65baff0 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -585,9 +585,7 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local)
585 585
586int drv_ampdu_action(struct ieee80211_local *local, 586int drv_ampdu_action(struct ieee80211_local *local,
587 struct ieee80211_sub_if_data *sdata, 587 struct ieee80211_sub_if_data *sdata,
588 enum ieee80211_ampdu_mlme_action action, 588 struct ieee80211_ampdu_params *params);
589 struct ieee80211_sta *sta, u16 tid,
590 u16 *ssn, u8 buf_size, bool amsdu);
591 589
592static inline int drv_get_survey(struct ieee80211_local *local, int idx, 590static inline int drv_get_survey(struct ieee80211_local *local, int idx,
593 struct survey_info *survey) 591 struct survey_info *survey)
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 7a76ce639d58..f4a528773563 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -230,6 +230,11 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
230 /* set Rx highest rate */ 230 /* set Rx highest rate */
231 ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest; 231 ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest;
232 232
233 if (ht_cap.cap & IEEE80211_HT_CAP_MAX_AMSDU)
234 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935;
235 else
236 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839;
237
233 apply: 238 apply:
234 changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); 239 changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
235 240
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 978d3bc31df7..fc3238376b39 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -7,6 +7,7 @@
7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
8 * Copyright 2009, Johannes Berg <johannes@sipsolutions.net> 8 * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
9 * Copyright 2013-2014 Intel Mobile Communications GmbH 9 * Copyright 2013-2014 Intel Mobile Communications GmbH
10 * Copyright(c) 2016 Intel Deutschland GmbH
10 * 11 *
11 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -1050,9 +1051,8 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
1050 struct cfg80211_chan_def chandef; 1051 struct cfg80211_chan_def chandef;
1051 enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; 1052 enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth;
1052 1053
1053 ieee80211_ht_oper_to_chandef(channel, 1054 cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT);
1054 elems->ht_operation, 1055 ieee80211_chandef_ht_oper(elems->ht_operation, &chandef);
1055 &chandef);
1056 1056
1057 memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); 1057 memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie));
1058 rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, 1058 rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
@@ -1066,9 +1066,8 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
1066 struct ieee80211_vht_cap cap_ie; 1066 struct ieee80211_vht_cap cap_ie;
1067 struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap; 1067 struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap;
1068 1068
1069 ieee80211_vht_oper_to_chandef(channel, 1069 ieee80211_chandef_vht_oper(elems->vht_operation,
1070 elems->vht_operation, 1070 &chandef);
1071 &chandef);
1072 memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); 1071 memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
1073 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, 1072 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
1074 &cap_ie, sta); 1073 &cap_ie, sta);
@@ -1485,14 +1484,21 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
1485 1484
1486 sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); 1485 sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
1487 1486
1488 num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
1489 &ifibss->chandef,
1490 channels,
1491 ARRAY_SIZE(channels));
1492 scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); 1487 scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
1493 ieee80211_request_ibss_scan(sdata, ifibss->ssid, 1488
1494 ifibss->ssid_len, channels, num, 1489 if (ifibss->fixed_channel) {
1495 scan_width); 1490 num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
1491 &ifibss->chandef,
1492 channels,
1493 ARRAY_SIZE(channels));
1494 ieee80211_request_ibss_scan(sdata, ifibss->ssid,
1495 ifibss->ssid_len, channels,
1496 num, scan_width);
1497 } else {
1498 ieee80211_request_ibss_scan(sdata, ifibss->ssid,
1499 ifibss->ssid_len, NULL,
1500 0, scan_width);
1501 }
1496 } else { 1502 } else {
1497 int interval = IEEE80211_SCAN_INTERVAL; 1503 int interval = IEEE80211_SCAN_INTERVAL;
1498 1504
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f006f4a44c0e..422003540169 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -716,7 +716,6 @@ struct ieee80211_if_mesh {
716 * back to wireless media and to the local net stack. 716 * back to wireless media and to the local net stack.
717 * @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume. 717 * @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume.
718 * @IEEE80211_SDATA_IN_DRIVER: indicates interface was added to driver 718 * @IEEE80211_SDATA_IN_DRIVER: indicates interface was added to driver
719 * @IEEE80211_SDATA_MU_MIMO_OWNER: indicates interface owns MU-MIMO capability
720 */ 719 */
721enum ieee80211_sub_if_data_flags { 720enum ieee80211_sub_if_data_flags {
722 IEEE80211_SDATA_ALLMULTI = BIT(0), 721 IEEE80211_SDATA_ALLMULTI = BIT(0),
@@ -724,7 +723,6 @@ enum ieee80211_sub_if_data_flags {
724 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), 723 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
725 IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4), 724 IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4),
726 IEEE80211_SDATA_IN_DRIVER = BIT(5), 725 IEEE80211_SDATA_IN_DRIVER = BIT(5),
727 IEEE80211_SDATA_MU_MIMO_OWNER = BIT(6),
728}; 726};
729 727
730/** 728/**
@@ -804,6 +802,7 @@ enum txq_info_flags {
804struct txq_info { 802struct txq_info {
805 struct sk_buff_head queue; 803 struct sk_buff_head queue;
806 unsigned long flags; 804 unsigned long flags;
805 unsigned long byte_cnt;
807 806
808 /* keep last! */ 807 /* keep last! */
809 struct ieee80211_txq txq; 808 struct ieee80211_txq txq;
@@ -1466,7 +1465,13 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
1466{ 1465{
1467 WARN_ON_ONCE(status->flag & RX_FLAG_MACTIME_START && 1466 WARN_ON_ONCE(status->flag & RX_FLAG_MACTIME_START &&
1468 status->flag & RX_FLAG_MACTIME_END); 1467 status->flag & RX_FLAG_MACTIME_END);
1469 return status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END); 1468 if (status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END))
1469 return true;
1470 /* can't handle HT/VHT preamble yet */
1471 if (status->flag & RX_FLAG_MACTIME_PLCP_START &&
1472 !(status->flag & (RX_FLAG_HT | RX_FLAG_VHT)))
1473 return true;
1474 return false;
1470} 1475}
1471 1476
1472u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, 1477u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
@@ -1714,6 +1719,12 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
1714enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta); 1719enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta);
1715enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); 1720enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
1716void ieee80211_sta_set_rx_nss(struct sta_info *sta); 1721void ieee80211_sta_set_rx_nss(struct sta_info *sta);
1722enum ieee80211_sta_rx_bandwidth
1723ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
1724enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta);
1725void ieee80211_sta_set_rx_nss(struct sta_info *sta);
1726void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
1727 struct ieee80211_mgmt *mgmt);
1717u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, 1728u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
1718 struct sta_info *sta, u8 opmode, 1729 struct sta_info *sta, u8 opmode,
1719 enum ieee80211_band band); 1730 enum ieee80211_band band);
@@ -1829,20 +1840,6 @@ static inline void ieee802_11_parse_elems(const u8 *start, size_t len,
1829 ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); 1840 ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0);
1830} 1841}
1831 1842
1832static inline bool ieee80211_rx_reorder_ready(struct sk_buff_head *frames)
1833{
1834 struct sk_buff *tail = skb_peek_tail(frames);
1835 struct ieee80211_rx_status *status;
1836
1837 if (!tail)
1838 return false;
1839
1840 status = IEEE80211_SKB_RXCB(tail);
1841 if (status->flag & RX_FLAG_AMSDU_MORE)
1842 return false;
1843
1844 return true;
1845}
1846 1843
1847extern const int ieee802_1d_to_ac[8]; 1844extern const int ieee802_1d_to_ac[8];
1848 1845
@@ -1986,12 +1983,10 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
1986u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); 1983u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
1987 1984
1988/* channel management */ 1985/* channel management */
1989void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, 1986bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
1990 const struct ieee80211_ht_operation *ht_oper, 1987 struct cfg80211_chan_def *chandef);
1991 struct cfg80211_chan_def *chandef); 1988bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
1992void ieee80211_vht_oper_to_chandef(struct ieee80211_channel *control_chan, 1989 struct cfg80211_chan_def *chandef);
1993 const struct ieee80211_vht_operation *oper,
1994 struct cfg80211_chan_def *chandef);
1995u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c); 1990u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
1996 1991
1997int __must_check 1992int __must_check
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index c9e325d2e120..e1cb22c16530 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -977,7 +977,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	if (sdata->vif.txq) {
 		struct txq_info *txqi = to_txq_info(sdata->vif.txq);
 
+		spin_lock_bh(&txqi->queue.lock);
 		ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
+		txqi->byte_cnt = 0;
+		spin_unlock_bh(&txqi->queue.lock);
+
 		atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
 	}
 
@@ -1271,6 +1275,16 @@ static void ieee80211_iface_work(struct work_struct *work)
 			}
 		}
 		mutex_unlock(&local->sta_mtx);
+	} else if (ieee80211_is_action(mgmt->frame_control) &&
+		   mgmt->u.action.category == WLAN_CATEGORY_VHT) {
+		switch (mgmt->u.action.u.vht_group_notif.action_code) {
+		case WLAN_VHT_ACTION_GROUPID_MGMT:
+			ieee80211_process_mu_groups(sdata, mgmt);
+			break;
+		default:
+			WARN_ON(1);
+			break;
+		}
 	} else if (ieee80211_is_data_qos(mgmt->frame_control)) {
 		struct ieee80211_hdr *hdr = (void *)mgmt;
 		/*
@@ -1747,7 +1761,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = dev_alloc_name(ndev, ndev->name);
 		if (ret < 0) {
-			free_netdev(ndev);
+			ieee80211_if_free(ndev);
 			return ret;
 		}
 
@@ -1833,7 +1847,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = register_netdevice(ndev);
 		if (ret) {
-			free_netdev(ndev);
+			ieee80211_if_free(ndev);
 			return ret;
 		}
 	}
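
The iface.c changes above do two things: interface teardown now drains the per-interface software TX queue under the queue lock while clearing the new byte_cnt accounting field, and the error paths of ieee80211_if_add() hand the half-initialized netdev to ieee80211_if_free() instead of bare free_netdev(). A condensed sketch of the teardown sequence as it reads after the patch (taken from the hunk above, not a standalone program; the locking comment is editorial):

	if (sdata->vif.txq) {
		struct txq_info *txqi = to_txq_info(sdata->vif.txq);

		/* purge queued frames and reset the byte accounting while
		 * holding the queue lock, so the two stay consistent with
		 * respect to concurrent enqueue/dequeue */
		spin_lock_bh(&txqi->queue.lock);
		ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
		txqi->byte_cnt = 0;
		spin_unlock_bh(&txqi->queue.lock);

		atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
	}
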
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 5e5bc599da4c..3df7b0392d30 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -932,50 +932,6 @@ void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid,
932} 932}
933EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_notify); 933EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_notify);
934 934
935void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf,
936 struct ieee80211_key_seq *seq)
937{
938 struct ieee80211_key *key;
939 u64 pn64;
940
941 if (WARN_ON(!(keyconf->flags & IEEE80211_KEY_FLAG_GENERATE_IV)))
942 return;
943
944 key = container_of(keyconf, struct ieee80211_key, conf);
945
946 switch (key->conf.cipher) {
947 case WLAN_CIPHER_SUITE_TKIP:
948 seq->tkip.iv32 = key->u.tkip.tx.iv32;
949 seq->tkip.iv16 = key->u.tkip.tx.iv16;
950 break;
951 case WLAN_CIPHER_SUITE_CCMP:
952 case WLAN_CIPHER_SUITE_CCMP_256:
953 case WLAN_CIPHER_SUITE_AES_CMAC:
954 case WLAN_CIPHER_SUITE_BIP_CMAC_256:
955 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
956 offsetof(typeof(*seq), aes_cmac));
957 case WLAN_CIPHER_SUITE_BIP_GMAC_128:
958 case WLAN_CIPHER_SUITE_BIP_GMAC_256:
959 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
960 offsetof(typeof(*seq), aes_gmac));
961 case WLAN_CIPHER_SUITE_GCMP:
962 case WLAN_CIPHER_SUITE_GCMP_256:
963 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
964 offsetof(typeof(*seq), gcmp));
965 pn64 = atomic64_read(&key->conf.tx_pn);
966 seq->ccmp.pn[5] = pn64;
967 seq->ccmp.pn[4] = pn64 >> 8;
968 seq->ccmp.pn[3] = pn64 >> 16;
969 seq->ccmp.pn[2] = pn64 >> 24;
970 seq->ccmp.pn[1] = pn64 >> 32;
971 seq->ccmp.pn[0] = pn64 >> 40;
972 break;
973 default:
974 WARN_ON(1);
975 }
976}
977EXPORT_SYMBOL(ieee80211_get_key_tx_seq);
978
979void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, 935void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf,
980 int tid, struct ieee80211_key_seq *seq) 936 int tid, struct ieee80211_key_seq *seq)
981{ 937{
@@ -1029,48 +985,6 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf,
1029} 985}
1030EXPORT_SYMBOL(ieee80211_get_key_rx_seq); 986EXPORT_SYMBOL(ieee80211_get_key_rx_seq);
1031 987
1032void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf,
1033 struct ieee80211_key_seq *seq)
1034{
1035 struct ieee80211_key *key;
1036 u64 pn64;
1037
1038 key = container_of(keyconf, struct ieee80211_key, conf);
1039
1040 switch (key->conf.cipher) {
1041 case WLAN_CIPHER_SUITE_TKIP:
1042 key->u.tkip.tx.iv32 = seq->tkip.iv32;
1043 key->u.tkip.tx.iv16 = seq->tkip.iv16;
1044 break;
1045 case WLAN_CIPHER_SUITE_CCMP:
1046 case WLAN_CIPHER_SUITE_CCMP_256:
1047 case WLAN_CIPHER_SUITE_AES_CMAC:
1048 case WLAN_CIPHER_SUITE_BIP_CMAC_256:
1049 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
1050 offsetof(typeof(*seq), aes_cmac));
1051 case WLAN_CIPHER_SUITE_BIP_GMAC_128:
1052 case WLAN_CIPHER_SUITE_BIP_GMAC_256:
1053 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
1054 offsetof(typeof(*seq), aes_gmac));
1055 case WLAN_CIPHER_SUITE_GCMP:
1056 case WLAN_CIPHER_SUITE_GCMP_256:
1057 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
1058 offsetof(typeof(*seq), gcmp));
1059 pn64 = (u64)seq->ccmp.pn[5] |
1060 ((u64)seq->ccmp.pn[4] << 8) |
1061 ((u64)seq->ccmp.pn[3] << 16) |
1062 ((u64)seq->ccmp.pn[2] << 24) |
1063 ((u64)seq->ccmp.pn[1] << 32) |
1064 ((u64)seq->ccmp.pn[0] << 40);
1065 atomic64_set(&key->conf.tx_pn, pn64);
1066 break;
1067 default:
1068 WARN_ON(1);
1069 break;
1070 }
1071}
1072EXPORT_SYMBOL_GPL(ieee80211_set_key_tx_seq);
1073
1074void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, 988void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
1075 int tid, struct ieee80211_key_seq *seq) 989 int tid, struct ieee80211_key_seq *seq)
1076{ 990{
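
The two exported helpers deleted above mainly translated the 48-bit CCMP/GCMP packet number between its u64 counter form and the six-byte layout kept in seq->ccmp.pn, with pn[0] holding the most significant byte. A standalone round-trip model of that packing, assuming nothing beyond standard C (names are illustrative, not kernel API):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* pack a 48-bit PN counter into the byte order produced by the
	 * removed ieee80211_get_key_tx_seq() */
	static void pn_to_bytes(uint64_t pn64, uint8_t pn[6])
	{
		pn[5] = pn64;
		pn[4] = pn64 >> 8;
		pn[3] = pn64 >> 16;
		pn[2] = pn64 >> 24;
		pn[1] = pn64 >> 32;
		pn[0] = pn64 >> 40;
	}

	/* reverse transform, as done by the removed ieee80211_set_key_tx_seq() */
	static uint64_t pn_from_bytes(const uint8_t pn[6])
	{
		return (uint64_t)pn[5] |
		       ((uint64_t)pn[4] << 8) |
		       ((uint64_t)pn[3] << 16) |
		       ((uint64_t)pn[2] << 24) |
		       ((uint64_t)pn[1] << 32) |
		       ((uint64_t)pn[0] << 40);
	}

	int main(void)
	{
		uint8_t pn[6];

		pn_to_bytes(0x0123456789abULL, pn);
		assert(pn_from_bytes(pn) == 0x0123456789abULL);
		printf("pn = %02x %02x %02x %02x %02x %02x\n",
		       pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]);
		return 0;
	}

The removed code already read the counter with atomic64_read(&key->conf.tx_pn), i.e. the TX PN lives in the driver-visible ieee80211_key_conf, so callers can presumably work on that counter directly, which is what makes the get/set wrappers removable.
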
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 9951ef06323e..4aa20cef0859 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -44,13 +44,17 @@ enum ieee80211_internal_tkip_state {
 };
 
 struct tkip_ctx {
-	u32 iv32;	/* current iv32 */
-	u16 iv16;	/* current iv16 */
 	u16 p1k[5];	/* p1k cache */
 	u32 p1k_iv32;	/* iv32 for which p1k computed */
 	enum ieee80211_internal_tkip_state state;
 };
 
+struct tkip_ctx_rx {
+	struct tkip_ctx ctx;
+	u32 iv32;	/* current iv32 */
+	u16 iv16;	/* current iv16 */
+};
+
 struct ieee80211_key {
 	struct ieee80211_local *local;
 	struct ieee80211_sub_if_data *sdata;
@@ -71,7 +75,7 @@ struct ieee80211_key {
 			struct tkip_ctx tx;
 
 			/* last received RSC */
-			struct tkip_ctx rx[IEEE80211_NUM_TIDS];
+			struct tkip_ctx_rx rx[IEEE80211_NUM_TIDS];
 
 			/* number of mic failures */
 			u32 mic_failures;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6f85b6ab8e51..d32cefcb63b0 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -91,11 +91,10 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
 	if (sdata->vif.bss_conf.basic_rates != basic_rates)
 		return false;
 
-	ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan,
-				     ie->ht_operation, &sta_chan_def);
-
-	ieee80211_vht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan,
-				      ie->vht_operation, &sta_chan_def);
+	cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan,
+				NL80211_CHAN_NO_HT);
+	ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def);
+	ieee80211_chandef_vht_oper(ie->vht_operation, &sta_chan_def);
 
 	if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
 					 &sta_chan_def))
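
Together with the prototype change in ieee80211_i.h further up, this hunk shows the new calling convention for building a channel definition from beacon elements: the caller seeds a plain 20 MHz chandef on the control channel and then lets the bool-returning helpers widen it. A call-pattern sketch (chan, ht_oper and vht_oper stand in for whatever the caller already holds; not a standalone program):

	struct cfg80211_chan_def chandef;
	bool ht_ok, vht_ok;

	/* start from a 20 MHz / no-HT definition on the control channel */
	cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_NO_HT);

	/* widen from the HT and then the VHT operation element */
	ht_ok = ieee80211_chandef_ht_oper(ht_oper, &chandef);
	vht_ok = ieee80211_chandef_vht_oper(vht_oper, &chandef);

	/* false means the element could not be applied: the mesh code
	 * above just ignores the results, while the mlme.c hunk below
	 * disables VHT when the VHT element is unusable. */

Dropping the explicit control-channel argument is what lets both helpers share one signature style and report success to the caller.
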
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 4a8019f79fb2..87c017a3b1ce 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -137,8 +137,6 @@ struct mesh_path {
  * @copy_node: function to copy nodes of the table
  * @size_order: determines size of the table, there will be 2^size_order hash
  *	buckets
- * @mean_chain_len: maximum average length for the hash buckets' list, if it is
- *	reached, the table will grow
  * @known_gates: list of known mesh gates and their mpaths by the station. The
  *	gate's mpath may or may not be resolved and active.
  *
@@ -154,7 +152,6 @@ struct mesh_table {
 	void (*free_node) (struct hlist_node *p, bool free_leafs);
 	int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl);
 	int size_order;
-	int mean_chain_len;
 	struct hlist_head *known_gates;
 	spinlock_t gates_lock;
 
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index c6be0b4f4058..002244bca948 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -205,9 +205,9 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 
-	skb_set_mac_header(skb, 0);
-	skb_set_network_header(skb, 0);
-	skb_set_transport_header(skb, 0);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
 
 	/* Send all internal mgmt frames on VO. Accordingly set TID to 7. */
 	skb_set_queue_mapping(skb, IEEE80211_AC_VO);
@@ -530,7 +530,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 	const u8 *target_addr, *orig_addr;
 	const u8 *da;
 	u8 target_flags, ttl, flags;
-	u32 orig_sn, target_sn, lifetime, target_metric;
+	u32 orig_sn, target_sn, lifetime, target_metric = 0;
 	bool reply = false;
 	bool forward = true;
 	bool root_is_gate;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index dadf8dc6f1cf..2ba7aa56b11c 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -55,16 +55,21 @@ int mpp_paths_generation;
55static DEFINE_RWLOCK(pathtbl_resize_lock); 55static DEFINE_RWLOCK(pathtbl_resize_lock);
56 56
57 57
58static inline struct mesh_table *resize_dereference_paths(
59 struct mesh_table __rcu *table)
60{
61 return rcu_dereference_protected(table,
62 lockdep_is_held(&pathtbl_resize_lock));
63}
64
58static inline struct mesh_table *resize_dereference_mesh_paths(void) 65static inline struct mesh_table *resize_dereference_mesh_paths(void)
59{ 66{
60 return rcu_dereference_protected(mesh_paths, 67 return resize_dereference_paths(mesh_paths);
61 lockdep_is_held(&pathtbl_resize_lock));
62} 68}
63 69
64static inline struct mesh_table *resize_dereference_mpp_paths(void) 70static inline struct mesh_table *resize_dereference_mpp_paths(void)
65{ 71{
66 return rcu_dereference_protected(mpp_paths, 72 return resize_dereference_paths(mpp_paths);
67 lockdep_is_held(&pathtbl_resize_lock));
68} 73}
69 74
70/* 75/*
@@ -160,11 +165,10 @@ static int mesh_table_grow(struct mesh_table *oldtbl,
160 int i; 165 int i;
161 166
162 if (atomic_read(&oldtbl->entries) 167 if (atomic_read(&oldtbl->entries)
163 < oldtbl->mean_chain_len * (oldtbl->hash_mask + 1)) 168 < MEAN_CHAIN_LEN * (oldtbl->hash_mask + 1))
164 return -EAGAIN; 169 return -EAGAIN;
165 170
166 newtbl->free_node = oldtbl->free_node; 171 newtbl->free_node = oldtbl->free_node;
167 newtbl->mean_chain_len = oldtbl->mean_chain_len;
168 newtbl->copy_node = oldtbl->copy_node; 172 newtbl->copy_node = oldtbl->copy_node;
169 newtbl->known_gates = oldtbl->known_gates; 173 newtbl->known_gates = oldtbl->known_gates;
170 atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries)); 174 atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries));
@@ -585,7 +589,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
585 589
586 hlist_add_head_rcu(&new_node->list, bucket); 590 hlist_add_head_rcu(&new_node->list, bucket);
587 if (atomic_inc_return(&tbl->entries) >= 591 if (atomic_inc_return(&tbl->entries) >=
588 tbl->mean_chain_len * (tbl->hash_mask + 1)) 592 MEAN_CHAIN_LEN * (tbl->hash_mask + 1))
589 grow = 1; 593 grow = 1;
590 594
591 mesh_paths_generation++; 595 mesh_paths_generation++;
@@ -714,7 +718,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
714 718
715 hlist_add_head_rcu(&new_node->list, bucket); 719 hlist_add_head_rcu(&new_node->list, bucket);
716 if (atomic_inc_return(&tbl->entries) >= 720 if (atomic_inc_return(&tbl->entries) >=
717 tbl->mean_chain_len * (tbl->hash_mask + 1)) 721 MEAN_CHAIN_LEN * (tbl->hash_mask + 1))
718 grow = 1; 722 grow = 1;
719 723
720 spin_unlock(&tbl->hashwlock[hash_idx]); 724 spin_unlock(&tbl->hashwlock[hash_idx]);
@@ -835,6 +839,29 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
835 rcu_read_unlock(); 839 rcu_read_unlock();
836} 840}
837 841
842static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
843 const u8 *proxy)
844{
845 struct mesh_table *tbl;
846 struct mesh_path *mpp;
847 struct mpath_node *node;
848 int i;
849
850 rcu_read_lock();
851 read_lock_bh(&pathtbl_resize_lock);
852 tbl = resize_dereference_mpp_paths();
853 for_each_mesh_entry(tbl, node, i) {
854 mpp = node->mpath;
855 if (ether_addr_equal(mpp->mpp, proxy)) {
856 spin_lock(&tbl->hashwlock[i]);
857 __mesh_path_del(tbl, node);
858 spin_unlock(&tbl->hashwlock[i]);
859 }
860 }
861 read_unlock_bh(&pathtbl_resize_lock);
862 rcu_read_unlock();
863}
864
838static void table_flush_by_iface(struct mesh_table *tbl, 865static void table_flush_by_iface(struct mesh_table *tbl,
839 struct ieee80211_sub_if_data *sdata) 866 struct ieee80211_sub_if_data *sdata)
840{ 867{
@@ -876,14 +903,17 @@ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
876} 903}
877 904
878/** 905/**
879 * mesh_path_del - delete a mesh path from the table 906 * table_path_del - delete a path from the mesh or mpp table
880 * 907 *
881 * @addr: dst address (ETH_ALEN length) 908 * @tbl: mesh or mpp path table
882 * @sdata: local subif 909 * @sdata: local subif
910 * @addr: dst address (ETH_ALEN length)
883 * 911 *
884 * Returns: 0 if successful 912 * Returns: 0 if successful
885 */ 913 */
886int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) 914static int table_path_del(struct mesh_table __rcu *rcu_tbl,
915 struct ieee80211_sub_if_data *sdata,
916 const u8 *addr)
887{ 917{
888 struct mesh_table *tbl; 918 struct mesh_table *tbl;
889 struct mesh_path *mpath; 919 struct mesh_path *mpath;
@@ -892,8 +922,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
892 int hash_idx; 922 int hash_idx;
893 int err = 0; 923 int err = 0;
894 924
895 read_lock_bh(&pathtbl_resize_lock); 925 tbl = resize_dereference_paths(rcu_tbl);
896 tbl = resize_dereference_mesh_paths();
897 hash_idx = mesh_table_hash(addr, sdata, tbl); 926 hash_idx = mesh_table_hash(addr, sdata, tbl);
898 bucket = &tbl->hash_buckets[hash_idx]; 927 bucket = &tbl->hash_buckets[hash_idx];
899 928
@@ -909,9 +938,50 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
909 938
910 err = -ENXIO; 939 err = -ENXIO;
911enddel: 940enddel:
912 mesh_paths_generation++;
913 spin_unlock(&tbl->hashwlock[hash_idx]); 941 spin_unlock(&tbl->hashwlock[hash_idx]);
942 return err;
943}
944
945/**
946 * mesh_path_del - delete a mesh path from the table
947 *
948 * @addr: dst address (ETH_ALEN length)
949 * @sdata: local subif
950 *
951 * Returns: 0 if successful
952 */
953int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
954{
955 int err = 0;
956
957 /* flush relevant mpp entries first */
958 mpp_flush_by_proxy(sdata, addr);
959
960 read_lock_bh(&pathtbl_resize_lock);
961 err = table_path_del(mesh_paths, sdata, addr);
962 mesh_paths_generation++;
914 read_unlock_bh(&pathtbl_resize_lock); 963 read_unlock_bh(&pathtbl_resize_lock);
964
965 return err;
966}
967
968/**
969 * mpp_path_del - delete a mesh proxy path from the table
970 *
971 * @addr: addr address (ETH_ALEN length)
972 * @sdata: local subif
973 *
974 * Returns: 0 if successful
975 */
976static int mpp_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
977{
978 int err = 0;
979
980 read_lock_bh(&pathtbl_resize_lock);
981 err = table_path_del(mpp_paths, sdata, addr);
982 mpp_paths_generation++;
983 read_unlock_bh(&pathtbl_resize_lock);
984
915 return err; 985 return err;
916} 986}
917 987
@@ -1076,7 +1146,6 @@ int mesh_pathtbl_init(void)
1076 return -ENOMEM; 1146 return -ENOMEM;
1077 tbl_path->free_node = &mesh_path_node_free; 1147 tbl_path->free_node = &mesh_path_node_free;
1078 tbl_path->copy_node = &mesh_path_node_copy; 1148 tbl_path->copy_node = &mesh_path_node_copy;
1079 tbl_path->mean_chain_len = MEAN_CHAIN_LEN;
1080 tbl_path->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); 1149 tbl_path->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC);
1081 if (!tbl_path->known_gates) { 1150 if (!tbl_path->known_gates) {
1082 ret = -ENOMEM; 1151 ret = -ENOMEM;
@@ -1092,7 +1161,6 @@ int mesh_pathtbl_init(void)
1092 } 1161 }
1093 tbl_mpp->free_node = &mesh_path_node_free; 1162 tbl_mpp->free_node = &mesh_path_node_free;
1094 tbl_mpp->copy_node = &mesh_path_node_copy; 1163 tbl_mpp->copy_node = &mesh_path_node_copy;
1095 tbl_mpp->mean_chain_len = MEAN_CHAIN_LEN;
1096 tbl_mpp->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); 1164 tbl_mpp->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC);
1097 if (!tbl_mpp->known_gates) { 1165 if (!tbl_mpp->known_gates) {
1098 ret = -ENOMEM; 1166 ret = -ENOMEM;
@@ -1131,6 +1199,17 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
1131 time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) 1199 time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
1132 mesh_path_del(mpath->sdata, mpath->dst); 1200 mesh_path_del(mpath->sdata, mpath->dst);
1133 } 1201 }
1202
1203 tbl = rcu_dereference(mpp_paths);
1204 for_each_mesh_entry(tbl, node, i) {
1205 if (node->mpath->sdata != sdata)
1206 continue;
1207 mpath = node->mpath;
1208 if ((!(mpath->flags & MESH_PATH_FIXED)) &&
1209 time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
1210 mpp_path_del(mpath->sdata, mpath->dst);
1211 }
1212
1134 rcu_read_unlock(); 1213 rcu_read_unlock();
1135} 1214}
1136 1215
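
The mesh_pathtbl.c changes above pull the shared lookup-and-unlink logic into table_path_del() and add proxy-path maintenance: deleting a mesh path now first flushes any mpp entries that resolve through it, and expired mpp entries are aged out in mesh_path_expire() alongside ordinary paths. Stripped of the resize lock, hash locking and generation counters, the resulting call structure is roughly (condensed from the hunks above, not a standalone program):

	/* shared worker: find addr in the given table and unlink it */
	static int table_path_del(struct mesh_table __rcu *rcu_tbl,
				  struct ieee80211_sub_if_data *sdata,
				  const u8 *addr);

	int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
	{
		/* drop proxy entries pointing at this destination first */
		mpp_flush_by_proxy(sdata, addr);
		return table_path_del(mesh_paths, sdata, addr);	/* + mesh_paths_generation++ */
	}

	static int mpp_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
	{
		return table_path_del(mpp_paths, sdata, addr);	/* + mpp_paths_generation++ */
	}
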
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index bd3d55eb21d4..a07e93c21c9e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -976,6 +976,10 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
 			mpl_dbg(sdata, "Mesh plink error: no more free plinks\n");
 			goto out;
 		}
+
+		/* new matching peer */
+		event = OPN_ACPT;
+		goto out;
 	} else {
 		if (!test_sta_flag(sta, WLAN_STA_AUTH)) {
 			mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n");
@@ -985,12 +989,6 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
 			goto out;
 		}
 
-		/* new matching peer */
-		if (!sta) {
-			event = OPN_ACPT;
-			goto out;
-		}
-
 		switch (ftype) {
 		case WLAN_SP_MESH_PEERING_OPEN:
 			if (!matches_local)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index bfbb1acafdd1..281b8d6e5109 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -6,7 +6,7 @@
6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
8 * Copyright 2013-2014 Intel Mobile Communications GmbH 8 * Copyright 2013-2014 Intel Mobile Communications GmbH
9 * Copyright (C) 2015 Intel Deutschland GmbH 9 * Copyright (C) 2015 - 2016 Intel Deutschland GmbH
10 * 10 *
11 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 12 * it under the terms of the GNU General Public License version 2 as
@@ -196,16 +196,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
196 196
197 /* check 40 MHz support, if we have it */ 197 /* check 40 MHz support, if we have it */
198 if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { 198 if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) {
199 switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { 199 ieee80211_chandef_ht_oper(ht_oper, chandef);
200 case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
201 chandef->width = NL80211_CHAN_WIDTH_40;
202 chandef->center_freq1 += 10;
203 break;
204 case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
205 chandef->width = NL80211_CHAN_WIDTH_40;
206 chandef->center_freq1 -= 10;
207 break;
208 }
209 } else { 200 } else {
210 /* 40 MHz (and 80 MHz) must be supported for VHT */ 201 /* 40 MHz (and 80 MHz) must be supported for VHT */
211 ret = IEEE80211_STA_DISABLE_VHT; 202 ret = IEEE80211_STA_DISABLE_VHT;
@@ -219,35 +210,11 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
219 goto out; 210 goto out;
220 } 211 }
221 212
222 vht_chandef.chan = channel; 213 vht_chandef = *chandef;
223 vht_chandef.center_freq1 = 214 if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
224 ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx,
225 channel->band);
226 vht_chandef.center_freq2 = 0;
227
228 switch (vht_oper->chan_width) {
229 case IEEE80211_VHT_CHANWIDTH_USE_HT:
230 vht_chandef.width = chandef->width;
231 vht_chandef.center_freq1 = chandef->center_freq1;
232 break;
233 case IEEE80211_VHT_CHANWIDTH_80MHZ:
234 vht_chandef.width = NL80211_CHAN_WIDTH_80;
235 break;
236 case IEEE80211_VHT_CHANWIDTH_160MHZ:
237 vht_chandef.width = NL80211_CHAN_WIDTH_160;
238 break;
239 case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
240 vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
241 vht_chandef.center_freq2 =
242 ieee80211_channel_to_frequency(
243 vht_oper->center_freq_seg2_idx,
244 channel->band);
245 break;
246 default:
247 if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) 215 if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
248 sdata_info(sdata, 216 sdata_info(sdata,
249 "AP VHT operation IE has invalid channel width (%d), disable VHT\n", 217 "AP VHT information is invalid, disable VHT\n");
250 vht_oper->chan_width);
251 ret = IEEE80211_STA_DISABLE_VHT; 218 ret = IEEE80211_STA_DISABLE_VHT;
252 goto out; 219 goto out;
253 } 220 }
@@ -592,7 +559,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
592 struct ieee80211_sub_if_data *other; 559 struct ieee80211_sub_if_data *other;
593 560
594 list_for_each_entry_rcu(other, &local->interfaces, list) { 561 list_for_each_entry_rcu(other, &local->interfaces, list) {
595 if (other->flags & IEEE80211_SDATA_MU_MIMO_OWNER) { 562 if (other->vif.mu_mimo_owner) {
596 disable_mu_mimo = true; 563 disable_mu_mimo = true;
597 break; 564 break;
598 } 565 }
@@ -600,7 +567,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
600 if (disable_mu_mimo) 567 if (disable_mu_mimo)
601 cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; 568 cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
602 else 569 else
603 sdata->flags |= IEEE80211_SDATA_MU_MIMO_OWNER; 570 sdata->vif.mu_mimo_owner = true;
604 } 571 }
605 572
606 mask = IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; 573 mask = IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK;
@@ -1638,8 +1605,7 @@ void ieee80211_dynamic_ps_timer(unsigned long data)
1638 1605
1639void ieee80211_dfs_cac_timer_work(struct work_struct *work) 1606void ieee80211_dfs_cac_timer_work(struct work_struct *work)
1640{ 1607{
1641 struct delayed_work *delayed_work = 1608 struct delayed_work *delayed_work = to_delayed_work(work);
1642 container_of(work, struct delayed_work, work);
1643 struct ieee80211_sub_if_data *sdata = 1609 struct ieee80211_sub_if_data *sdata =
1644 container_of(delayed_work, struct ieee80211_sub_if_data, 1610 container_of(delayed_work, struct ieee80211_sub_if_data,
1645 dfs_cac_timer_work); 1611 dfs_cac_timer_work);
@@ -2079,7 +2045,14 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
2079 memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); 2045 memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask));
2080 memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa)); 2046 memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa));
2081 memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask)); 2047 memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask));
2082 sdata->flags &= ~IEEE80211_SDATA_MU_MIMO_OWNER; 2048
2049 /* reset MU-MIMO ownership and group data */
2050 memset(sdata->vif.bss_conf.mu_group.membership, 0,
2051 sizeof(sdata->vif.bss_conf.mu_group.membership));
2052 memset(sdata->vif.bss_conf.mu_group.position, 0,
2053 sizeof(sdata->vif.bss_conf.mu_group.position));
2054 changed |= BSS_CHANGED_MU_GROUPS;
2055 sdata->vif.mu_mimo_owner = false;
2083 2056
2084 sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; 2057 sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
2085 2058
@@ -2536,7 +2509,8 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
2536 eth_zero_addr(sdata->u.mgd.bssid); 2509 eth_zero_addr(sdata->u.mgd.bssid);
2537 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); 2510 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
2538 sdata->u.mgd.flags = 0; 2511 sdata->u.mgd.flags = 0;
2539 sdata->flags &= ~IEEE80211_SDATA_MU_MIMO_OWNER; 2512 sdata->vif.mu_mimo_owner = false;
2513
2540 mutex_lock(&sdata->local->mtx); 2514 mutex_lock(&sdata->local->mtx);
2541 ieee80211_vif_release_channel(sdata); 2515 ieee80211_vif_release_channel(sdata);
2542 mutex_unlock(&sdata->local->mtx); 2516 mutex_unlock(&sdata->local->mtx);
@@ -3571,6 +3545,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3571 elems.ht_cap_elem, elems.ht_operation, 3545 elems.ht_cap_elem, elems.ht_operation,
3572 elems.vht_operation, bssid, &changed)) { 3546 elems.vht_operation, bssid, &changed)) {
3573 mutex_unlock(&local->sta_mtx); 3547 mutex_unlock(&local->sta_mtx);
3548 sdata_info(sdata,
3549 "failed to follow AP %pM bandwidth change, disconnect\n",
3550 bssid);
3574 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, 3551 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
3575 WLAN_REASON_DEAUTH_LEAVING, 3552 WLAN_REASON_DEAUTH_LEAVING,
3576 true, deauth_buf); 3553 true, deauth_buf);
@@ -3946,11 +3923,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
3946 * We actually lost the connection ... or did we? 3923 * We actually lost the connection ... or did we?
3947 * Let's make sure! 3924 * Let's make sure!
3948 */ 3925 */
3949 wiphy_debug(local->hw.wiphy, 3926 mlme_dbg(sdata,
3950 "%s: No probe response from AP %pM" 3927 "No probe response from AP %pM after %dms, disconnecting.\n",
3951 " after %dms, disconnecting.\n", 3928 bssid, probe_wait_ms);
3952 sdata->name,
3953 bssid, probe_wait_ms);
3954 3929
3955 ieee80211_sta_connection_lost(sdata, bssid, 3930 ieee80211_sta_connection_lost(sdata, bssid,
3956 WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); 3931 WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false);
@@ -4536,6 +4511,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4536 if (ifmgd->associated) { 4511 if (ifmgd->associated) {
4537 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; 4512 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
4538 4513
4514 sdata_info(sdata,
4515 "disconnect from AP %pM for new auth to %pM\n",
4516 ifmgd->associated->bssid, req->bss->bssid);
4539 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, 4517 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
4540 WLAN_REASON_UNSPECIFIED, 4518 WLAN_REASON_UNSPECIFIED,
4541 false, frame_buf); 4519 false, frame_buf);
@@ -4604,6 +4582,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4604 if (ifmgd->associated) { 4582 if (ifmgd->associated) {
4605 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; 4583 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
4606 4584
4585 sdata_info(sdata,
4586 "disconnect from AP %pM for new assoc to %pM\n",
4587 ifmgd->associated->bssid, req->bss->bssid);
4607 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, 4588 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
4608 WLAN_REASON_UNSPECIFIED, 4589 WLAN_REASON_UNSPECIFIED,
4609 false, frame_buf); 4590 false, frame_buf);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 60d093f40f1d..dc27becb9b71 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4,6 +4,7 @@
4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
5 * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> 5 * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
6 * Copyright 2013-2014 Intel Mobile Communications GmbH 6 * Copyright 2013-2014 Intel Mobile Communications GmbH
7 * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -18,6 +19,7 @@
18#include <linux/etherdevice.h> 19#include <linux/etherdevice.h>
19#include <linux/rcupdate.h> 20#include <linux/rcupdate.h>
20#include <linux/export.h> 21#include <linux/export.h>
22#include <linux/bitops.h>
21#include <net/mac80211.h> 23#include <net/mac80211.h>
22#include <net/ieee80211_radiotap.h> 24#include <net/ieee80211_radiotap.h>
23#include <asm/unaligned.h> 25#include <asm/unaligned.h>
@@ -122,7 +124,8 @@ static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len,
122 hdr = (void *)(skb->data + rtap_vendor_space); 124 hdr = (void *)(skb->data + rtap_vendor_space);
123 125
124 if (status->flag & (RX_FLAG_FAILED_FCS_CRC | 126 if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
125 RX_FLAG_FAILED_PLCP_CRC)) 127 RX_FLAG_FAILED_PLCP_CRC |
128 RX_FLAG_ONLY_MONITOR))
126 return true; 129 return true;
127 130
128 if (unlikely(skb->len < 16 + present_fcs_len + rtap_vendor_space)) 131 if (unlikely(skb->len < 16 + present_fcs_len + rtap_vendor_space))
@@ -507,7 +510,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
507 return NULL; 510 return NULL;
508 } 511 }
509 512
510 if (!local->monitors) { 513 if (!local->monitors || (status->flag & RX_FLAG_SKIP_MONITOR)) {
511 if (should_drop_frame(origskb, present_fcs_len, 514 if (should_drop_frame(origskb, present_fcs_len,
512 rtap_vendor_space)) { 515 rtap_vendor_space)) {
513 dev_kfree_skb(origskb); 516 dev_kfree_skb(origskb);
@@ -797,6 +800,26 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
797 return RX_CONTINUE; 800 return RX_CONTINUE;
798} 801}
799 802
803static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx,
804 int index)
805{
806 struct sk_buff_head *frames = &tid_agg_rx->reorder_buf[index];
807 struct sk_buff *tail = skb_peek_tail(frames);
808 struct ieee80211_rx_status *status;
809
810 if (tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
811 return true;
812
813 if (!tail)
814 return false;
815
816 status = IEEE80211_SKB_RXCB(tail);
817 if (status->flag & RX_FLAG_AMSDU_MORE)
818 return false;
819
820 return true;
821}
822
800static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, 823static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
801 struct tid_ampdu_rx *tid_agg_rx, 824 struct tid_ampdu_rx *tid_agg_rx,
802 int index, 825 int index,
@@ -811,7 +834,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
811 if (skb_queue_empty(skb_list)) 834 if (skb_queue_empty(skb_list))
812 goto no_frame; 835 goto no_frame;
813 836
814 if (!ieee80211_rx_reorder_ready(skb_list)) { 837 if (!ieee80211_rx_reorder_ready(tid_agg_rx, index)) {
815 __skb_queue_purge(skb_list); 838 __skb_queue_purge(skb_list);
816 goto no_frame; 839 goto no_frame;
817 } 840 }
@@ -825,6 +848,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
825 } 848 }
826 849
827no_frame: 850no_frame:
851 tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
828 tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); 852 tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
829} 853}
830 854
@@ -865,7 +889,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
865 889
866 /* release the buffer until next missing frame */ 890 /* release the buffer until next missing frame */
867 index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; 891 index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size;
868 if (!ieee80211_rx_reorder_ready(&tid_agg_rx->reorder_buf[index]) && 892 if (!ieee80211_rx_reorder_ready(tid_agg_rx, index) &&
869 tid_agg_rx->stored_mpdu_num) { 893 tid_agg_rx->stored_mpdu_num) {
870 /* 894 /*
871 * No buffers ready to be released, but check whether any 895 * No buffers ready to be released, but check whether any
@@ -874,8 +898,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
874 int skipped = 1; 898 int skipped = 1;
875 for (j = (index + 1) % tid_agg_rx->buf_size; j != index; 899 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
876 j = (j + 1) % tid_agg_rx->buf_size) { 900 j = (j + 1) % tid_agg_rx->buf_size) {
877 if (!ieee80211_rx_reorder_ready( 901 if (!ieee80211_rx_reorder_ready(tid_agg_rx, j)) {
878 &tid_agg_rx->reorder_buf[j])) {
879 skipped++; 902 skipped++;
880 continue; 903 continue;
881 } 904 }
@@ -902,8 +925,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
902 skipped) & IEEE80211_SN_MASK; 925 skipped) & IEEE80211_SN_MASK;
903 skipped = 0; 926 skipped = 0;
904 } 927 }
905 } else while (ieee80211_rx_reorder_ready( 928 } else while (ieee80211_rx_reorder_ready(tid_agg_rx, index)) {
906 &tid_agg_rx->reorder_buf[index])) {
907 ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, 929 ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
908 frames); 930 frames);
909 index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; 931 index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size;
@@ -914,8 +936,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
914 936
915 for (; j != (index - 1) % tid_agg_rx->buf_size; 937 for (; j != (index - 1) % tid_agg_rx->buf_size;
916 j = (j + 1) % tid_agg_rx->buf_size) { 938 j = (j + 1) % tid_agg_rx->buf_size) {
917 if (ieee80211_rx_reorder_ready( 939 if (ieee80211_rx_reorder_ready(tid_agg_rx, j))
918 &tid_agg_rx->reorder_buf[j]))
919 break; 940 break;
920 } 941 }
921 942
@@ -986,7 +1007,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
986 index = mpdu_seq_num % tid_agg_rx->buf_size; 1007 index = mpdu_seq_num % tid_agg_rx->buf_size;
987 1008
988 /* check if we already stored this frame */ 1009 /* check if we already stored this frame */
989 if (ieee80211_rx_reorder_ready(&tid_agg_rx->reorder_buf[index])) { 1010 if (ieee80211_rx_reorder_ready(tid_agg_rx, index)) {
990 dev_kfree_skb(skb); 1011 dev_kfree_skb(skb);
991 goto out; 1012 goto out;
992 } 1013 }
@@ -1099,6 +1120,9 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
1099 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 1120 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
1100 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); 1121 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1101 1122
1123 if (status->flag & RX_FLAG_DUP_VALIDATED)
1124 return RX_CONTINUE;
1125
1102 /* 1126 /*
1103 * Drop duplicate 802.11 retransmissions 1127 * Drop duplicate 802.11 retransmissions
1104 * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") 1128 * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery")
@@ -2217,9 +2241,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
2217 skb->dev = dev; 2241 skb->dev = dev;
2218 __skb_queue_head_init(&frame_list); 2242 __skb_queue_head_init(&frame_list);
2219 2243
2220 if (skb_linearize(skb))
2221 return RX_DROP_UNUSABLE;
2222
2223 ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, 2244 ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
2224 rx->sdata->vif.type, 2245 rx->sdata->vif.type,
2225 rx->local->hw.extra_tx_headroom, true); 2246 rx->local->hw.extra_tx_headroom, true);
@@ -2249,7 +2270,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
2249 struct ieee80211_local *local = rx->local; 2270 struct ieee80211_local *local = rx->local;
2250 struct ieee80211_sub_if_data *sdata = rx->sdata; 2271 struct ieee80211_sub_if_data *sdata = rx->sdata;
2251 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 2272 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
2252 u16 q, hdrlen; 2273 u16 ac, q, hdrlen;
2253 2274
2254 hdr = (struct ieee80211_hdr *) skb->data; 2275 hdr = (struct ieee80211_hdr *) skb->data;
2255 hdrlen = ieee80211_hdrlen(hdr->frame_control); 2276 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2308,6 +2329,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
2308 spin_lock_bh(&mppath->state_lock); 2329 spin_lock_bh(&mppath->state_lock);
2309 if (!ether_addr_equal(mppath->mpp, mpp_addr)) 2330 if (!ether_addr_equal(mppath->mpp, mpp_addr))
2310 memcpy(mppath->mpp, mpp_addr, ETH_ALEN); 2331 memcpy(mppath->mpp, mpp_addr, ETH_ALEN);
2332 mppath->exp_time = jiffies;
2311 spin_unlock_bh(&mppath->state_lock); 2333 spin_unlock_bh(&mppath->state_lock);
2312 } 2334 }
2313 rcu_read_unlock(); 2335 rcu_read_unlock();
@@ -2318,7 +2340,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
2318 ether_addr_equal(sdata->vif.addr, hdr->addr3)) 2340 ether_addr_equal(sdata->vif.addr, hdr->addr3))
2319 return RX_CONTINUE; 2341 return RX_CONTINUE;
2320 2342
2321 q = ieee80211_select_queue_80211(sdata, skb, hdr); 2343 ac = ieee80211_select_queue_80211(sdata, skb, hdr);
2344 q = sdata->vif.hw_queue[ac];
2322 if (ieee80211_queue_stopped(&local->hw, q)) { 2345 if (ieee80211_queue_stopped(&local->hw, q)) {
2323 IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); 2346 IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
2324 return RX_DROP_MONITOR; 2347 return RX_DROP_MONITOR;
@@ -2756,6 +2779,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2756 opmode, status->band); 2779 opmode, status->band);
2757 goto handled; 2780 goto handled;
2758 } 2781 }
2782 case WLAN_VHT_ACTION_GROUPID_MGMT: {
2783 if (len < IEEE80211_MIN_ACTION_SIZE + 25)
2784 goto invalid;
2785 goto queue;
2786 }
2759 default: 2787 default:
2760 break; 2788 break;
2761 } 2789 }
@@ -3091,7 +3119,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
3091 ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom, 3119 ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom,
3092 false); 3120 false);
3093 3121
3094 skb_set_mac_header(skb, 0); 3122 skb_reset_mac_header(skb);
3095 skb->ip_summed = CHECKSUM_UNNECESSARY; 3123 skb->ip_summed = CHECKSUM_UNNECESSARY;
3096 skb->pkt_type = PACKET_OTHERHOST; 3124 skb->pkt_type = PACKET_OTHERHOST;
3097 skb->protocol = htons(ETH_P_802_2); 3125 skb->protocol = htons(ETH_P_802_2);
@@ -3293,6 +3321,85 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
3293 ieee80211_rx_handlers(&rx, &frames); 3321 ieee80211_rx_handlers(&rx, &frames);
3294} 3322}
3295 3323
3324void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
3325 u16 ssn, u64 filtered,
3326 u16 received_mpdus)
3327{
3328 struct sta_info *sta;
3329 struct tid_ampdu_rx *tid_agg_rx;
3330 struct sk_buff_head frames;
3331 struct ieee80211_rx_data rx = {
3332 /* This is OK -- must be QoS data frame */
3333 .security_idx = tid,
3334 .seqno_idx = tid,
3335 };
3336 int i, diff;
3337
3338 if (WARN_ON(!pubsta || tid >= IEEE80211_NUM_TIDS))
3339 return;
3340
3341 __skb_queue_head_init(&frames);
3342
3343 sta = container_of(pubsta, struct sta_info, sta);
3344
3345 rx.sta = sta;
3346 rx.sdata = sta->sdata;
3347 rx.local = sta->local;
3348
3349 rcu_read_lock();
3350 tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
3351 if (!tid_agg_rx)
3352 goto out;
3353
3354 spin_lock_bh(&tid_agg_rx->reorder_lock);
3355
3356 if (received_mpdus >= IEEE80211_SN_MODULO >> 1) {
3357 int release;
3358
3359 /* release all frames in the reorder buffer */
3360 release = (tid_agg_rx->head_seq_num + tid_agg_rx->buf_size) %
3361 IEEE80211_SN_MODULO;
3362 ieee80211_release_reorder_frames(sta->sdata, tid_agg_rx,
3363 release, &frames);
3364 /* update ssn to match received ssn */
3365 tid_agg_rx->head_seq_num = ssn;
3366 } else {
3367 ieee80211_release_reorder_frames(sta->sdata, tid_agg_rx, ssn,
3368 &frames);
3369 }
3370
3371 /* handle the case that received ssn is behind the mac ssn.
3372 * it can be tid_agg_rx->buf_size behind and still be valid */
3373 diff = (tid_agg_rx->head_seq_num - ssn) & IEEE80211_SN_MASK;
3374 if (diff >= tid_agg_rx->buf_size) {
3375 tid_agg_rx->reorder_buf_filtered = 0;
3376 goto release;
3377 }
3378 filtered = filtered >> diff;
3379 ssn += diff;
3380
3381 /* update bitmap */
3382 for (i = 0; i < tid_agg_rx->buf_size; i++) {
3383 int index = (ssn + i) % tid_agg_rx->buf_size;
3384
3385 tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
3386 if (filtered & BIT_ULL(i))
3387 tid_agg_rx->reorder_buf_filtered |= BIT_ULL(index);
3388 }
3389
3390 /* now process also frames that the filter marking released */
3391 ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames);
3392
3393release:
3394 spin_unlock_bh(&tid_agg_rx->reorder_lock);
3395
3396 ieee80211_rx_handlers(&rx, &frames);
3397
3398 out:
3399 rcu_read_unlock();
3400}
3401EXPORT_SYMBOL(ieee80211_mark_rx_ba_filtered_frames);
3402
3296/* main receive path */ 3403/* main receive path */
3297 3404
3298static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) 3405static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
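
The rx.c additions above introduce reorder_buf_filtered, a per-TID bitmap driven by ieee80211_mark_rx_ba_filtered_frames(): a set bit marks a reorder slot whose frame was consumed by the device's BA filter, so the rewritten ieee80211_rx_reorder_ready() treats that slot as releasable even though no skb is stored there. A small standalone model of that decision in plain C (buffer size and state are illustrative; the real helper additionally refuses a slot whose tail skb still carries RX_FLAG_AMSDU_MORE):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define BUF_SIZE 8

	static uint64_t reorder_buf_filtered;	/* bit i set: slot i was filtered by HW */
	static bool slot_has_skb[BUF_SIZE];	/* stand-in for a stored frame */

	static bool reorder_ready(int index)
	{
		if (reorder_buf_filtered & (1ULL << index))
			return true;		/* nothing to deliver, but safe to advance */
		return slot_has_skb[index];	/* otherwise a stored frame must exist */
	}

	int main(void)
	{
		slot_has_skb[0] = true;			/* frame 0 arrived normally */
		reorder_buf_filtered |= 1ULL << 1;	/* frame 1 was filtered out */

		for (int i = 0; i < 3; i++)
			printf("slot %d ready: %s\n", i, reorder_ready(i) ? "yes" : "no");
		return 0;
	}

The bit is cleared again in ieee80211_release_reorder_frame() once the window moves past the slot, which is why the existing release loops only needed their ready-check swapped rather than being restructured.
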
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index a4a4f89d3ba0..861b93ffbe92 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -67,6 +67,7 @@
67 67
68static const struct rhashtable_params sta_rht_params = { 68static const struct rhashtable_params sta_rht_params = {
69 .nelem_hint = 3, /* start small */ 69 .nelem_hint = 3, /* start small */
70 .insecure_elasticity = true, /* Disable chain-length checks. */
70 .automatic_shrinking = true, 71 .automatic_shrinking = true,
71 .head_offset = offsetof(struct sta_info, hash_node), 72 .head_offset = offsetof(struct sta_info, hash_node),
72 .key_offset = offsetof(struct sta_info, addr), 73 .key_offset = offsetof(struct sta_info, addr),
@@ -116,6 +117,7 @@ static void __cleanup_single_sta(struct sta_info *sta)
116 117
117 ieee80211_purge_tx_queue(&local->hw, &txqi->queue); 118 ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
118 atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]); 119 atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]);
120 txqi->byte_cnt = 0;
119 } 121 }
120 } 122 }
121 123
@@ -257,11 +259,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
257} 259}
258 260
259/* Caller must hold local->sta_mtx */ 261/* Caller must hold local->sta_mtx */
260static void sta_info_hash_add(struct ieee80211_local *local, 262static int sta_info_hash_add(struct ieee80211_local *local,
261 struct sta_info *sta) 263 struct sta_info *sta)
262{ 264{
263 rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, 265 return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
264 sta_rht_params); 266 sta_rht_params);
265} 267}
266 268
267static void sta_deliver_ps_frames(struct work_struct *wk) 269static void sta_deliver_ps_frames(struct work_struct *wk)
@@ -498,11 +500,17 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
498{ 500{
499 struct ieee80211_local *local = sta->local; 501 struct ieee80211_local *local = sta->local;
500 struct ieee80211_sub_if_data *sdata = sta->sdata; 502 struct ieee80211_sub_if_data *sdata = sta->sdata;
501 struct station_info sinfo; 503 struct station_info *sinfo;
502 int err = 0; 504 int err = 0;
503 505
504 lockdep_assert_held(&local->sta_mtx); 506 lockdep_assert_held(&local->sta_mtx);
505 507
508 sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL);
509 if (!sinfo) {
510 err = -ENOMEM;
511 goto out_err;
512 }
513
506 /* check if STA exists already */ 514 /* check if STA exists already */
507 if (sta_info_get_bss(sdata, sta->sta.addr)) { 515 if (sta_info_get_bss(sdata, sta->sta.addr)) {
508 err = -EEXIST; 516 err = -EEXIST;
@@ -517,7 +525,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
517 set_sta_flag(sta, WLAN_STA_BLOCK_BA); 525 set_sta_flag(sta, WLAN_STA_BLOCK_BA);
518 526
519 /* make the station visible */ 527 /* make the station visible */
520 sta_info_hash_add(local, sta); 528 err = sta_info_hash_add(local, sta);
529 if (err)
530 goto out_drop_sta;
521 531
522 list_add_tail_rcu(&sta->list, &local->sta_list); 532 list_add_tail_rcu(&sta->list, &local->sta_list);
523 533
@@ -530,14 +540,12 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
530 /* accept BA sessions now */ 540 /* accept BA sessions now */
531 clear_sta_flag(sta, WLAN_STA_BLOCK_BA); 541 clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
532 542
533 ieee80211_recalc_min_chandef(sdata);
534 ieee80211_sta_debugfs_add(sta); 543 ieee80211_sta_debugfs_add(sta);
535 rate_control_add_sta_debugfs(sta); 544 rate_control_add_sta_debugfs(sta);
536 545
537 memset(&sinfo, 0, sizeof(sinfo)); 546 sinfo->generation = local->sta_generation;
538 sinfo.filled = 0; 547 cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
539 sinfo.generation = local->sta_generation; 548 kfree(sinfo);
540 cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL);
541 549
542 sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); 550 sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr);
543 551
@@ -552,11 +560,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
552 out_remove: 560 out_remove:
553 sta_info_hash_del(local, sta); 561 sta_info_hash_del(local, sta);
554 list_del_rcu(&sta->list); 562 list_del_rcu(&sta->list);
563 out_drop_sta:
555 local->num_sta--; 564 local->num_sta--;
556 synchronize_net(); 565 synchronize_net();
557 __cleanup_single_sta(sta); 566 __cleanup_single_sta(sta);
558 out_err: 567 out_err:
559 mutex_unlock(&local->sta_mtx); 568 mutex_unlock(&local->sta_mtx);
569 kfree(sinfo);
560 rcu_read_lock(); 570 rcu_read_lock();
561 return err; 571 return err;
562} 572}
@@ -898,7 +908,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
898{ 908{
899 struct ieee80211_local *local = sta->local; 909 struct ieee80211_local *local = sta->local;
900 struct ieee80211_sub_if_data *sdata = sta->sdata; 910 struct ieee80211_sub_if_data *sdata = sta->sdata;
901 struct station_info sinfo = {}; 911 struct station_info *sinfo;
902 int ret; 912 int ret;
903 913
904 /* 914 /*
@@ -936,12 +946,14 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
936 946
937 sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); 947 sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr);
938 948
939 sta_set_sinfo(sta, &sinfo); 949 sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
940 cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); 950 if (sinfo)
951 sta_set_sinfo(sta, sinfo);
952 cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
953 kfree(sinfo);
941 954
942 rate_control_remove_sta_debugfs(sta); 955 rate_control_remove_sta_debugfs(sta);
943 ieee80211_sta_debugfs_remove(sta); 956 ieee80211_sta_debugfs_remove(sta);
944 ieee80211_recalc_min_chandef(sdata);
945 957
946 cleanup_single_sta(sta); 958 cleanup_single_sta(sta);
947} 959}
@@ -1808,14 +1820,17 @@ int sta_info_move_state(struct sta_info *sta,
1808 clear_bit(WLAN_STA_AUTH, &sta->_flags); 1820 clear_bit(WLAN_STA_AUTH, &sta->_flags);
1809 break; 1821 break;
1810 case IEEE80211_STA_AUTH: 1822 case IEEE80211_STA_AUTH:
1811 if (sta->sta_state == IEEE80211_STA_NONE) 1823 if (sta->sta_state == IEEE80211_STA_NONE) {
1812 set_bit(WLAN_STA_AUTH, &sta->_flags); 1824 set_bit(WLAN_STA_AUTH, &sta->_flags);
1813 else if (sta->sta_state == IEEE80211_STA_ASSOC) 1825 } else if (sta->sta_state == IEEE80211_STA_ASSOC) {
1814 clear_bit(WLAN_STA_ASSOC, &sta->_flags); 1826 clear_bit(WLAN_STA_ASSOC, &sta->_flags);
1827 ieee80211_recalc_min_chandef(sta->sdata);
1828 }
1815 break; 1829 break;
1816 case IEEE80211_STA_ASSOC: 1830 case IEEE80211_STA_ASSOC:
1817 if (sta->sta_state == IEEE80211_STA_AUTH) { 1831 if (sta->sta_state == IEEE80211_STA_AUTH) {
1818 set_bit(WLAN_STA_ASSOC, &sta->_flags); 1832 set_bit(WLAN_STA_ASSOC, &sta->_flags);
1833 ieee80211_recalc_min_chandef(sta->sdata);
1819 } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { 1834 } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
1820 if (sta->sdata->vif.type == NL80211_IFTYPE_AP || 1835 if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
1821 (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && 1836 (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
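
sta_info.c above switches both the new-station and removed-station notifications to a heap-allocated struct station_info, presumably because the structure has grown too large to keep on the kernel stack. The insertion path fails with -ENOMEM when the allocation is impossible, while the destroy path simply passes a NULL sinfo through to cfg80211_del_sta_sinfo(). The shape of the new pattern, condensed from the hunks above (kernel-style sketch, not standalone):

	struct station_info *sinfo;

	sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
	if (!sinfo)
		return -ENOMEM;		/* insertion path gives up here */

	sinfo->generation = local->sta_generation;
	cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
	kfree(sinfo);

The same file also turns sta_info_hash_add() into a fallible call, so an rhashtable insertion error now unwinds through the new out_drop_sta label instead of being ignored.
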
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index d6051629ed15..62193f4bc37b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright 2002-2005, Devicescape Software, Inc. 2 * Copyright 2002-2005, Devicescape Software, Inc.
3 * Copyright 2013-2014 Intel Mobile Communications GmbH 3 * Copyright 2013-2014 Intel Mobile Communications GmbH
4 * Copyright(c) 2015 Intel Deutschland GmbH
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -167,6 +168,8 @@ struct tid_ampdu_tx {
167 * 168 *
168 * @reorder_buf: buffer to reorder incoming aggregated MPDUs. An MPDU may be an 169 * @reorder_buf: buffer to reorder incoming aggregated MPDUs. An MPDU may be an
169 * A-MSDU with individually reported subframes. 170 * A-MSDU with individually reported subframes.
171 * @reorder_buf_filtered: bitmap indicating where there are filtered frames in
172 * the reorder buffer that should be ignored when releasing frames
170 * @reorder_time: jiffies when skb was added 173 * @reorder_time: jiffies when skb was added
171 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) 174 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
172 * @reorder_timer: releases expired frames from the reorder buffer. 175 * @reorder_timer: releases expired frames from the reorder buffer.
@@ -194,6 +197,7 @@ struct tid_ampdu_tx {
194struct tid_ampdu_rx { 197struct tid_ampdu_rx {
195 struct rcu_head rcu_head; 198 struct rcu_head rcu_head;
196 spinlock_t reorder_lock; 199 spinlock_t reorder_lock;
200 u64 reorder_buf_filtered;
197 struct sk_buff_head *reorder_buf; 201 struct sk_buff_head *reorder_buf;
198 unsigned long *reorder_time; 202 unsigned long *reorder_time;
199 struct timer_list session_timer; 203 struct timer_list session_timer;
@@ -212,20 +216,21 @@ struct tid_ampdu_rx {
212/** 216/**
213 * struct sta_ampdu_mlme - STA aggregation information. 217 * struct sta_ampdu_mlme - STA aggregation information.
214 * 218 *
219 * @mtx: mutex to protect all TX data (except non-NULL assignments
220 * to tid_tx[idx], which are protected by the sta spinlock)
221 * tid_start_tx is also protected by sta->lock.
215 * @tid_rx: aggregation info for Rx per TID -- RCU protected 222 * @tid_rx: aggregation info for Rx per TID -- RCU protected
216 * @tid_tx: aggregation info for Tx per TID
217 * @tid_start_tx: sessions where start was requested
218 * @addba_req_num: number of times addBA request has been sent.
219 * @last_addba_req_time: timestamp of the last addBA request.
220 * @dialog_token_allocator: dialog token enumerator for each new session;
221 * @work: work struct for starting/stopping aggregation
222 * @tid_rx_timer_expired: bitmap indicating on which TIDs the 223 * @tid_rx_timer_expired: bitmap indicating on which TIDs the
223 * RX timer expired until the work for it runs 224 * RX timer expired until the work for it runs
224 * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the 225 * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the
225 * driver requested to close until the work for it runs 226 * driver requested to close until the work for it runs
226 * @mtx: mutex to protect all TX data (except non-NULL assignments 227 * @agg_session_valid: bitmap indicating which TID has a rx BA session open on
227 * to tid_tx[idx], which are protected by the sta spinlock) 228 * @work: work struct for starting/stopping aggregation
228 * tid_start_tx is also protected by sta->lock. 229 * @tid_tx: aggregation info for Tx per TID
230 * @tid_start_tx: sessions where start was requested
231 * @last_addba_req_time: timestamp of the last addBA request.
232 * @addba_req_num: number of times addBA request has been sent.
233 * @dialog_token_allocator: dialog token enumerator for each new session;
229 */ 234 */
230struct sta_ampdu_mlme { 235struct sta_ampdu_mlme {
231 struct mutex mtx; 236 struct mutex mtx;
@@ -233,6 +238,7 @@ struct sta_ampdu_mlme {
233 struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; 238 struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS];
234 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; 239 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
235 unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; 240 unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
241 unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
236 /* tx */ 242 /* tx */
237 struct work_struct work; 243 struct work_struct work;
238 struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS]; 244 struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS];
@@ -371,7 +377,6 @@ DECLARE_EWMA(signal, 1024, 8)
371 * @uploaded: set to true when sta is uploaded to the driver 377 * @uploaded: set to true when sta is uploaded to the driver
372 * @sta: station information we share with the driver 378 * @sta: station information we share with the driver
373 * @sta_state: duplicates information about station state (for debug) 379 * @sta_state: duplicates information about station state (for debug)
374 * @beacon_loss_count: number of times beacon loss has triggered
375 * @rcu_head: RCU head used for freeing this station struct 380 * @rcu_head: RCU head used for freeing this station struct
376 * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, 381 * @cur_max_bandwidth: maximum bandwidth to use for TX to the station,
377 * taken from HT/VHT capabilities or VHT operating mode notification 382 * taken from HT/VHT capabilities or VHT operating mode notification
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 6101deb805a8..8b1b2ea03eb5 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -697,7 +697,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
 				     rtap_len, shift);
 
 	/* XXX: is this sufficient for BPF? */
-	skb_set_mac_header(skb, 0);
+	skb_reset_mac_header(skb);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 	skb->pkt_type = PACKET_OTHERHOST;
 	skb->protocol = htons(ETH_P_802_2);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index c9eeb3f12808..a29ea813b7d5 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -4,7 +4,7 @@
4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> 4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
5 * Copyright 2014, Intel Corporation 5 * Copyright 2014, Intel Corporation
6 * Copyright 2014 Intel Mobile Communications GmbH 6 * Copyright 2014 Intel Mobile Communications GmbH
7 * Copyright 2015 Intel Deutschland GmbH 7 * Copyright 2015 - 2016 Intel Deutschland GmbH
8 * 8 *
9 * This file is GPLv2 as found in COPYING. 9 * This file is GPLv2 as found in COPYING.
10 */ 10 */
@@ -15,6 +15,7 @@
15#include <linux/rtnetlink.h> 15#include <linux/rtnetlink.h>
16#include "ieee80211_i.h" 16#include "ieee80211_i.h"
17#include "driver-ops.h" 17#include "driver-ops.h"
18#include "rate.h"
18 19
19/* give usermode some time for retries in setting up the TDLS session */ 20/* give usermode some time for retries in setting up the TDLS session */
20#define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) 21#define TDLS_PEER_SETUP_TIMEOUT (15 * HZ)
@@ -302,7 +303,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
302 /* IEEE802.11ac-2013 Table E-4 */ 303 /* IEEE802.11ac-2013 Table E-4 */
303 u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; 304 u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
304 struct cfg80211_chan_def uc = sta->tdls_chandef; 305 struct cfg80211_chan_def uc = sta->tdls_chandef;
305 enum nl80211_chan_width max_width = ieee80211_get_sta_bw(&sta->sta); 306 enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(sta);
306 int i; 307 int i;
307 308
308 /* only support upgrading non-narrow channels up to 80Mhz */ 309 /* only support upgrading non-narrow channels up to 80Mhz */
@@ -313,7 +314,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
313 if (max_width > NL80211_CHAN_WIDTH_80) 314 if (max_width > NL80211_CHAN_WIDTH_80)
314 max_width = NL80211_CHAN_WIDTH_80; 315 max_width = NL80211_CHAN_WIDTH_80;
315 316
316 if (uc.width == max_width) 317 if (uc.width >= max_width)
317 return; 318 return;
318 /* 319 /*
319 * Channel usage constrains in the IEEE802.11ac-2013 specification only 320 * Channel usage constrains in the IEEE802.11ac-2013 specification only
@@ -324,6 +325,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
324 for (i = 0; i < ARRAY_SIZE(centers_80mhz); i++) 325 for (i = 0; i < ARRAY_SIZE(centers_80mhz); i++)
325 if (abs(uc.chan->center_freq - centers_80mhz[i]) <= 30) { 326 if (abs(uc.chan->center_freq - centers_80mhz[i]) <= 30) {
326 uc.center_freq1 = centers_80mhz[i]; 327 uc.center_freq1 = centers_80mhz[i];
328 uc.center_freq2 = 0;
327 uc.width = NL80211_CHAN_WIDTH_80; 329 uc.width = NL80211_CHAN_WIDTH_80;
328 break; 330 break;
329 } 331 }
@@ -332,7 +334,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
332 return; 334 return;
333 335
334 /* proceed to downgrade the chandef until usable or the same */ 336 /* proceed to downgrade the chandef until usable or the same */
335 while (uc.width > max_width && 337 while (uc.width > max_width ||
336 !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc, 338 !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
337 sdata->wdev.iftype)) 339 sdata->wdev.iftype))
338 ieee80211_chandef_downgrade(&uc); 340 ieee80211_chandef_downgrade(&uc);
@@ -1242,18 +1244,44 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
1242 return ret; 1244 return ret;
1243} 1245}
1244 1246
1245static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata) 1247static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
1248 struct sta_info *sta)
1246{ 1249{
1247 struct ieee80211_local *local = sdata->local; 1250 struct ieee80211_local *local = sdata->local;
1248 struct ieee80211_chanctx_conf *conf; 1251 struct ieee80211_chanctx_conf *conf;
1249 struct ieee80211_chanctx *ctx; 1252 struct ieee80211_chanctx *ctx;
1253 enum nl80211_chan_width width;
1254 struct ieee80211_supported_band *sband;
1250 1255
1251 mutex_lock(&local->chanctx_mtx); 1256 mutex_lock(&local->chanctx_mtx);
1252 conf = rcu_dereference_protected(sdata->vif.chanctx_conf, 1257 conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
1253 lockdep_is_held(&local->chanctx_mtx)); 1258 lockdep_is_held(&local->chanctx_mtx));
1254 if (conf) { 1259 if (conf) {
1260 width = conf->def.width;
1261 sband = local->hw.wiphy->bands[conf->def.chan->band];
1255 ctx = container_of(conf, struct ieee80211_chanctx, conf); 1262 ctx = container_of(conf, struct ieee80211_chanctx, conf);
1256 ieee80211_recalc_chanctx_chantype(local, ctx); 1263 ieee80211_recalc_chanctx_chantype(local, ctx);
1264
1265 /* if width changed and a peer is given, update its BW */
1266 if (width != conf->def.width && sta &&
1267 test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) {
1268 enum ieee80211_sta_rx_bandwidth bw;
1269
1270 bw = ieee80211_chan_width_to_rx_bw(conf->def.width);
1271 bw = min(bw, ieee80211_sta_cap_rx_bw(sta));
1272 if (bw != sta->sta.bandwidth) {
1273 sta->sta.bandwidth = bw;
1274 rate_control_rate_update(local, sband, sta,
1275 IEEE80211_RC_BW_CHANGED);
1276 /*
1277 * if a TDLS peer BW was updated, we need to
1278 * recalc the chandef width again, to get the
1279 * correct chanctx min_def
1280 */
1281 ieee80211_recalc_chanctx_chantype(local, ctx);
1282 }
1283 }
1284
1257 } 1285 }
1258 mutex_unlock(&local->chanctx_mtx); 1286 mutex_unlock(&local->chanctx_mtx);
1259} 1287}
@@ -1350,8 +1378,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
1350 break; 1378 break;
1351 } 1379 }
1352 1380
1353 iee80211_tdls_recalc_chanctx(sdata);
1354
1355 mutex_lock(&local->sta_mtx); 1381 mutex_lock(&local->sta_mtx);
1356 sta = sta_info_get(sdata, peer); 1382 sta = sta_info_get(sdata, peer);
1357 if (!sta) { 1383 if (!sta) {
@@ -1360,6 +1386,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
1360 break; 1386 break;
1361 } 1387 }
1362 1388
1389 iee80211_tdls_recalc_chanctx(sdata, sta);
1363 iee80211_tdls_recalc_ht_protection(sdata, sta); 1390 iee80211_tdls_recalc_ht_protection(sdata, sta);
1364 1391
1365 set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); 1392 set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
@@ -1390,7 +1417,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
1390 iee80211_tdls_recalc_ht_protection(sdata, NULL); 1417 iee80211_tdls_recalc_ht_protection(sdata, NULL);
1391 mutex_unlock(&local->sta_mtx); 1418 mutex_unlock(&local->sta_mtx);
1392 1419
1393 iee80211_tdls_recalc_chanctx(sdata); 1420 iee80211_tdls_recalc_chanctx(sdata, NULL);
1394 break; 1421 break;
1395 default: 1422 default:
1396 ret = -ENOTSUPP; 1423 ret = -ENOTSUPP;
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 0ae207771a58..b3622823bad2 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright 2002-2004, Instant802 Networks, Inc. 2 * Copyright 2002-2004, Instant802 Networks, Inc.
3 * Copyright 2005, Devicescape Software, Inc. 3 * Copyright 2005, Devicescape Software, Inc.
4 * Copyright (C) 2016 Intel Deutschland GmbH
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -142,15 +143,14 @@ static void tkip_mixing_phase2(const u8 *tk, struct tkip_ctx *ctx,
142/* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets 143/* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets
143 * of the IV. Returns pointer to the octet following IVs (i.e., beginning of 144 * of the IV. Returns pointer to the octet following IVs (i.e., beginning of
144 * the packet payload). */ 145 * the packet payload). */
145u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key) 146u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key_conf *keyconf, u64 pn)
146{ 147{
147 lockdep_assert_held(&key->u.tkip.txlock); 148 pos = write_tkip_iv(pos, TKIP_PN_TO_IV16(pn));
148 149 *pos++ = (keyconf->keyidx << 6) | (1 << 5) /* Ext IV */;
149 pos = write_tkip_iv(pos, key->u.tkip.tx.iv16); 150 put_unaligned_le32(TKIP_PN_TO_IV32(pn), pos);
150 *pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */;
151 put_unaligned_le32(key->u.tkip.tx.iv32, pos);
152 return pos + 4; 151 return pos + 4;
153} 152}
153EXPORT_SYMBOL_GPL(ieee80211_tkip_add_iv);
154 154
155static void ieee80211_compute_tkip_p1k(struct ieee80211_key *key, u32 iv32) 155static void ieee80211_compute_tkip_p1k(struct ieee80211_key *key, u32 iv32)
156{ 156{
@@ -250,6 +250,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
250 u8 rc4key[16], keyid, *pos = payload; 250 u8 rc4key[16], keyid, *pos = payload;
251 int res; 251 int res;
252 const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; 252 const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
253 struct tkip_ctx_rx *rx_ctx = &key->u.tkip.rx[queue];
253 254
254 if (payload_len < 12) 255 if (payload_len < 12)
255 return -1; 256 return -1;
@@ -265,37 +266,36 @@ int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
265 if ((keyid >> 6) != key->conf.keyidx) 266 if ((keyid >> 6) != key->conf.keyidx)
266 return TKIP_DECRYPT_INVALID_KEYIDX; 267 return TKIP_DECRYPT_INVALID_KEYIDX;
267 268
268 if (key->u.tkip.rx[queue].state != TKIP_STATE_NOT_INIT && 269 if (rx_ctx->ctx.state != TKIP_STATE_NOT_INIT &&
269 (iv32 < key->u.tkip.rx[queue].iv32 || 270 (iv32 < rx_ctx->iv32 ||
270 (iv32 == key->u.tkip.rx[queue].iv32 && 271 (iv32 == rx_ctx->iv32 && iv16 <= rx_ctx->iv16)))
271 iv16 <= key->u.tkip.rx[queue].iv16)))
272 return TKIP_DECRYPT_REPLAY; 272 return TKIP_DECRYPT_REPLAY;
273 273
274 if (only_iv) { 274 if (only_iv) {
275 res = TKIP_DECRYPT_OK; 275 res = TKIP_DECRYPT_OK;
276 key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; 276 rx_ctx->ctx.state = TKIP_STATE_PHASE1_HW_UPLOADED;
277 goto done; 277 goto done;
278 } 278 }
279 279
280 if (key->u.tkip.rx[queue].state == TKIP_STATE_NOT_INIT || 280 if (rx_ctx->ctx.state == TKIP_STATE_NOT_INIT ||
281 key->u.tkip.rx[queue].iv32 != iv32) { 281 rx_ctx->iv32 != iv32) {
282 /* IV16 wrapped around - perform TKIP phase 1 */ 282 /* IV16 wrapped around - perform TKIP phase 1 */
283 tkip_mixing_phase1(tk, &key->u.tkip.rx[queue], ta, iv32); 283 tkip_mixing_phase1(tk, &rx_ctx->ctx, ta, iv32);
284 } 284 }
285 if (key->local->ops->update_tkip_key && 285 if (key->local->ops->update_tkip_key &&
286 key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && 286 key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
287 key->u.tkip.rx[queue].state != TKIP_STATE_PHASE1_HW_UPLOADED) { 287 rx_ctx->ctx.state != TKIP_STATE_PHASE1_HW_UPLOADED) {
288 struct ieee80211_sub_if_data *sdata = key->sdata; 288 struct ieee80211_sub_if_data *sdata = key->sdata;
289 289
290 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 290 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
291 sdata = container_of(key->sdata->bss, 291 sdata = container_of(key->sdata->bss,
292 struct ieee80211_sub_if_data, u.ap); 292 struct ieee80211_sub_if_data, u.ap);
293 drv_update_tkip_key(key->local, sdata, &key->conf, key->sta, 293 drv_update_tkip_key(key->local, sdata, &key->conf, key->sta,
294 iv32, key->u.tkip.rx[queue].p1k); 294 iv32, rx_ctx->ctx.p1k);
295 key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; 295 rx_ctx->ctx.state = TKIP_STATE_PHASE1_HW_UPLOADED;
296 } 296 }
297 297
298 tkip_mixing_phase2(tk, &key->u.tkip.rx[queue], iv16, rc4key); 298 tkip_mixing_phase2(tk, &rx_ctx->ctx, iv16, rc4key);
299 299
300 res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12); 300 res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12);
301 done: 301 done:
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index e3ecb659b90a..a1bcbfbefe7c 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -13,8 +13,6 @@
13#include <linux/crypto.h> 13#include <linux/crypto.h>
14#include "key.h" 14#include "key.h"
15 15
16u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key);
17
18int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm, 16int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
19 struct ieee80211_key *key, 17 struct ieee80211_key *key,
20 struct sk_buff *skb, 18 struct sk_buff *skb,
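
The tkip.c/tkip.h hunks above change ieee80211_tkip_add_iv() to take the public key configuration plus a 64-bit packet number (instead of the mac80211-internal key and its txlock) and export it for drivers that build the TKIP IV themselves. A minimal userspace-style sketch of the resulting IV layout, assuming the TKIP_PN_TO_IV16/IV32 split (low 16 bits vs. the next 32 bits) and the byte order written by write_tkip_iv() as shown above:

/* Illustration only: how a 48-bit TKIP TSC maps onto the 8-byte IV/ExtIV
 * that ieee80211_tkip_add_iv() emits. Treat the exact byte layout as an
 * assumption derived from the hunk above. */
#include <stdint.h>
#include <stdio.h>

static void tkip_build_iv(uint8_t iv[8], uint64_t pn, int keyidx)
{
	uint16_t iv16 = pn & 0xffff;             /* TKIP_PN_TO_IV16() */
	uint32_t iv32 = (pn >> 16) & 0xffffffff; /* TKIP_PN_TO_IV32() */

	iv[0] = iv16 >> 8;                       /* TSC1 */
	iv[1] = ((iv16 >> 8) | 0x20) & 0x7f;     /* WEP seed byte */
	iv[2] = iv16 & 0xff;                     /* TSC0 */
	iv[3] = (keyidx << 6) | (1 << 5);        /* key index + Ext IV bit */
	iv[4] = iv32 & 0xff;                     /* TSC2..TSC5, little endian */
	iv[5] = (iv32 >> 8) & 0xff;
	iv[6] = (iv32 >> 16) & 0xff;
	iv[7] = (iv32 >> 24) & 0xff;
}

int main(void)
{
	uint8_t iv[8];
	int i;

	tkip_build_iv(iv, 0x123456789aULL, 1);
	for (i = 0; i < 8; i++)
		printf("%02x ", iv[i]);
	printf("\n");
	return 0;
}
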
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index a6b4442776a0..2b0a17ee907a 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -80,7 +80,23 @@
80#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d" 80#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d"
81#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx 81#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx
82 82
83 83#define AMPDU_ACTION_ENTRY __field(enum ieee80211_ampdu_mlme_action, \
84 ieee80211_ampdu_mlme_action) \
85 STA_ENTRY \
86 __field(u16, tid) \
87 __field(u16, ssn) \
88 __field(u8, buf_size) \
89 __field(bool, amsdu) \
90 __field(u16, timeout)
91#define AMPDU_ACTION_ASSIGN STA_NAMED_ASSIGN(params->sta); \
92 __entry->tid = params->tid; \
93 __entry->ssn = params->ssn; \
94 __entry->buf_size = params->buf_size; \
95 __entry->amsdu = params->amsdu; \
96 __entry->timeout = params->timeout;
97#define AMPDU_ACTION_PR_FMT STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d"
98#define AMPDU_ACTION_PR_ARG STA_PR_ARG, __entry->tid, __entry->ssn, \
99 __entry->buf_size, __entry->amsdu, __entry->timeout
84 100
85/* 101/*
86 * Tracing for driver callbacks. 102 * Tracing for driver callbacks.
@@ -970,38 +986,25 @@ DEFINE_EVENT(local_only_evt, drv_tx_last_beacon,
970TRACE_EVENT(drv_ampdu_action, 986TRACE_EVENT(drv_ampdu_action,
971 TP_PROTO(struct ieee80211_local *local, 987 TP_PROTO(struct ieee80211_local *local,
972 struct ieee80211_sub_if_data *sdata, 988 struct ieee80211_sub_if_data *sdata,
973 enum ieee80211_ampdu_mlme_action action, 989 struct ieee80211_ampdu_params *params),
974 struct ieee80211_sta *sta, u16 tid,
975 u16 *ssn, u8 buf_size, bool amsdu),
976 990
977 TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size, amsdu), 991 TP_ARGS(local, sdata, params),
978 992
979 TP_STRUCT__entry( 993 TP_STRUCT__entry(
980 LOCAL_ENTRY 994 LOCAL_ENTRY
981 STA_ENTRY
982 __field(u32, action)
983 __field(u16, tid)
984 __field(u16, ssn)
985 __field(u8, buf_size)
986 __field(bool, amsdu)
987 VIF_ENTRY 995 VIF_ENTRY
996 AMPDU_ACTION_ENTRY
988 ), 997 ),
989 998
990 TP_fast_assign( 999 TP_fast_assign(
991 LOCAL_ASSIGN; 1000 LOCAL_ASSIGN;
992 VIF_ASSIGN; 1001 VIF_ASSIGN;
993 STA_ASSIGN; 1002 AMPDU_ACTION_ASSIGN;
994 __entry->action = action;
995 __entry->tid = tid;
996 __entry->ssn = ssn ? *ssn : 0;
997 __entry->buf_size = buf_size;
998 __entry->amsdu = amsdu;
999 ), 1003 ),
1000 1004
1001 TP_printk( 1005 TP_printk(
1002 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d amsdu:%d", 1006 LOCAL_PR_FMT VIF_PR_FMT AMPDU_ACTION_PR_FMT,
1003 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, 1007 LOCAL_PR_ARG, VIF_PR_ARG, AMPDU_ACTION_PR_ARG
1004 __entry->tid, __entry->buf_size, __entry->amsdu
1005 ) 1008 )
1006); 1009);
1007 1010
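
The trace.h rework above mirrors the driver API change that folds the ampdu_action arguments into a single struct ieee80211_ampdu_params. A hedged sketch of a driver callback under the new signature; the field names (sta, tid, ssn, buf_size, amsdu, timeout, action) come from the AMPDU_ACTION_* macros above, while the driver body is illustrative only:

/* Sketch of a driver's ampdu_action under the consolidated-parameters API. */
#include <net/mac80211.h>

static int mydrv_ampdu_action(struct ieee80211_hw *hw,
			      struct ieee80211_vif *vif,
			      struct ieee80211_ampdu_params *params)
{
	switch (params->action) {
	case IEEE80211_AMPDU_RX_START:
	case IEEE80211_AMPDU_RX_STOP:
		/* program the hardware reorder buffer for params->tid here,
		 * using params->ssn, params->buf_size and params->timeout */
		return 0;
	case IEEE80211_AMPDU_TX_START:
		ieee80211_start_tx_ba_cb_irqsafe(vif, params->sta->addr,
						 params->tid);
		return 0;
	case IEEE80211_AMPDU_TX_OPERATIONAL:
		/* params->buf_size and params->amsdu describe the agreed
		 * aggregate size and A-MSDU-in-A-MPDU support */
		return 0;
	case IEEE80211_AMPDU_TX_STOP_CONT:
		ieee80211_stop_tx_ba_cb_irqsafe(vif, params->sta->addr,
						params->tid);
		return 0;
	case IEEE80211_AMPDU_TX_STOP_FLUSH:
	case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
		/* flush queued frames for this session; mac80211 does not
		 * expect the stop callback for the _FLUSH variants */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
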
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3311ce0f3d6c..21f6602395f7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -710,6 +710,10 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
710 710
711 info->control.short_preamble = txrc.short_preamble; 711 info->control.short_preamble = txrc.short_preamble;
712 712
713 /* don't ask rate control when rate already injected via radiotap */
714 if (info->control.flags & IEEE80211_TX_CTRL_RATE_INJECT)
715 return TX_CONTINUE;
716
713 if (tx->sta) 717 if (tx->sta)
714 assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); 718 assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC);
715 719
@@ -1112,11 +1116,15 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
1112 reset_agg_timer = true; 1116 reset_agg_timer = true;
1113 } else { 1117 } else {
1114 queued = true; 1118 queued = true;
1119 if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
1120 clear_sta_flag(tx->sta, WLAN_STA_SP);
1121 ps_dbg(tx->sta->sdata,
1122 "STA %pM aid %d: SP frame queued, close the SP w/o telling the peer\n",
1123 tx->sta->sta.addr, tx->sta->sta.aid);
1124 }
1115 info->control.vif = &tx->sdata->vif; 1125 info->control.vif = &tx->sdata->vif;
1116 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; 1126 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
1117 info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS | 1127 info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
1118 IEEE80211_TX_CTL_NO_PS_BUFFER |
1119 IEEE80211_TX_STATUS_EOSP;
1120 __skb_queue_tail(&tid_tx->pending, skb); 1128 __skb_queue_tail(&tid_tx->pending, skb);
1121 if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER) 1129 if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER)
1122 purge_skb = __skb_dequeue(&tid_tx->pending); 1130 purge_skb = __skb_dequeue(&tid_tx->pending);
@@ -1243,7 +1251,8 @@ static void ieee80211_drv_tx(struct ieee80211_local *local,
1243 struct txq_info *txqi; 1251 struct txq_info *txqi;
1244 u8 ac; 1252 u8 ac;
1245 1253
1246 if (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE) 1254 if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
1255 (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
1247 goto tx_normal; 1256 goto tx_normal;
1248 1257
1249 if (!ieee80211_is_data(hdr->frame_control)) 1258 if (!ieee80211_is_data(hdr->frame_control))
@@ -1266,7 +1275,11 @@ static void ieee80211_drv_tx(struct ieee80211_local *local,
1266 if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending) 1275 if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending)
1267 netif_stop_subqueue(sdata->dev, ac); 1276 netif_stop_subqueue(sdata->dev, ac);
1268 1277
1269 skb_queue_tail(&txqi->queue, skb); 1278 spin_lock_bh(&txqi->queue.lock);
1279 txqi->byte_cnt += skb->len;
1280 __skb_queue_tail(&txqi->queue, skb);
1281 spin_unlock_bh(&txqi->queue.lock);
1282
1270 drv_wake_tx_queue(local, txqi); 1283 drv_wake_tx_queue(local, txqi);
1271 1284
1272 return; 1285 return;
@@ -1294,6 +1307,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
1294 if (!skb) 1307 if (!skb)
1295 goto out; 1308 goto out;
1296 1309
1310 txqi->byte_cnt -= skb->len;
1311
1297 atomic_dec(&sdata->txqs_len[ac]); 1312 atomic_dec(&sdata->txqs_len[ac]);
1298 if (__netif_subqueue_stopped(sdata->dev, ac)) 1313 if (__netif_subqueue_stopped(sdata->dev, ac))
1299 ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]); 1314 ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]);
@@ -1665,15 +1680,24 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
1665 ieee80211_tx(sdata, sta, skb, false); 1680 ieee80211_tx(sdata, sta, skb, false);
1666} 1681}
1667 1682
1668static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb) 1683static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
1684 struct sk_buff *skb)
1669{ 1685{
1670 struct ieee80211_radiotap_iterator iterator; 1686 struct ieee80211_radiotap_iterator iterator;
1671 struct ieee80211_radiotap_header *rthdr = 1687 struct ieee80211_radiotap_header *rthdr =
1672 (struct ieee80211_radiotap_header *) skb->data; 1688 (struct ieee80211_radiotap_header *) skb->data;
1673 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 1689 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1690 struct ieee80211_supported_band *sband =
1691 local->hw.wiphy->bands[info->band];
1674 int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, 1692 int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
1675 NULL); 1693 NULL);
1676 u16 txflags; 1694 u16 txflags;
1695 u16 rate = 0;
1696 bool rate_found = false;
1697 u8 rate_retries = 0;
1698 u16 rate_flags = 0;
1699 u8 mcs_known, mcs_flags;
1700 int i;
1677 1701
1678 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | 1702 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
1679 IEEE80211_TX_CTL_DONTFRAG; 1703 IEEE80211_TX_CTL_DONTFRAG;
@@ -1724,6 +1748,35 @@ static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb)
1724 info->flags |= IEEE80211_TX_CTL_NO_ACK; 1748 info->flags |= IEEE80211_TX_CTL_NO_ACK;
1725 break; 1749 break;
1726 1750
1751 case IEEE80211_RADIOTAP_RATE:
1752 rate = *iterator.this_arg;
1753 rate_flags = 0;
1754 rate_found = true;
1755 break;
1756
1757 case IEEE80211_RADIOTAP_DATA_RETRIES:
1758 rate_retries = *iterator.this_arg;
1759 break;
1760
1761 case IEEE80211_RADIOTAP_MCS:
1762 mcs_known = iterator.this_arg[0];
1763 mcs_flags = iterator.this_arg[1];
1764 if (!(mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_MCS))
1765 break;
1766
1767 rate_found = true;
1768 rate = iterator.this_arg[2];
1769 rate_flags = IEEE80211_TX_RC_MCS;
1770
1771 if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_GI &&
1772 mcs_flags & IEEE80211_RADIOTAP_MCS_SGI)
1773 rate_flags |= IEEE80211_TX_RC_SHORT_GI;
1774
1775 if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_BW &&
1776 mcs_flags & IEEE80211_RADIOTAP_MCS_BW_40)
1777 rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
1778 break;
1779
1727 /* 1780 /*
1728 * Please update the file 1781 * Please update the file
1729 * Documentation/networking/mac80211-injection.txt 1782 * Documentation/networking/mac80211-injection.txt
@@ -1738,6 +1791,32 @@ static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb)
1738 if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ 1791 if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */
1739 return false; 1792 return false;
1740 1793
1794 if (rate_found) {
1795 info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT;
1796
1797 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
1798 info->control.rates[i].idx = -1;
1799 info->control.rates[i].flags = 0;
1800 info->control.rates[i].count = 0;
1801 }
1802
1803 if (rate_flags & IEEE80211_TX_RC_MCS) {
1804 info->control.rates[0].idx = rate;
1805 } else {
1806 for (i = 0; i < sband->n_bitrates; i++) {
1807 if (rate * 5 != sband->bitrates[i].bitrate)
1808 continue;
1809
1810 info->control.rates[0].idx = i;
1811 break;
1812 }
1813 }
1814
1815 info->control.rates[0].flags = rate_flags;
1816 info->control.rates[0].count = min_t(u8, rate_retries + 1,
1817 local->hw.max_rate_tries);
1818 }
1819
1741 /* 1820 /*
1742 * remove the radiotap header 1821 * remove the radiotap header
1743 * iterator->_max_length was sanity-checked against 1822 * iterator->_max_length was sanity-checked against
@@ -1818,10 +1897,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
1818 info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | 1897 info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
1819 IEEE80211_TX_CTL_INJECTED; 1898 IEEE80211_TX_CTL_INJECTED;
1820 1899
1821 /* process and remove the injection radiotap header */
1822 if (!ieee80211_parse_tx_radiotap(skb))
1823 goto fail;
1824
1825 rcu_read_lock(); 1900 rcu_read_lock();
1826 1901
1827 /* 1902 /*
@@ -1883,6 +1958,11 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
1883 goto fail_rcu; 1958 goto fail_rcu;
1884 1959
1885 info->band = chandef->chan->band; 1960 info->band = chandef->chan->band;
1961
1962 /* process and remove the injection radiotap header */
1963 if (!ieee80211_parse_tx_radiotap(local, skb))
1964 goto fail_rcu;
1965
1886 ieee80211_xmit(sdata, NULL, skb); 1966 ieee80211_xmit(sdata, NULL, skb);
1887 rcu_read_unlock(); 1967 rcu_read_unlock();
1888 1968
@@ -2099,8 +2179,11 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
2099 mpp_lookup = true; 2179 mpp_lookup = true;
2100 } 2180 }
2101 2181
2102 if (mpp_lookup) 2182 if (mpp_lookup) {
2103 mppath = mpp_path_lookup(sdata, skb->data); 2183 mppath = mpp_path_lookup(sdata, skb->data);
2184 if (mppath)
2185 mppath->exp_time = jiffies;
2186 }
2104 2187
2105 if (mppath && mpath) 2188 if (mppath && mpath)
2106 mesh_path_del(mpath->sdata, mpath->dst); 2189 mesh_path_del(mpath->sdata, mpath->dst);
@@ -2380,7 +2463,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
2380 /* Update skb pointers to various headers since this modified frame 2463 /* Update skb pointers to various headers since this modified frame
2381 * is going to go through Linux networking code that may potentially 2464 * is going to go through Linux networking code that may potentially
2382 * need things like pointer to IP header. */ 2465 * need things like pointer to IP header. */
2383 skb_set_mac_header(skb, 0); 2466 skb_reset_mac_header(skb);
2384 skb_set_network_header(skb, nh_pos); 2467 skb_set_network_header(skb, nh_pos);
2385 skb_set_transport_header(skb, h_pos); 2468 skb_set_transport_header(skb, h_pos);
2386 2469
@@ -3895,9 +3978,9 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
3895{ 3978{
3896 int ac = ieee802_1d_to_ac[tid & 7]; 3979 int ac = ieee802_1d_to_ac[tid & 7];
3897 3980
3898 skb_set_mac_header(skb, 0); 3981 skb_reset_mac_header(skb);
3899 skb_set_network_header(skb, 0); 3982 skb_reset_network_header(skb);
3900 skb_set_transport_header(skb, 0); 3983 skb_reset_transport_header(skb);
3901 3984
3902 skb_set_queue_mapping(skb, ac); 3985 skb_set_queue_mapping(skb, ac);
3903 skb->priority = tid; 3986 skb->priority = tid;
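
The tx.c hunks above teach the injection path to parse the radiotap Rate, MCS and Data Retries fields and to mark such frames with IEEE80211_TX_CTRL_RATE_INJECT so rate control is skipped. A hedged userspace sketch of the radiotap header an injector could prepend to request a fixed legacy rate; bit positions and units follow the radiotap spec (rate in 500 kbit/s units, matched by the rate * 5 comparison above), and a little-endian host is assumed for the multi-byte fields:

/* Illustration: minimal radiotap header asking for a fixed legacy rate on
 * an injected frame (prepended before the 802.11 header on a monitor
 * interface). Values are assumptions, not taken from this commit. */
#include <stdint.h>
#include <string.h>

struct inject_radiotap {
	uint8_t  version;      /* always 0 */
	uint8_t  pad;
	uint16_t len;          /* total radiotap length, little endian */
	uint32_t present;      /* field bitmap, little endian */
	uint8_t  rate;         /* IEEE80211_RADIOTAP_RATE, 500 kbit/s units */
	uint8_t  data_retries; /* IEEE80211_RADIOTAP_DATA_RETRIES */
} __attribute__((packed));

static size_t build_inject_hdr(uint8_t *buf)
{
	struct inject_radiotap rt;

	memset(&rt, 0, sizeof(rt));
	rt.len = sizeof(rt);                 /* 10 bytes */
	rt.present = (1u << 2) | (1u << 17); /* RATE | DATA_RETRIES */
	rt.rate = 12;                        /* 12 * 500 kbit/s = 6 Mbit/s */
	rt.data_retries = 3;                 /* used as retries + 1 tries */

	memcpy(buf, &rt, sizeof(rt));
	return sizeof(rt);
}
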
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 58f58bd5202f..7390de4946a9 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -4,7 +4,7 @@
4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
5 * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> 5 * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
6 * Copyright 2013-2014 Intel Mobile Communications GmbH 6 * Copyright 2013-2014 Intel Mobile Communications GmbH
7 * Copyright (C) 2015 Intel Deutschland GmbH 7 * Copyright (C) 2015-2016 Intel Deutschland GmbH
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -1928,6 +1928,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1928 BSS_CHANGED_IDLE | 1928 BSS_CHANGED_IDLE |
1929 BSS_CHANGED_TXPOWER; 1929 BSS_CHANGED_TXPOWER;
1930 1930
1931 if (sdata->vif.mu_mimo_owner)
1932 changed |= BSS_CHANGED_MU_GROUPS;
1933
1931 switch (sdata->vif.type) { 1934 switch (sdata->vif.type) {
1932 case NL80211_IFTYPE_STATION: 1935 case NL80211_IFTYPE_STATION:
1933 changed |= BSS_CHANGED_ASSOC | 1936 changed |= BSS_CHANGED_ASSOC |
@@ -2371,10 +2374,23 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
2371 2374
2372 switch (chandef->width) { 2375 switch (chandef->width) {
2373 case NL80211_CHAN_WIDTH_160: 2376 case NL80211_CHAN_WIDTH_160:
2374 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_160MHZ; 2377 /*
2378 * Convert 160 MHz channel width to new style as interop
2379 * workaround.
2380 */
2381 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
2382 vht_oper->center_freq_seg2_idx = vht_oper->center_freq_seg1_idx;
2383 if (chandef->chan->center_freq < chandef->center_freq1)
2384 vht_oper->center_freq_seg1_idx -= 8;
2385 else
2386 vht_oper->center_freq_seg1_idx += 8;
2375 break; 2387 break;
2376 case NL80211_CHAN_WIDTH_80P80: 2388 case NL80211_CHAN_WIDTH_80P80:
2377 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80P80MHZ; 2389 /*
2390 * Convert 80+80 MHz channel width to new style as interop
2391 * workaround.
2392 */
2393 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
2378 break; 2394 break;
2379 case NL80211_CHAN_WIDTH_80: 2395 case NL80211_CHAN_WIDTH_80:
2380 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; 2396 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
@@ -2390,17 +2406,13 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
2390 return pos + sizeof(struct ieee80211_vht_operation); 2406 return pos + sizeof(struct ieee80211_vht_operation);
2391} 2407}
2392 2408
2393void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, 2409bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
2394 const struct ieee80211_ht_operation *ht_oper, 2410 struct cfg80211_chan_def *chandef)
2395 struct cfg80211_chan_def *chandef)
2396{ 2411{
2397 enum nl80211_channel_type channel_type; 2412 enum nl80211_channel_type channel_type;
2398 2413
2399 if (!ht_oper) { 2414 if (!ht_oper)
2400 cfg80211_chandef_create(chandef, control_chan, 2415 return false;
2401 NL80211_CHAN_NO_HT);
2402 return;
2403 }
2404 2416
2405 switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { 2417 switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
2406 case IEEE80211_HT_PARAM_CHA_SEC_NONE: 2418 case IEEE80211_HT_PARAM_CHA_SEC_NONE:
@@ -2414,42 +2426,66 @@ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
2414 break; 2426 break;
2415 default: 2427 default:
2416 channel_type = NL80211_CHAN_NO_HT; 2428 channel_type = NL80211_CHAN_NO_HT;
2429 return false;
2417 } 2430 }
2418 2431
2419 cfg80211_chandef_create(chandef, control_chan, channel_type); 2432 cfg80211_chandef_create(chandef, chandef->chan, channel_type);
2433 return true;
2420} 2434}
2421 2435
2422void ieee80211_vht_oper_to_chandef(struct ieee80211_channel *control_chan, 2436bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
2423 const struct ieee80211_vht_operation *oper, 2437 struct cfg80211_chan_def *chandef)
2424 struct cfg80211_chan_def *chandef)
2425{ 2438{
2439 struct cfg80211_chan_def new = *chandef;
2440 int cf1, cf2;
2441
2426 if (!oper) 2442 if (!oper)
2427 return; 2443 return false;
2428 2444
2429 chandef->chan = control_chan; 2445 cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx,
2446 chandef->chan->band);
2447 cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg2_idx,
2448 chandef->chan->band);
2430 2449
2431 switch (oper->chan_width) { 2450 switch (oper->chan_width) {
2432 case IEEE80211_VHT_CHANWIDTH_USE_HT: 2451 case IEEE80211_VHT_CHANWIDTH_USE_HT:
2433 break; 2452 break;
2434 case IEEE80211_VHT_CHANWIDTH_80MHZ: 2453 case IEEE80211_VHT_CHANWIDTH_80MHZ:
2435 chandef->width = NL80211_CHAN_WIDTH_80; 2454 new.width = NL80211_CHAN_WIDTH_80;
2455 new.center_freq1 = cf1;
2456 /* If needed, adjust based on the newer interop workaround. */
2457 if (oper->center_freq_seg2_idx) {
2458 unsigned int diff;
2459
2460 diff = abs(oper->center_freq_seg2_idx -
2461 oper->center_freq_seg1_idx);
2462 if (diff == 8) {
2463 new.width = NL80211_CHAN_WIDTH_160;
2464 new.center_freq1 = cf2;
2465 } else if (diff > 8) {
2466 new.width = NL80211_CHAN_WIDTH_80P80;
2467 new.center_freq2 = cf2;
2468 }
2469 }
2436 break; 2470 break;
2437 case IEEE80211_VHT_CHANWIDTH_160MHZ: 2471 case IEEE80211_VHT_CHANWIDTH_160MHZ:
2438 chandef->width = NL80211_CHAN_WIDTH_160; 2472 new.width = NL80211_CHAN_WIDTH_160;
2473 new.center_freq1 = cf1;
2439 break; 2474 break;
2440 case IEEE80211_VHT_CHANWIDTH_80P80MHZ: 2475 case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
2441 chandef->width = NL80211_CHAN_WIDTH_80P80; 2476 new.width = NL80211_CHAN_WIDTH_80P80;
2477 new.center_freq1 = cf1;
2478 new.center_freq2 = cf2;
2442 break; 2479 break;
2443 default: 2480 default:
2444 break; 2481 return false;
2445 } 2482 }
2446 2483
2447 chandef->center_freq1 = 2484 if (!cfg80211_chandef_valid(&new))
2448 ieee80211_channel_to_frequency(oper->center_freq_seg1_idx, 2485 return false;
2449 control_chan->band); 2486
2450 chandef->center_freq2 = 2487 *chandef = new;
2451 ieee80211_channel_to_frequency(oper->center_freq_seg2_idx, 2488 return true;
2452 control_chan->band);
2453} 2489}
2454 2490
2455int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, 2491int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
@@ -2672,6 +2708,18 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
2672 sband = local->hw.wiphy->bands[status->band]; 2708 sband = local->hw.wiphy->bands[status->band];
2673 bitrate = sband->bitrates[status->rate_idx].bitrate; 2709 bitrate = sband->bitrates[status->rate_idx].bitrate;
2674 ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift)); 2710 ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift));
2711
2712 if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
2713 /* TODO: handle HT/VHT preambles */
2714 if (status->band == IEEE80211_BAND_5GHZ) {
2715 ts += 20 << shift;
2716 mpdu_offset += 2;
2717 } else if (status->flag & RX_FLAG_SHORTPRE) {
2718 ts += 96;
2719 } else {
2720 ts += 192;
2721 }
2722 }
2675 } 2723 }
2676 2724
2677 rate = cfg80211_calculate_bitrate(&ri); 2725 rate = cfg80211_calculate_bitrate(&ri);
@@ -3357,3 +3405,17 @@ void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
3357 txqi->txq.ac = IEEE80211_AC_BE; 3405 txqi->txq.ac = IEEE80211_AC_BE;
3358 } 3406 }
3359} 3407}
3408
3409void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
3410 unsigned long *frame_cnt,
3411 unsigned long *byte_cnt)
3412{
3413 struct txq_info *txqi = to_txq_info(txq);
3414
3415 if (frame_cnt)
3416 *frame_cnt = txqi->queue.qlen;
3417
3418 if (byte_cnt)
3419 *byte_cnt = txqi->byte_cnt;
3420}
3421EXPORT_SYMBOL(ieee80211_txq_get_depth);
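
The last util.c hunk exports ieee80211_txq_get_depth(), which pairs with the byte_cnt accounting added to the TX queueing path in tx.c above. A sketch of how a driver's wake_tx_queue handler might use it to budget a dequeue burst, assuming the signatures shown in this diff; the 4 KiB budget is purely illustrative:

/* Sketch: query the per-TID queue depth before pulling frames. */
#include <net/mac80211.h>

static void mydrv_wake_tx_queue(struct ieee80211_hw *hw,
				struct ieee80211_txq *txq)
{
	unsigned long frames, bytes, budget;
	struct sk_buff *skb;

	ieee80211_txq_get_depth(txq, &frames, &bytes);
	budget = min_t(unsigned long, bytes, 4096);

	while (budget && frames--) {
		skb = ieee80211_tx_dequeue(hw, txq);
		if (!skb)
			break;

		budget = budget > skb->len ? budget - skb->len : 0;
		/* hand skb to the hardware queue here */
		dev_kfree_skb_any(skb);	/* placeholder for the real TX path */
	}
}
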
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index c38b2f07a919..e590e2ef9eaf 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -1,6 +1,9 @@
1/* 1/*
2 * VHT handling 2 * VHT handling
3 * 3 *
4 * Portions of this file
5 * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
6 *
4 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
@@ -278,6 +281,23 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
278 } 281 }
279 282
280 sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); 283 sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
284
285 /* If HT IE reported 3839 bytes only, stay with that size. */
286 if (sta->sta.max_amsdu_len == IEEE80211_MAX_MPDU_LEN_HT_3839)
287 return;
288
289 switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
290 case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
291 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
292 break;
293 case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
294 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991;
295 break;
296 case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
297 default:
298 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895;
299 break;
300 }
281} 301}
282 302
283enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) 303enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
@@ -299,7 +319,30 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
299 return IEEE80211_STA_RX_BW_80; 319 return IEEE80211_STA_RX_BW_80;
300} 320}
301 321
302static enum ieee80211_sta_rx_bandwidth 322enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta)
323{
324 struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
325 u32 cap_width;
326
327 if (!vht_cap->vht_supported) {
328 if (!sta->sta.ht_cap.ht_supported)
329 return NL80211_CHAN_WIDTH_20_NOHT;
330
331 return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
332 NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20;
333 }
334
335 cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
336
337 if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)
338 return NL80211_CHAN_WIDTH_160;
339 else if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
340 return NL80211_CHAN_WIDTH_80P80;
341
342 return NL80211_CHAN_WIDTH_80;
343}
344
345enum ieee80211_sta_rx_bandwidth
303ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) 346ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
304{ 347{
305 switch (width) { 348 switch (width) {
@@ -327,10 +370,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
327 370
328 bw = ieee80211_sta_cap_rx_bw(sta); 371 bw = ieee80211_sta_cap_rx_bw(sta);
329 bw = min(bw, sta->cur_max_bandwidth); 372 bw = min(bw, sta->cur_max_bandwidth);
330 373 bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
331 /* do not cap the BW of TDLS WIDER_BW peers by the bss */
332 if (!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
333 bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
334 374
335 return bw; 375 return bw;
336} 376}
@@ -425,6 +465,43 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
425 return changed; 465 return changed;
426} 466}
427 467
468void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
469 struct ieee80211_mgmt *mgmt)
470{
471 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
472
473 if (!sdata->vif.mu_mimo_owner)
474 return;
475
476 if (!memcmp(mgmt->u.action.u.vht_group_notif.position,
477 bss_conf->mu_group.position, WLAN_USER_POSITION_LEN) &&
478 !memcmp(mgmt->u.action.u.vht_group_notif.membership,
479 bss_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN))
480 return;
481
482 memcpy(bss_conf->mu_group.membership,
483 mgmt->u.action.u.vht_group_notif.membership,
484 WLAN_MEMBERSHIP_LEN);
485 memcpy(bss_conf->mu_group.position,
486 mgmt->u.action.u.vht_group_notif.position,
487 WLAN_USER_POSITION_LEN);
488
489 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MU_GROUPS);
490}
491
492void ieee80211_update_mu_groups(struct ieee80211_vif *vif,
493 const u8 *membership, const u8 *position)
494{
495 struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
496
497 if (WARN_ON_ONCE(!vif->mu_mimo_owner))
498 return;
499
500 memcpy(bss_conf->mu_group.membership, membership, WLAN_MEMBERSHIP_LEN);
501 memcpy(bss_conf->mu_group.position, position, WLAN_USER_POSITION_LEN);
502}
503EXPORT_SYMBOL_GPL(ieee80211_update_mu_groups);
504
428void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, 505void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
429 struct sta_info *sta, u8 opmode, 506 struct sta_info *sta, u8 opmode,
430 enum ieee80211_band band) 507 enum ieee80211_band band)
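
The vht.c hunks above add ieee80211_process_mu_groups() for Group ID Management frames handled inside mac80211 and export ieee80211_update_mu_groups() for drivers whose firmware parses those frames itself. A hedged sketch of the latter; struct mydrv_mu_notif is hypothetical, and the 8/16-byte array sizes mirror WLAN_MEMBERSHIP_LEN and WLAN_USER_POSITION_LEN as used above:

/* Sketch: firmware-decoded MU-MIMO group data pushed back into bss_conf. */
#include <net/mac80211.h>

struct mydrv_mu_notif {
	u8 membership[8];	/* one bit per group ID (assumed layout) */
	u8 position[16];	/* two bits per group ID (assumed layout) */
};

static void mydrv_handle_mu_notif(struct ieee80211_vif *vif,
				  const struct mydrv_mu_notif *notif)
{
	/* only valid while this vif owns MU-MIMO on its channel context */
	ieee80211_update_mu_groups(vif, notif->membership, notif->position);
}
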
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index d824c38971ed..18848258adde 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright 2002-2004, Instant802 Networks, Inc. 2 * Copyright 2002-2004, Instant802 Networks, Inc.
3 * Copyright 2008, Jouni Malinen <j@w1.fi> 3 * Copyright 2008, Jouni Malinen <j@w1.fi>
4 * Copyright (C) 2016 Intel Deutschland GmbH
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -183,7 +184,6 @@ mic_fail_no_key:
183 return RX_DROP_UNUSABLE; 184 return RX_DROP_UNUSABLE;
184} 185}
185 186
186
187static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) 187static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
188{ 188{
189 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 189 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -191,6 +191,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
191 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 191 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
192 unsigned int hdrlen; 192 unsigned int hdrlen;
193 int len, tail; 193 int len, tail;
194 u64 pn;
194 u8 *pos; 195 u8 *pos;
195 196
196 if (info->control.hw_key && 197 if (info->control.hw_key &&
@@ -222,12 +223,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
222 return 0; 223 return 0;
223 224
224 /* Increase IV for the frame */ 225 /* Increase IV for the frame */
225 spin_lock(&key->u.tkip.txlock); 226 pn = atomic64_inc_return(&key->conf.tx_pn);
226 key->u.tkip.tx.iv16++; 227 pos = ieee80211_tkip_add_iv(pos, &key->conf, pn);
227 if (key->u.tkip.tx.iv16 == 0)
228 key->u.tkip.tx.iv32++;
229 pos = ieee80211_tkip_add_iv(pos, key);
230 spin_unlock(&key->u.tkip.txlock);
231 228
232 /* hwaccel - with software IV */ 229 /* hwaccel - with software IV */
233 if (info->control.hw_key) 230 if (info->control.hw_key)
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index a13d02b7cee4..6a3e1c2181d3 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -17,9 +17,9 @@
17#include <linux/err.h> 17#include <linux/err.h>
18#include <linux/bug.h> 18#include <linux/bug.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/crypto.h>
21#include <linux/ieee802154.h> 20#include <linux/ieee802154.h>
22#include <crypto/aead.h> 21#include <crypto/aead.h>
22#include <crypto/skcipher.h>
23 23
24#include "ieee802154_i.h" 24#include "ieee802154_i.h"
25#include "llsec.h" 25#include "llsec.h"
@@ -144,18 +144,18 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
144 goto err_tfm; 144 goto err_tfm;
145 } 145 }
146 146
147 key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); 147 key->tfm0 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
148 if (IS_ERR(key->tfm0)) 148 if (IS_ERR(key->tfm0))
149 goto err_tfm; 149 goto err_tfm;
150 150
151 if (crypto_blkcipher_setkey(key->tfm0, template->key, 151 if (crypto_skcipher_setkey(key->tfm0, template->key,
152 IEEE802154_LLSEC_KEY_SIZE)) 152 IEEE802154_LLSEC_KEY_SIZE))
153 goto err_tfm0; 153 goto err_tfm0;
154 154
155 return key; 155 return key;
156 156
157err_tfm0: 157err_tfm0:
158 crypto_free_blkcipher(key->tfm0); 158 crypto_free_skcipher(key->tfm0);
159err_tfm: 159err_tfm:
160 for (i = 0; i < ARRAY_SIZE(key->tfm); i++) 160 for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
161 if (key->tfm[i]) 161 if (key->tfm[i])
@@ -175,7 +175,7 @@ static void llsec_key_release(struct kref *ref)
175 for (i = 0; i < ARRAY_SIZE(key->tfm); i++) 175 for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
176 crypto_free_aead(key->tfm[i]); 176 crypto_free_aead(key->tfm[i]);
177 177
178 crypto_free_blkcipher(key->tfm0); 178 crypto_free_skcipher(key->tfm0);
179 kzfree(key); 179 kzfree(key);
180} 180}
181 181
@@ -620,15 +620,17 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
620{ 620{
621 u8 iv[16]; 621 u8 iv[16];
622 struct scatterlist src; 622 struct scatterlist src;
623 struct blkcipher_desc req = { 623 SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
624 .tfm = key->tfm0, 624 int err;
625 .info = iv,
626 .flags = 0,
627 };
628 625
629 llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); 626 llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
630 sg_init_one(&src, skb->data, skb->len); 627 sg_init_one(&src, skb->data, skb->len);
631 return crypto_blkcipher_encrypt_iv(&req, &src, &src, skb->len); 628 skcipher_request_set_tfm(req, key->tfm0);
629 skcipher_request_set_callback(req, 0, NULL, NULL);
630 skcipher_request_set_crypt(req, &src, &src, skb->len, iv);
631 err = crypto_skcipher_encrypt(req);
632 skcipher_request_zero(req);
633 return err;
632} 634}
633 635
634static struct crypto_aead* 636static struct crypto_aead*
@@ -830,11 +832,8 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
830 unsigned char *data; 832 unsigned char *data;
831 int datalen; 833 int datalen;
832 struct scatterlist src; 834 struct scatterlist src;
833 struct blkcipher_desc req = { 835 SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
834 .tfm = key->tfm0, 836 int err;
835 .info = iv,
836 .flags = 0,
837 };
838 837
839 llsec_geniv(iv, dev_addr, &hdr->sec); 838 llsec_geniv(iv, dev_addr, &hdr->sec);
840 data = skb_mac_header(skb) + skb->mac_len; 839 data = skb_mac_header(skb) + skb->mac_len;
@@ -842,7 +841,13 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
842 841
843 sg_init_one(&src, data, datalen); 842 sg_init_one(&src, data, datalen);
844 843
845 return crypto_blkcipher_decrypt_iv(&req, &src, &src, datalen); 844 skcipher_request_set_tfm(req, key->tfm0);
845 skcipher_request_set_callback(req, 0, NULL, NULL);
846 skcipher_request_set_crypt(req, &src, &src, datalen, iv);
847
848 err = crypto_skcipher_decrypt(req);
849 skcipher_request_zero(req);
850 return err;
846} 851}
847 852
848static int 853static int
diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h
index 950578e1d7be..6f3b658e3279 100644
--- a/net/mac802154/llsec.h
+++ b/net/mac802154/llsec.h
@@ -19,7 +19,6 @@
19 19
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/hashtable.h> 21#include <linux/hashtable.h>
22#include <linux/crypto.h>
23#include <linux/kref.h> 22#include <linux/kref.h>
24#include <linux/spinlock.h> 23#include <linux/spinlock.h>
25#include <net/af_ieee802154.h> 24#include <net/af_ieee802154.h>
@@ -30,7 +29,7 @@ struct mac802154_llsec_key {
30 29
31 /* one tfm for each authsize (4/8/16) */ 30 /* one tfm for each authsize (4/8/16) */
32 struct crypto_aead *tfm[3]; 31 struct crypto_aead *tfm[3];
33 struct crypto_blkcipher *tfm0; 32 struct crypto_skcipher *tfm0;
34 33
35 struct kref ref; 34 struct kref ref;
36}; 35};
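
The llsec.c/llsec.h changes above convert the unauthenticated encrypt/decrypt paths from the legacy blkcipher interface to the skcipher API. A self-contained sketch of the same one-shot pattern (on-stack request, set tfm/callback/crypt, encrypt, zero the request) for ctr(aes), with placeholder key and IV handling:

/* Sketch of the synchronous skcipher pattern used above: one-shot CTR(AES)
 * encryption of a linear, non-stack buffer in place. */
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int ctr_aes_encrypt_once(const u8 key[16], u8 iv[16],
				u8 *buf, unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, 16);
	if (err)
		goto out;

	{
		SKCIPHER_REQUEST_ON_STACK(req, tfm);

		sg_init_one(&sg, buf, len);
		skcipher_request_set_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);
		err = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
	}
out:
	crypto_free_skcipher(tfm);
	return err;
}
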
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index e8cab5bb80c6..87da85ae5a6b 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -218,7 +218,6 @@ void ieee802154_unregister_hw(struct ieee802154_hw *hw)
218 218
219 tasklet_kill(&local->tasklet); 219 tasklet_kill(&local->tasklet);
220 flush_workqueue(local->workqueue); 220 flush_workqueue(local->workqueue);
221 destroy_workqueue(local->workqueue);
222 221
223 rtnl_lock(); 222 rtnl_lock();
224 223
@@ -226,6 +225,7 @@ void ieee802154_unregister_hw(struct ieee802154_hw *hw)
226 225
227 rtnl_unlock(); 226 rtnl_unlock();
228 227
228 destroy_workqueue(local->workqueue);
229 wpan_phy_unregister(local->phy); 229 wpan_phy_unregister(local->phy);
230} 230}
231EXPORT_SYMBOL(ieee802154_unregister_hw); 231EXPORT_SYMBOL(ieee802154_unregister_hw);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index b18c5ed42d95..0b80a7140cc4 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -543,6 +543,9 @@ static struct net_device *find_outdev(struct net *net,
543 if (!dev) 543 if (!dev)
544 return ERR_PTR(-ENODEV); 544 return ERR_PTR(-ENODEV);
545 545
546 if (IS_ERR(dev))
547 return dev;
548
546 /* The caller is holding rtnl anyways, so release the dev reference */ 549 /* The caller is holding rtnl anyways, so release the dev reference */
547 dev_put(dev); 550 dev_put(dev);
548 551
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index fb31aa87de81..644a8da6d4bd 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -227,5 +227,6 @@ static void __exit mpls_iptunnel_exit(void)
227} 227}
228module_exit(mpls_iptunnel_exit); 228module_exit(mpls_iptunnel_exit);
229 229
230MODULE_ALIAS_RTNL_LWT(MPLS);
230MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels"); 231MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels");
231MODULE_LICENSE("GPL v2"); 232MODULE_LICENSE("GPL v2");
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index b0bc475f641e..2e8e7e5fb4a6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -95,7 +95,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
95 if (!nested) 95 if (!nested)
96 goto nla_put_failure; 96 goto nla_put_failure;
97 if (mtype_do_head(skb, map) || 97 if (mtype_do_head(skb, map) ||
98 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 98 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
99 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) 99 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
100 goto nla_put_failure; 100 goto nla_put_failure;
101 if (unlikely(ip_set_put_flags(skb, set))) 101 if (unlikely(ip_set_put_flags(skb, set)))
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 29dde208381d..9a065f672d3a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -267,6 +267,8 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
267 267
268 e.id = ip_to_id(map, ip); 268 e.id = ip_to_id(map, ip);
269 if (tb[IPSET_ATTR_ETHER]) { 269 if (tb[IPSET_ATTR_ETHER]) {
270 if (nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN)
271 return -IPSET_ERR_PROTOCOL;
270 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); 272 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
271 e.add_mac = 1; 273 e.add_mac = 1;
272 } 274 }
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 95db43fc0303..a748b0c2c981 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -497,6 +497,26 @@ __ip_set_put(struct ip_set *set)
497 write_unlock_bh(&ip_set_ref_lock); 497 write_unlock_bh(&ip_set_ref_lock);
498} 498}
499 499
500/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
501 * a separate reference counter
502 */
503static inline void
504__ip_set_get_netlink(struct ip_set *set)
505{
506 write_lock_bh(&ip_set_ref_lock);
507 set->ref_netlink++;
508 write_unlock_bh(&ip_set_ref_lock);
509}
510
511static inline void
512__ip_set_put_netlink(struct ip_set *set)
513{
514 write_lock_bh(&ip_set_ref_lock);
515 BUG_ON(set->ref_netlink == 0);
516 set->ref_netlink--;
517 write_unlock_bh(&ip_set_ref_lock);
518}
519
500/* Add, del and test set entries from kernel. 520/* Add, del and test set entries from kernel.
501 * 521 *
502 * The set behind the index must exist and must be referenced 522 * The set behind the index must exist and must be referenced
@@ -985,6 +1005,9 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
985 if (unlikely(protocol_failed(attr))) 1005 if (unlikely(protocol_failed(attr)))
986 return -IPSET_ERR_PROTOCOL; 1006 return -IPSET_ERR_PROTOCOL;
987 1007
1008 /* Must wait for flush to be really finished in list:set */
1009 rcu_barrier();
1010
988 /* Commands are serialized and references are 1011 /* Commands are serialized and references are
989 * protected by the ip_set_ref_lock. 1012 * protected by the ip_set_ref_lock.
990 * External systems (i.e. xt_set) must call 1013 * External systems (i.e. xt_set) must call
@@ -999,7 +1022,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
999 if (!attr[IPSET_ATTR_SETNAME]) { 1022 if (!attr[IPSET_ATTR_SETNAME]) {
1000 for (i = 0; i < inst->ip_set_max; i++) { 1023 for (i = 0; i < inst->ip_set_max; i++) {
1001 s = ip_set(inst, i); 1024 s = ip_set(inst, i);
1002 if (s && s->ref) { 1025 if (s && (s->ref || s->ref_netlink)) {
1003 ret = -IPSET_ERR_BUSY; 1026 ret = -IPSET_ERR_BUSY;
1004 goto out; 1027 goto out;
1005 } 1028 }
@@ -1021,7 +1044,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
1021 if (!s) { 1044 if (!s) {
1022 ret = -ENOENT; 1045 ret = -ENOENT;
1023 goto out; 1046 goto out;
1024 } else if (s->ref) { 1047 } else if (s->ref || s->ref_netlink) {
1025 ret = -IPSET_ERR_BUSY; 1048 ret = -IPSET_ERR_BUSY;
1026 goto out; 1049 goto out;
1027 } 1050 }
@@ -1168,6 +1191,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1168 from->family == to->family)) 1191 from->family == to->family))
1169 return -IPSET_ERR_TYPE_MISMATCH; 1192 return -IPSET_ERR_TYPE_MISMATCH;
1170 1193
1194 if (from->ref_netlink || to->ref_netlink)
1195 return -EBUSY;
1196
1171 strncpy(from_name, from->name, IPSET_MAXNAMELEN); 1197 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
1172 strncpy(from->name, to->name, IPSET_MAXNAMELEN); 1198 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
1173 strncpy(to->name, from_name, IPSET_MAXNAMELEN); 1199 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
@@ -1203,7 +1229,7 @@ ip_set_dump_done(struct netlink_callback *cb)
1203 if (set->variant->uref) 1229 if (set->variant->uref)
1204 set->variant->uref(set, cb, false); 1230 set->variant->uref(set, cb, false);
1205 pr_debug("release set %s\n", set->name); 1231 pr_debug("release set %s\n", set->name);
1206 __ip_set_put_byindex(inst, index); 1232 __ip_set_put_netlink(set);
1207 } 1233 }
1208 return 0; 1234 return 0;
1209} 1235}
@@ -1325,7 +1351,7 @@ dump_last:
1325 if (!cb->args[IPSET_CB_ARG0]) { 1351 if (!cb->args[IPSET_CB_ARG0]) {
1326 /* Start listing: make sure set won't be destroyed */ 1352 /* Start listing: make sure set won't be destroyed */
1327 pr_debug("reference set\n"); 1353 pr_debug("reference set\n");
1328 set->ref++; 1354 set->ref_netlink++;
1329 } 1355 }
1330 write_unlock_bh(&ip_set_ref_lock); 1356 write_unlock_bh(&ip_set_ref_lock);
1331 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, 1357 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
@@ -1393,7 +1419,7 @@ release_refcount:
1393 if (set->variant->uref) 1419 if (set->variant->uref)
1394 set->variant->uref(set, cb, false); 1420 set->variant->uref(set, cb, false);
1395 pr_debug("release set %s\n", set->name); 1421 pr_debug("release set %s\n", set->name);
1396 __ip_set_put_byindex(inst, index); 1422 __ip_set_put_netlink(set);
1397 cb->args[IPSET_CB_ARG0] = 0; 1423 cb->args[IPSET_CB_ARG0] = 0;
1398 } 1424 }
1399out: 1425out:
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index e5336ab36d67..d32fd6b036bf 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1082,7 +1082,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
1082 if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) 1082 if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
1083 goto nla_put_failure; 1083 goto nla_put_failure;
1084#endif 1084#endif
1085 if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 1085 if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
1086 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) 1086 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
1087 goto nla_put_failure; 1087 goto nla_put_failure;
1088 if (unlikely(ip_set_put_flags(skb, set))) 1088 if (unlikely(ip_set_put_flags(skb, set)))
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index f1e7d2c0f685..8f004edad396 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -110,7 +110,8 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
110 if (tb[IPSET_ATTR_LINENO]) 110 if (tb[IPSET_ATTR_LINENO])
111 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 111 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
112 112
113 if (unlikely(!tb[IPSET_ATTR_ETHER])) 113 if (unlikely(!tb[IPSET_ATTR_ETHER] ||
114 nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN))
114 return -IPSET_ERR_PROTOCOL; 115 return -IPSET_ERR_PROTOCOL;
115 116
116 ret = ip_set_get_extensions(set, tb, &ext); 117 ret = ip_set_get_extensions(set, tb, &ext);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index bbede95c9f68..a2a89e4e0a14 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -30,6 +30,7 @@ MODULE_ALIAS("ip_set_list:set");
30struct set_elem { 30struct set_elem {
31 struct rcu_head rcu; 31 struct rcu_head rcu;
32 struct list_head list; 32 struct list_head list;
33 struct ip_set *set; /* Sigh, in order to cleanup reference */
33 ip_set_id_t id; 34 ip_set_id_t id;
34} __aligned(__alignof__(u64)); 35} __aligned(__alignof__(u64));
35 36
@@ -151,30 +152,29 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
151/* Userspace interfaces: we are protected by the nfnl mutex */ 152/* Userspace interfaces: we are protected by the nfnl mutex */
152 153
153static void 154static void
154__list_set_del(struct ip_set *set, struct set_elem *e) 155__list_set_del_rcu(struct rcu_head * rcu)
155{ 156{
157 struct set_elem *e = container_of(rcu, struct set_elem, rcu);
158 struct ip_set *set = e->set;
156 struct list_set *map = set->data; 159 struct list_set *map = set->data;
157 160
158 ip_set_put_byindex(map->net, e->id); 161 ip_set_put_byindex(map->net, e->id);
159 /* We may call it, because we don't have a to be destroyed
160 * extension which is used by the kernel.
161 */
162 ip_set_ext_destroy(set, e); 162 ip_set_ext_destroy(set, e);
163 kfree_rcu(e, rcu); 163 kfree(e);
164} 164}
165 165
166static inline void 166static inline void
167list_set_del(struct ip_set *set, struct set_elem *e) 167list_set_del(struct ip_set *set, struct set_elem *e)
168{ 168{
169 list_del_rcu(&e->list); 169 list_del_rcu(&e->list);
170 __list_set_del(set, e); 170 call_rcu(&e->rcu, __list_set_del_rcu);
171} 171}
172 172
173static inline void 173static inline void
174list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) 174list_set_replace(struct set_elem *e, struct set_elem *old)
175{ 175{
176 list_replace_rcu(&old->list, &e->list); 176 list_replace_rcu(&old->list, &e->list);
177 __list_set_del(set, old); 177 call_rcu(&old->rcu, __list_set_del_rcu);
178} 178}
179 179
180static void 180static void
@@ -244,9 +244,6 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
244 struct set_elem *e, *n, *prev, *next; 244 struct set_elem *e, *n, *prev, *next;
245 bool flag_exist = flags & IPSET_FLAG_EXIST; 245 bool flag_exist = flags & IPSET_FLAG_EXIST;
246 246
247 if (SET_WITH_TIMEOUT(set))
248 set_cleanup_entries(set);
249
250 /* Find where to add the new entry */ 247 /* Find where to add the new entry */
251 n = prev = next = NULL; 248 n = prev = next = NULL;
252 list_for_each_entry(e, &map->members, list) { 249 list_for_each_entry(e, &map->members, list) {
@@ -301,10 +298,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
301 if (!e) 298 if (!e)
302 return -ENOMEM; 299 return -ENOMEM;
303 e->id = d->id; 300 e->id = d->id;
301 e->set = set;
304 INIT_LIST_HEAD(&e->list); 302 INIT_LIST_HEAD(&e->list);
305 list_set_init_extensions(set, ext, e); 303 list_set_init_extensions(set, ext, e);
306 if (n) 304 if (n)
307 list_set_replace(set, e, n); 305 list_set_replace(e, n);
308 else if (next) 306 else if (next)
309 list_add_tail_rcu(&e->list, &next->list); 307 list_add_tail_rcu(&e->list, &next->list);
310 else if (prev) 308 else if (prev)
@@ -431,6 +429,7 @@ list_set_destroy(struct ip_set *set)
431 429
432 if (SET_WITH_TIMEOUT(set)) 430 if (SET_WITH_TIMEOUT(set))
433 del_timer_sync(&map->gc); 431 del_timer_sync(&map->gc);
432
434 list_for_each_entry_safe(e, n, &map->members, list) { 433 list_for_each_entry_safe(e, n, &map->members, list) {
435 list_del(&e->list); 434 list_del(&e->list);
436 ip_set_put_byindex(map->net, e->id); 435 ip_set_put_byindex(map->net, e->id);
@@ -450,14 +449,16 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
450 struct set_elem *e; 449 struct set_elem *e;
451 u32 n = 0; 450 u32 n = 0;
452 451
453 list_for_each_entry(e, &map->members, list) 452 rcu_read_lock();
453 list_for_each_entry_rcu(e, &map->members, list)
454 n++; 454 n++;
455 rcu_read_unlock();
455 456
456 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 457 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
457 if (!nested) 458 if (!nested)
458 goto nla_put_failure; 459 goto nla_put_failure;
459 if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || 460 if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
460 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 461 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
461 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, 462 nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
462 htonl(sizeof(*map) + n * set->dsize))) 463 htonl(sizeof(*map) + n * set->dsize)))
463 goto nla_put_failure; 464 goto nla_put_failure;
@@ -483,33 +484,25 @@ list_set_list(const struct ip_set *set,
483 atd = ipset_nest_start(skb, IPSET_ATTR_ADT); 484 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
484 if (!atd) 485 if (!atd)
485 return -EMSGSIZE; 486 return -EMSGSIZE;
486 list_for_each_entry(e, &map->members, list) {
487 if (i == first)
488 break;
489 i++;
490 }
491 487
492 rcu_read_lock(); 488 rcu_read_lock();
493 list_for_each_entry_from(e, &map->members, list) { 489 list_for_each_entry_rcu(e, &map->members, list) {
494 i++; 490 if (i < first ||
495 if (SET_WITH_TIMEOUT(set) && 491 (SET_WITH_TIMEOUT(set) &&
496 ip_set_timeout_expired(ext_timeout(e, set))) 492 ip_set_timeout_expired(ext_timeout(e, set)))) {
493 i++;
497 continue; 494 continue;
495 }
498 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 496 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
499 if (!nested) { 497 if (!nested)
500 if (i == first) {
501 nla_nest_cancel(skb, atd);
502 ret = -EMSGSIZE;
503 goto out;
504 }
505 goto nla_put_failure; 498 goto nla_put_failure;
506 }
507 if (nla_put_string(skb, IPSET_ATTR_NAME, 499 if (nla_put_string(skb, IPSET_ATTR_NAME,
508 ip_set_name_byindex(map->net, e->id))) 500 ip_set_name_byindex(map->net, e->id)))
509 goto nla_put_failure; 501 goto nla_put_failure;
510 if (ip_set_put_extensions(skb, set, e, true)) 502 if (ip_set_put_extensions(skb, set, e, true))
511 goto nla_put_failure; 503 goto nla_put_failure;
512 ipset_nest_end(skb, nested); 504 ipset_nest_end(skb, nested);
505 i++;
513 } 506 }
514 507
515 ipset_nest_end(skb, atd); 508 ipset_nest_end(skb, atd);
@@ -520,10 +513,12 @@ list_set_list(const struct ip_set *set,
520nla_put_failure: 513nla_put_failure:
521 nla_nest_cancel(skb, nested); 514 nla_nest_cancel(skb, nested);
522 if (unlikely(i == first)) { 515 if (unlikely(i == first)) {
516 nla_nest_cancel(skb, atd);
523 cb->args[IPSET_CB_ARG0] = 0; 517 cb->args[IPSET_CB_ARG0] = 0;
524 ret = -EMSGSIZE; 518 ret = -EMSGSIZE;
519 } else {
520 cb->args[IPSET_CB_ARG0] = i;
525 } 521 }
526 cb->args[IPSET_CB_ARG0] = i - 1;
527 ipset_nest_end(skb, atd); 522 ipset_nest_end(skb, atd);
528out: 523out:
529 rcu_read_unlock(); 524 rcu_read_unlock();
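The list:set changes above replace kfree_rcu() with call_rcu(): each element now stores a back-pointer to its set so that dropping the member-set reference and destroying extensions can also be deferred until after the RCU grace period, not just the kfree(). A kernel-style sketch of that pattern; struct owner and owner_put() are hypothetical stand-ins for struct ip_set and the ipset reference helpers:

#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>

struct owner;				/* stands in for struct ip_set */
void owner_put(struct owner *o);	/* assumed helper, like ip_set_put_byindex() */

struct elem {
	struct rcu_head rcu;
	struct list_head list;
	struct owner *owner;		/* back-pointer, like set_elem->set */
};

/* Runs after the grace period: no RCU reader can still see the element,
 * so per-set cleanup and the final kfree() are both safe here. */
static void elem_free_rcu(struct rcu_head *rcu)
{
	struct elem *e = container_of(rcu, struct elem, rcu);

	owner_put(e->owner);
	kfree(e);
}

static void elem_del(struct elem *e)
{
	list_del_rcu(&e->list);		/* unlink; readers may still traverse e */
	call_rcu(&e->rcu, elem_free_rcu);
}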
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 0328f7250693..299edc6add5a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -605,17 +605,13 @@ static const struct file_operations ip_vs_app_fops = {
605 605
606int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) 606int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
607{ 607{
608 struct net *net = ipvs->net;
609
610 INIT_LIST_HEAD(&ipvs->app_list); 608 INIT_LIST_HEAD(&ipvs->app_list);
611 proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); 609 proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops);
612 return 0; 610 return 0;
613} 611}
614 612
615void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) 613void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
616{ 614{
617 struct net *net = ipvs->net;
618
619 unregister_ip_vs_app(ipvs, NULL /* all */); 615 unregister_ip_vs_app(ipvs, NULL /* all */);
620 remove_proc_entry("ip_vs_app", net->proc_net); 616 remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
621} 617}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index f57b4dcdb233..b9a4082afa3a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1089,6 +1089,7 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
1089 switch (cp->protocol) { 1089 switch (cp->protocol) {
1090 case IPPROTO_TCP: 1090 case IPPROTO_TCP:
1091 return (cp->state == IP_VS_TCP_S_TIME_WAIT) || 1091 return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
1092 (cp->state == IP_VS_TCP_S_CLOSE) ||
1092 ((conn_reuse_mode & 2) && 1093 ((conn_reuse_mode & 2) &&
1093 (cp->state == IP_VS_TCP_S_FIN_WAIT) && 1094 (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
1094 (cp->flags & IP_VS_CONN_F_NOOUTPUT)); 1095 (cp->flags & IP_VS_CONN_F_NOOUTPUT));
@@ -1757,15 +1758,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1757 cp = pp->conn_in_get(ipvs, af, skb, &iph); 1758 cp = pp->conn_in_get(ipvs, af, skb, &iph);
1758 1759
1759 conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); 1760 conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
1760 if (conn_reuse_mode && !iph.fragoffs && 1761 if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
1761 is_new_conn(skb, &iph) && cp && 1762 bool uses_ct = false, resched = false;
1762 ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && 1763
1763 unlikely(!atomic_read(&cp->dest->weight))) || 1764 if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
1764 unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) { 1765 unlikely(!atomic_read(&cp->dest->weight))) {
1765 if (!atomic_read(&cp->n_control)) 1766 resched = true;
1766 ip_vs_conn_expire_now(cp); 1767 uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
1767 __ip_vs_conn_put(cp); 1768 } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
1768 cp = NULL; 1769 uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
1770 if (!atomic_read(&cp->n_control)) {
1771 resched = true;
1772 } else {
1773 /* Do not reschedule controlling connection
1774 * that uses conntrack while it is still
1775 * referenced by controlled connection(s).
1776 */
1777 resched = !uses_ct;
1778 }
1779 }
1780
1781 if (resched) {
1782 if (!atomic_read(&cp->n_control))
1783 ip_vs_conn_expire_now(cp);
1784 __ip_vs_conn_put(cp);
1785 if (uses_ct)
1786 return NF_DROP;
1787 cp = NULL;
1788 }
1769 } 1789 }
1770 1790
1771 if (unlikely(!cp)) { 1791 if (unlikely(!cp)) {
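The rewritten conn_reuse_mode block above splits the old compound condition into an explicit decision: expire-on-no-destination always reschedules; an "expected new connection" reschedules only when the entry has no controlled connections, or when it does not use conntrack; and whenever conntrack is in use the packet itself is dropped so state is rebuilt from the client's retransmit. A compact restatement of that decision as a pure function, with illustrative field names rather than the IPVS structures:

#include <stdbool.h>

struct reuse_ctx {
	bool expire_nodest;	/* expire_nodest_conn sysctl set and dest weight == 0 */
	bool new_conn_expected;	/* is_new_conn_expected(cp, conn_reuse_mode) */
	bool uses_conntrack;	/* ip_vs_conn_uses_conntrack(cp, skb) */
	bool has_controlled;	/* atomic_read(&cp->n_control) != 0 */
};

/* Returns true when the existing entry should be expired and the packet
 * rescheduled; *drop_packet mirrors the NF_DROP taken when conntrack is
 * involved. */
static bool should_resched(const struct reuse_ctx *c, bool *drop_packet)
{
	*drop_packet = false;
	if (c->expire_nodest) {
		*drop_packet = c->uses_conntrack;
		return true;
	}
	if (c->new_conn_expected) {
		if (!c->has_controlled) {
			*drop_packet = c->uses_conntrack;
			return true;
		}
		/* Controlling connection still referenced: only reschedule
		 * if it does not use conntrack. */
		return !c->uses_conntrack;
	}
	return false;
}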
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e7c1b052c2a3..404b2a4f4b5b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1376,8 +1376,6 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1376 struct ip_vs_pe *old_pe; 1376 struct ip_vs_pe *old_pe;
1377 struct netns_ipvs *ipvs = svc->ipvs; 1377 struct netns_ipvs *ipvs = svc->ipvs;
1378 1378
1379 pr_info("%s: enter\n", __func__);
1380
1381 /* Count only IPv4 services for old get/setsockopt interface */ 1379 /* Count only IPv4 services for old get/setsockopt interface */
1382 if (svc->af == AF_INET) 1380 if (svc->af == AF_INET)
1383 ipvs->num_services--; 1381 ipvs->num_services--;
@@ -3947,7 +3945,6 @@ static struct notifier_block ip_vs_dst_notifier = {
3947 3945
3948int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) 3946int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3949{ 3947{
3950 struct net *net = ipvs->net;
3951 int i, idx; 3948 int i, idx;
3952 3949
3953 /* Initialize rs_table */ 3950 /* Initialize rs_table */
@@ -3974,9 +3971,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3974 3971
3975 spin_lock_init(&ipvs->tot_stats.lock); 3972 spin_lock_init(&ipvs->tot_stats.lock);
3976 3973
3977 proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); 3974 proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
3978 proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); 3975 proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
3979 proc_create("ip_vs_stats_percpu", 0, net->proc_net, 3976 proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
3980 &ip_vs_stats_percpu_fops); 3977 &ip_vs_stats_percpu_fops);
3981 3978
3982 if (ip_vs_control_net_init_sysctl(ipvs)) 3979 if (ip_vs_control_net_init_sysctl(ipvs))
@@ -3991,13 +3988,11 @@ err:
3991 3988
3992void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) 3989void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
3993{ 3990{
3994 struct net *net = ipvs->net;
3995
3996 ip_vs_trash_cleanup(ipvs); 3991 ip_vs_trash_cleanup(ipvs);
3997 ip_vs_control_net_cleanup_sysctl(ipvs); 3992 ip_vs_control_net_cleanup_sysctl(ipvs);
3998 remove_proc_entry("ip_vs_stats_percpu", net->proc_net); 3993 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
3999 remove_proc_entry("ip_vs_stats", net->proc_net); 3994 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
4000 remove_proc_entry("ip_vs", net->proc_net); 3995 remove_proc_entry("ip_vs", ipvs->net->proc_net);
4001 free_percpu(ipvs->tot_stats.cpustats); 3996 free_percpu(ipvs->tot_stats.cpustats);
4002} 3997}
4003 3998
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1b8d594e493a..0a6eb5c0d9e9 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70 const char *dptr; 70 const char *dptr;
71 int retc; 71 int retc;
72 72
73 ip_vs_fill_iph_skb(p->af, skb, false, &iph); 73 retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);
74 74
75 /* Only useful with UDP */ 75 /* Only useful with UDP */
76 if (iph.protocol != IPPROTO_UDP) 76 if (!retc || iph.protocol != IPPROTO_UDP)
77 return -EINVAL; 77 return -EINVAL;
78 /* todo: IPv6 fragments: 78 /* todo: IPv6 fragments:
79 * I think this only should be done for the first fragment. /HS 79 * I think this only should be done for the first fragment. /HS
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
88 dptr = skb->data + dataoff; 88 dptr = skb->data + dataoff;
89 datalen = skb->len - dataoff; 89 datalen = skb->len - dataoff;
90 90
91 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) 91 if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
92 return -EINVAL; 92 return -EINVAL;
93 93
94 /* N.B: pe_data is only set on success, 94 /* N.B: pe_data is only set on success,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3264cb49b333..dc196a0f501d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -531,8 +531,6 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
531 if (ret == NF_ACCEPT) { 531 if (ret == NF_ACCEPT) {
532 nf_reset(skb); 532 nf_reset(skb);
533 skb_forward_csum(skb); 533 skb_forward_csum(skb);
534 if (!skb->sk)
535 skb_sender_cpu_clear(skb);
536 } 534 }
537 return ret; 535 return ret;
538} 536}
@@ -573,8 +571,6 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
573 571
574 if (!local) { 572 if (!local) {
575 skb_forward_csum(skb); 573 skb_forward_csum(skb);
576 if (!skb->sk)
577 skb_sender_cpu_clear(skb);
578 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, 574 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
579 NULL, skb_dst(skb)->dev, dst_output); 575 NULL, skb_dst(skb)->dev, dst_output);
580 } else 576 } else
@@ -595,8 +591,6 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
595 if (!local) { 591 if (!local) {
596 ip_vs_drop_early_demux_sk(skb); 592 ip_vs_drop_early_demux_sk(skb);
597 skb_forward_csum(skb); 593 skb_forward_csum(skb);
598 if (!skb->sk)
599 skb_sender_cpu_clear(skb);
600 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, 594 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
601 NULL, skb_dst(skb)->dev, dst_output); 595 NULL, skb_dst(skb)->dev, dst_output);
602 } else 596 } else
@@ -1019,8 +1013,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1019 if (IS_ERR(skb)) 1013 if (IS_ERR(skb))
1020 goto tx_error; 1014 goto tx_error;
1021 1015
1022 skb = iptunnel_handle_offloads( 1016 skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af));
1023 skb, false, __tun_gso_type_mask(AF_INET, cp->af));
1024 if (IS_ERR(skb)) 1017 if (IS_ERR(skb))
1025 goto tx_error; 1018 goto tx_error;
1026 1019
@@ -1112,8 +1105,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1112 if (IS_ERR(skb)) 1105 if (IS_ERR(skb))
1113 goto tx_error; 1106 goto tx_error;
1114 1107
1115 skb = iptunnel_handle_offloads( 1108 skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af));
1116 skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
1117 if (IS_ERR(skb)) 1109 if (IS_ERR(skb))
1118 goto tx_error; 1110 goto tx_error;
1119 1111
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f60b4fdeeb8c..e27fd17c6743 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
66__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); 66__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
67EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); 67EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
68 68
69static __read_mostly spinlock_t nf_conntrack_locks_all_lock; 69static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
70static __read_mostly bool nf_conntrack_locks_all; 70static __read_mostly bool nf_conntrack_locks_all;
71 71
72void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) 72void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -74,8 +74,7 @@ void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
74 spin_lock(lock); 74 spin_lock(lock);
75 while (unlikely(nf_conntrack_locks_all)) { 75 while (unlikely(nf_conntrack_locks_all)) {
76 spin_unlock(lock); 76 spin_unlock(lock);
77 spin_lock(&nf_conntrack_locks_all_lock); 77 spin_unlock_wait(&nf_conntrack_locks_all_lock);
78 spin_unlock(&nf_conntrack_locks_all_lock);
79 spin_lock(lock); 78 spin_lock(lock);
80 } 79 }
81} 80}
@@ -121,8 +120,7 @@ static void nf_conntrack_all_lock(void)
121 nf_conntrack_locks_all = true; 120 nf_conntrack_locks_all = true;
122 121
123 for (i = 0; i < CONNTRACK_LOCKS; i++) { 122 for (i = 0; i < CONNTRACK_LOCKS; i++) {
124 spin_lock(&nf_conntrack_locks[i]); 123 spin_unlock_wait(&nf_conntrack_locks[i]);
125 spin_unlock(&nf_conntrack_locks[i]);
126 } 124 }
127} 125}
128 126
@@ -1780,6 +1778,7 @@ void nf_conntrack_init_end(void)
1780 1778
1781int nf_conntrack_init_net(struct net *net) 1779int nf_conntrack_init_net(struct net *net)
1782{ 1780{
1781 static atomic64_t unique_id;
1783 int ret = -ENOMEM; 1782 int ret = -ENOMEM;
1784 int cpu; 1783 int cpu;
1785 1784
@@ -1802,7 +1801,8 @@ int nf_conntrack_init_net(struct net *net)
1802 if (!net->ct.stat) 1801 if (!net->ct.stat)
1803 goto err_pcpu_lists; 1802 goto err_pcpu_lists;
1804 1803
1805 net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); 1804 net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu",
1805 (u64)atomic64_inc_return(&unique_id));
1806 if (!net->ct.slabname) 1806 if (!net->ct.slabname)
1807 goto err_slabname; 1807 goto err_slabname;
1808 1808
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 278f3b9356ef..7cc1d9c22a9f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -410,6 +410,8 @@ static void tcp_options(const struct sk_buff *skb,
410 length--; 410 length--;
411 continue; 411 continue;
412 default: 412 default:
413 if (length < 2)
414 return;
413 opsize=*ptr++; 415 opsize=*ptr++;
414 if (opsize < 2) /* "silly options" */ 416 if (opsize < 2) /* "silly options" */
415 return; 417 return;
@@ -470,6 +472,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
470 length--; 472 length--;
471 continue; 473 continue;
472 default: 474 default:
475 if (length < 2)
476 return;
473 opsize = *ptr++; 477 opsize = *ptr++;
474 if (opsize < 2) /* "silly options" */ 478 if (opsize < 2) /* "silly options" */
475 return; 479 return;
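Both option walkers above gain the same guard: the option-size byte may only be read when at least two bytes (kind plus size) remain, otherwise a truncated or crafted options area causes a one-byte over-read. A standalone plain-C version of the loop with that check in place; this illustrates the parsing pattern, it is not the kernel helper itself:

#include <stdint.h>
#include <stddef.h>

#define TCPOPT_EOL 0
#define TCPOPT_NOP 1

static void walk_tcp_options(const uint8_t *ptr, size_t length)
{
	while (length > 0) {
		uint8_t opcode = *ptr++;
		uint8_t opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:		/* single-byte padding */
			length--;
			continue;
		default:
			if (length < 2)		/* the added check */
				return;
			opsize = *ptr++;
			if (opsize < 2)		/* "silly options" */
				return;
			if (opsize > length)	/* don't parse partial options */
				return;
			/* ... inspect the option body at ptr here ... */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}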
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 8414ee1a0319..7ec69723940f 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -31,7 +31,6 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
31 skb_push(skb, skb->mac_len); 31 skb_push(skb, skb->mac_len);
32 32
33 skb->dev = dev; 33 skb->dev = dev;
34 skb_sender_cpu_clear(skb);
35 dev_queue_xmit(skb); 34 dev_queue_xmit(skb);
36} 35}
37EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); 36EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 857ae89633af..2278d9ab723b 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
127} 127}
128EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); 128EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
129 129
130struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
131 u32 dst_portid, gfp_t gfp_mask)
132{
133 return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
134}
135EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
136
137int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, 130int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
138 unsigned int group, int echo, gfp_t flags) 131 unsigned int group, int echo, gfp_t flags)
139{ 132{
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 5274b04c42a6..dbd0803b1827 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -96,6 +96,8 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
96 return -EINVAL; 96 return -EINVAL;
97 if (flags & NFACCT_F_OVERQUOTA) 97 if (flags & NFACCT_F_OVERQUOTA)
98 return -EINVAL; 98 return -EINVAL;
99 if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA])
100 return -EINVAL;
99 101
100 size += sizeof(u64); 102 size += sizeof(u64);
101 } 103 }
@@ -242,6 +244,9 @@ nfacct_filter_alloc(const struct nlattr * const attr)
242 if (err < 0) 244 if (err < 0)
243 return ERR_PTR(err); 245 return ERR_PTR(err);
244 246
247 if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
248 return ERR_PTR(-EINVAL);
249
245 filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); 250 filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
246 if (!filter) 251 if (!filter)
247 return ERR_PTR(-ENOMEM); 252 return ERR_PTR(-ENOMEM);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 8ca932057c13..11f81c8385fc 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
330 * message. WARNING: has to be <= 128k due to slab restrictions */ 330 * message. WARNING: has to be <= 128k due to slab restrictions */
331 331
332 n = max(inst_size, pkt_size); 332 n = max(inst_size, pkt_size);
333 skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); 333 skb = alloc_skb(n, GFP_ATOMIC);
334 if (!skb) { 334 if (!skb) {
335 if (n > pkt_size) { 335 if (n > pkt_size) {
336 /* try to allocate only as much as we need for current 336 /* try to allocate only as much as we need for current
337 * packet */ 337 * packet */
338 338
339 skb = nfnetlink_alloc_skb(net, pkt_size, 339 skb = alloc_skb(pkt_size, GFP_ATOMIC);
340 peer_portid, GFP_ATOMIC);
341 } 340 }
342 } 341 }
343 342
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 1d3936587ace..cb5b630a645b 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
301 __be32 **packet_id_ptr) 301 __be32 **packet_id_ptr)
302{ 302{
303 size_t size; 303 size_t size;
304 size_t data_len = 0, cap_len = 0, rem_len = 0; 304 size_t data_len = 0, cap_len = 0;
305 unsigned int hlen = 0; 305 unsigned int hlen = 0;
306 struct sk_buff *skb; 306 struct sk_buff *skb;
307 struct nlattr *nla; 307 struct nlattr *nla;
@@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
361 hlen = min_t(unsigned int, hlen, data_len); 361 hlen = min_t(unsigned int, hlen, data_len);
362 size += sizeof(struct nlattr) + hlen; 362 size += sizeof(struct nlattr) + hlen;
363 cap_len = entskb->len; 363 cap_len = entskb->len;
364 rem_len = data_len - hlen;
365 break; 364 break;
366 } 365 }
367 366
@@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
386 size += nla_total_size(seclen); 385 size += nla_total_size(seclen);
387 } 386 }
388 387
389 skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid, 388 skb = alloc_skb(size, GFP_ATOMIC);
390 GFP_ATOMIC);
391 if (!skb) { 389 if (!skb) {
392 skb_tx_error(entskb); 390 skb_tx_error(entskb);
393 return NULL; 391 return NULL;
@@ -584,7 +582,12 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
584 /* nfnetlink_unicast will either free the nskb or add it to a socket */ 582 /* nfnetlink_unicast will either free the nskb or add it to a socket */
585 err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); 583 err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);
586 if (err < 0) { 584 if (err < 0) {
587 queue->queue_user_dropped++; 585 if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
586 failopen = 1;
587 err = 0;
588 } else {
589 queue->queue_user_dropped++;
590 }
588 goto err_out_unlock; 591 goto err_out_unlock;
589 } 592 }
590 593
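The error path above extends the existing fail-open behaviour to delivery failures: if the verdict queue's socket cannot take the message and NFQA_CFG_F_FAIL_OPEN is set, the packet is re-injected with NF_ACCEPT instead of being counted in queue_user_dropped. A tiny decision sketch with illustrative names (the real code signals this through the failopen flag and nf_reinject()):

#include <stdbool.h>

enum delivery_outcome { DELIVERED, PASS_FAIL_OPEN, DROP_COUNTED };

/* What happens to a queued packet once the unicast to userspace returns. */
static enum delivery_outcome classify_delivery(int unicast_err, bool fail_open)
{
	if (unicast_err >= 0)
		return DELIVERED;
	return fail_open ? PASS_FAIL_OPEN : DROP_COUNTED;
}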
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 454841baa4d0..6228c422c766 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -660,6 +660,9 @@ nft_match_select_ops(const struct nft_ctx *ctx,
660 if (IS_ERR(match)) 660 if (IS_ERR(match))
661 return ERR_PTR(-ENOENT); 661 return ERR_PTR(-ENOENT);
662 662
663 if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO]))
664 return ERR_PTR(-EINVAL);
665
663 /* This is the first time we use this match, allocate operations */ 666 /* This is the first time we use this match, allocate operations */
664 nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); 667 nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
665 if (nft_match == NULL) 668 if (nft_match == NULL)
@@ -740,6 +743,9 @@ nft_target_select_ops(const struct nft_ctx *ctx,
740 if (IS_ERR(target)) 743 if (IS_ERR(target))
741 return ERR_PTR(-ENOENT); 744 return ERR_PTR(-ENOENT);
742 745
746 if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO]))
747 return ERR_PTR(-EINVAL);
748
743 /* This is the first time we use this target, allocate operations */ 749 /* This is the first time we use this target, allocate operations */
744 nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); 750 nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
745 if (nft_target == NULL) 751 if (nft_target == NULL)
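The two checks above make sure a compat match/target never declares more private data than the NFTA_MATCH_INFO / NFTA_TARGET_INFO attribute actually carries, so the later copy into the expression cannot read past the attribute. A generic plain-C restatement of the bounded copy; names are illustrative:

#include <string.h>
#include <stddef.h>

/* Copy "expected" bytes of extension info only when the supplied
 * attribute is at least that long; otherwise reject, as with -EINVAL. */
static int copy_expr_info(void *dst, size_t expected,
			  const void *attr_data, size_t attr_len)
{
	if (expected > attr_len)
		return -1;
	memcpy(dst, attr_data, expected);
	return 0;
}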
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9aea747b43ea..81b5ad6165ac 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -17,7 +17,9 @@
17#include <net/netfilter/nft_masq.h> 17#include <net/netfilter/nft_masq.h>
18 18
19const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { 19const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
20 [NFTA_MASQ_FLAGS] = { .type = NLA_U32 }, 20 [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
21 [NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
22 [NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
21}; 23};
22EXPORT_SYMBOL_GPL(nft_masq_policy); 24EXPORT_SYMBOL_GPL(nft_masq_policy);
23 25
@@ -40,6 +42,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
40 const struct nft_expr *expr, 42 const struct nft_expr *expr,
41 const struct nlattr * const tb[]) 43 const struct nlattr * const tb[])
42{ 44{
45 u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
43 struct nft_masq *priv = nft_expr_priv(expr); 46 struct nft_masq *priv = nft_expr_priv(expr);
44 int err; 47 int err;
45 48
@@ -47,12 +50,32 @@ int nft_masq_init(const struct nft_ctx *ctx,
47 if (err) 50 if (err)
48 return err; 51 return err;
49 52
50 if (tb[NFTA_MASQ_FLAGS] == NULL) 53 if (tb[NFTA_MASQ_FLAGS]) {
51 return 0; 54 priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
52 55 if (priv->flags & ~NF_NAT_RANGE_MASK)
53 priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); 56 return -EINVAL;
54 if (priv->flags & ~NF_NAT_RANGE_MASK) 57 }
55 return -EINVAL; 58
59 if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
60 priv->sreg_proto_min =
61 nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]);
62
63 err = nft_validate_register_load(priv->sreg_proto_min, plen);
64 if (err < 0)
65 return err;
66
67 if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
68 priv->sreg_proto_max =
69 nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]);
70
71 err = nft_validate_register_load(priv->sreg_proto_max,
72 plen);
73 if (err < 0)
74 return err;
75 } else {
76 priv->sreg_proto_max = priv->sreg_proto_min;
77 }
78 }
56 79
57 return 0; 80 return 0;
58} 81}
@@ -62,12 +85,18 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
62{ 85{
63 const struct nft_masq *priv = nft_expr_priv(expr); 86 const struct nft_masq *priv = nft_expr_priv(expr);
64 87
65 if (priv->flags == 0) 88 if (priv->flags != 0 &&
66 return 0; 89 nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
67
68 if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
69 goto nla_put_failure; 90 goto nla_put_failure;
70 91
92 if (priv->sreg_proto_min) {
93 if (nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MIN,
94 priv->sreg_proto_min) ||
95 nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MAX,
96 priv->sreg_proto_max))
97 goto nla_put_failure;
98 }
99
71 return 0; 100 return 0;
72 101
73nla_put_failure: 102nla_put_failure:
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index fe885bf271c5..16c50b0dd426 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -28,6 +28,8 @@
28 28
29#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ 29#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
30 30
31static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
32
31void nft_meta_get_eval(const struct nft_expr *expr, 33void nft_meta_get_eval(const struct nft_expr *expr,
32 struct nft_regs *regs, 34 struct nft_regs *regs,
33 const struct nft_pktinfo *pkt) 35 const struct nft_pktinfo *pkt)
@@ -181,6 +183,11 @@ void nft_meta_get_eval(const struct nft_expr *expr,
181 *dest = sock_cgroup_classid(&sk->sk_cgrp_data); 183 *dest = sock_cgroup_classid(&sk->sk_cgrp_data);
182 break; 184 break;
183#endif 185#endif
186 case NFT_META_PRANDOM: {
187 struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
188 *dest = prandom_u32_state(state);
189 break;
190 }
184 default: 191 default:
185 WARN_ON(1); 192 WARN_ON(1);
186 goto err; 193 goto err;
@@ -277,6 +284,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
277 case NFT_META_OIFNAME: 284 case NFT_META_OIFNAME:
278 len = IFNAMSIZ; 285 len = IFNAMSIZ;
279 break; 286 break;
287 case NFT_META_PRANDOM:
288 prandom_init_once(&nft_prandom_state);
289 len = sizeof(u32);
290 break;
280 default: 291 default:
281 return -EOPNOTSUPP; 292 return -EOPNOTSUPP;
282 } 293 }
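NFT_META_PRANDOM above is backed by a per-CPU PRNG state: it is seeded once when the first expression selecting this key is initialised, and each packet then draws a value from the local CPU's state without any locking. A kernel-style sketch of that pattern using the real prandom helpers; the example_* names are placeholders:

#include <linux/percpu.h>
#include <linux/random.h>

static DEFINE_PER_CPU(struct rnd_state, example_prandom_state);

/* Called from expression init; prandom_init_once() seeds the per-CPU
 * states only on the first call. */
static void example_prandom_init(void)
{
	prandom_init_once(&example_prandom_state);
}

/* Called on the packet path: lockless draw from the local CPU's state. */
static u32 example_prandom_draw(void)
{
	struct rnd_state *state = this_cpu_ptr(&example_prandom_state);

	return prandom_u32_state(state);
}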
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c8a0b7da5ff4..582c9cfd6567 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -659,6 +659,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
659 struct xt_table_info *info = NULL; 659 struct xt_table_info *info = NULL;
660 size_t sz = sizeof(*info) + size; 660 size_t sz = sizeof(*info) + size;
661 661
662 if (sz < sizeof(*info))
663 return NULL;
664
662 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ 665 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
663 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) 666 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
664 return NULL; 667 return NULL;
@@ -694,12 +697,45 @@ EXPORT_SYMBOL(xt_free_table_info);
694struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, 697struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
695 const char *name) 698 const char *name)
696{ 699{
697 struct xt_table *t; 700 struct xt_table *t, *found = NULL;
698 701
699 mutex_lock(&xt[af].mutex); 702 mutex_lock(&xt[af].mutex);
700 list_for_each_entry(t, &net->xt.tables[af], list) 703 list_for_each_entry(t, &net->xt.tables[af], list)
701 if (strcmp(t->name, name) == 0 && try_module_get(t->me)) 704 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
702 return t; 705 return t;
706
707 if (net == &init_net)
708 goto out;
709
710 /* Table doesn't exist in this netns, re-try init */
711 list_for_each_entry(t, &init_net.xt.tables[af], list) {
712 if (strcmp(t->name, name))
713 continue;
714 if (!try_module_get(t->me))
715 return NULL;
716
717 mutex_unlock(&xt[af].mutex);
718 if (t->table_init(net) != 0) {
719 module_put(t->me);
720 return NULL;
721 }
722
723 found = t;
724
725 mutex_lock(&xt[af].mutex);
726 break;
727 }
728
729 if (!found)
730 goto out;
731
732 /* and once again: */
733 list_for_each_entry(t, &net->xt.tables[af], list)
734 if (strcmp(t->name, name) == 0)
735 return t;
736
737 module_put(found->me);
738 out:
703 mutex_unlock(&xt[af].mutex); 739 mutex_unlock(&xt[af].mutex);
704 return NULL; 740 return NULL;
705} 741}
@@ -1170,20 +1206,20 @@ static const struct file_operations xt_target_ops = {
1170#endif /* CONFIG_PROC_FS */ 1206#endif /* CONFIG_PROC_FS */
1171 1207
1172/** 1208/**
1173 * xt_hook_link - set up hooks for a new table 1209 * xt_hook_ops_alloc - set up hooks for a new table
1174 * @table: table with metadata needed to set up hooks 1210 * @table: table with metadata needed to set up hooks
1175 * @fn: Hook function 1211 * @fn: Hook function
1176 * 1212 *
1177 * This function will take care of creating and registering the necessary 1213 * This function will create the nf_hook_ops that the x_table needs
1178 * Netfilter hooks for XT tables. 1214 * to hand to xt_hook_link_net().
1179 */ 1215 */
1180struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) 1216struct nf_hook_ops *
1217xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
1181{ 1218{
1182 unsigned int hook_mask = table->valid_hooks; 1219 unsigned int hook_mask = table->valid_hooks;
1183 uint8_t i, num_hooks = hweight32(hook_mask); 1220 uint8_t i, num_hooks = hweight32(hook_mask);
1184 uint8_t hooknum; 1221 uint8_t hooknum;
1185 struct nf_hook_ops *ops; 1222 struct nf_hook_ops *ops;
1186 int ret;
1187 1223
1188 ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); 1224 ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
1189 if (ops == NULL) 1225 if (ops == NULL)
@@ -1200,27 +1236,9 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
1200 ++i; 1236 ++i;
1201 } 1237 }
1202 1238
1203 ret = nf_register_hooks(ops, num_hooks);
1204 if (ret < 0) {
1205 kfree(ops);
1206 return ERR_PTR(ret);
1207 }
1208
1209 return ops; 1239 return ops;
1210} 1240}
1211EXPORT_SYMBOL_GPL(xt_hook_link); 1241EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
1212
1213/**
1214 * xt_hook_unlink - remove hooks for a table
1215 * @ops: nf_hook_ops array as returned by nf_hook_link
1216 * @hook_mask: the very same mask that was passed to nf_hook_link
1217 */
1218void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops)
1219{
1220 nf_unregister_hooks(ops, hweight32(table->valid_hooks));
1221 kfree(ops);
1222}
1223EXPORT_SYMBOL_GPL(xt_hook_unlink);
1224 1242
1225int xt_proto_init(struct net *net, u_int8_t af) 1243int xt_proto_init(struct net *net, u_int8_t af)
1226{ 1244{
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 29d2c31f406c..daf45da448fa 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -236,6 +236,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
236 236
237 list_del(&info->timer->entry); 237 list_del(&info->timer->entry);
238 del_timer_sync(&info->timer->timer); 238 del_timer_sync(&info->timer->timer);
239 cancel_work_sync(&info->timer->work);
239 sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); 240 sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
240 kfree(info->timer->attr.attr.name); 241 kfree(info->timer->attr.attr.name);
241 kfree(info->timer); 242 kfree(info->timer);
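The single added line above closes a race on target teardown: the timer callback may already have queued its work item, so that work must be flushed before the timer object is freed. A kernel-style sketch of the required ordering, with an assumed struct layout:

#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct idle_timer {
	struct timer_list timer;
	struct work_struct work;	/* queued from the timer callback */
};

static void idle_timer_destroy(struct idle_timer *t)
{
	del_timer_sync(&t->timer);	/* timer callback can no longer run */
	cancel_work_sync(&t->work);	/* wait for any already-queued work */
	kfree(t);			/* nothing references t any more */
}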
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 3ab591e73ec0..7f4414d26a66 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
105 * belonging to established connections going through that one. 105 * belonging to established connections going through that one.
106 */ 106 */
107static inline struct sock * 107static inline struct sock *
108nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, 108nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
109 const u8 protocol,
109 const __be32 saddr, const __be32 daddr, 110 const __be32 saddr, const __be32 daddr,
110 const __be16 sport, const __be16 dport, 111 const __be16 sport, const __be16 dport,
111 const struct net_device *in, 112 const struct net_device *in,
112 const enum nf_tproxy_lookup_t lookup_type) 113 const enum nf_tproxy_lookup_t lookup_type)
113{ 114{
114 struct sock *sk; 115 struct sock *sk;
116 struct tcphdr *tcph;
115 117
116 switch (protocol) { 118 switch (protocol) {
117 case IPPROTO_TCP: 119 case IPPROTO_TCP:
118 switch (lookup_type) { 120 switch (lookup_type) {
119 case NFT_LOOKUP_LISTENER: 121 case NFT_LOOKUP_LISTENER:
120 sk = inet_lookup_listener(net, &tcp_hashinfo, 122 tcph = hp;
123 sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
124 ip_hdrlen(skb) +
125 __tcp_hdrlen(tcph),
121 saddr, sport, 126 saddr, sport,
122 daddr, dport, 127 daddr, dport,
123 in->ifindex); 128 in->ifindex);
@@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
169 174
170#ifdef XT_TPROXY_HAVE_IPV6 175#ifdef XT_TPROXY_HAVE_IPV6
171static inline struct sock * 176static inline struct sock *
172nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, 177nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
178 const u8 protocol,
173 const struct in6_addr *saddr, const struct in6_addr *daddr, 179 const struct in6_addr *saddr, const struct in6_addr *daddr,
174 const __be16 sport, const __be16 dport, 180 const __be16 sport, const __be16 dport,
175 const struct net_device *in, 181 const struct net_device *in,
176 const enum nf_tproxy_lookup_t lookup_type) 182 const enum nf_tproxy_lookup_t lookup_type)
177{ 183{
178 struct sock *sk; 184 struct sock *sk;
185 struct tcphdr *tcph;
179 186
180 switch (protocol) { 187 switch (protocol) {
181 case IPPROTO_TCP: 188 case IPPROTO_TCP:
182 switch (lookup_type) { 189 switch (lookup_type) {
183 case NFT_LOOKUP_LISTENER: 190 case NFT_LOOKUP_LISTENER:
184 sk = inet6_lookup_listener(net, &tcp_hashinfo, 191 tcph = hp;
192 sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
193 thoff + __tcp_hdrlen(tcph),
185 saddr, sport, 194 saddr, sport,
186 daddr, ntohs(dport), 195 daddr, ntohs(dport),
187 in->ifindex); 196 in->ifindex);
@@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
267 * to a listener socket if there's one */ 276 * to a listener socket if there's one */
268 struct sock *sk2; 277 struct sock *sk2;
269 278
270 sk2 = nf_tproxy_get_sock_v4(net, iph->protocol, 279 sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
271 iph->saddr, laddr ? laddr : iph->daddr, 280 iph->saddr, laddr ? laddr : iph->daddr,
272 hp->source, lport ? lport : hp->dest, 281 hp->source, lport ? lport : hp->dest,
273 skb->dev, NFT_LOOKUP_LISTENER); 282 skb->dev, NFT_LOOKUP_LISTENER);
@@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
305 * addresses, this happens if the redirect already happened 314 * addresses, this happens if the redirect already happened
306 * and the current packet belongs to an already established 315 * and the current packet belongs to an already established
307 * connection */ 316 * connection */
308 sk = nf_tproxy_get_sock_v4(net, iph->protocol, 317 sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
309 iph->saddr, iph->daddr, 318 iph->saddr, iph->daddr,
310 hp->source, hp->dest, 319 hp->source, hp->dest,
311 skb->dev, NFT_LOOKUP_ESTABLISHED); 320 skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
321 else if (!sk) 330 else if (!sk)
322 /* no, there's no established connection, check if 331 /* no, there's no established connection, check if
323 * there's a listener on the redirected addr/port */ 332 * there's a listener on the redirected addr/port */
324 sk = nf_tproxy_get_sock_v4(net, iph->protocol, 333 sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
325 iph->saddr, laddr, 334 iph->saddr, laddr,
326 hp->source, lport, 335 hp->source, lport,
327 skb->dev, NFT_LOOKUP_LISTENER); 336 skb->dev, NFT_LOOKUP_LISTENER);
@@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
429 * to a listener socket if there's one */ 438 * to a listener socket if there's one */
430 struct sock *sk2; 439 struct sock *sk2;
431 440
432 sk2 = nf_tproxy_get_sock_v6(par->net, tproto, 441 sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
433 &iph->saddr, 442 &iph->saddr,
434 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), 443 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
435 hp->source, 444 hp->source,
@@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
472 * addresses, this happens if the redirect already happened 481 * addresses, this happens if the redirect already happened
473 * and the current packet belongs to an already established 482 * and the current packet belongs to an already established
474 * connection */ 483 * connection */
475 sk = nf_tproxy_get_sock_v6(par->net, tproto, 484 sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
476 &iph->saddr, &iph->daddr, 485 &iph->saddr, &iph->daddr,
477 hp->source, hp->dest, 486 hp->source, hp->dest,
478 par->in, NFT_LOOKUP_ESTABLISHED); 487 par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
487 else if (!sk) 496 else if (!sk)
488 /* no there's no established connection, check if 497 /* no there's no established connection, check if
489 * there's a listener on the redirected addr/port */ 498 * there's a listener on the redirected addr/port */
490 sk = nf_tproxy_get_sock_v6(par->net, tproto, 499 sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
491 &iph->saddr, laddr, 500 tproto, &iph->saddr, laddr,
492 hp->source, lport, 501 hp->source, lport,
493 par->in, NFT_LOOKUP_LISTENER); 502 par->in, NFT_LOOKUP_LISTENER);
494 503
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4e3c3affd285..2455b69b5810 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -262,7 +262,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
262 if (f->opt[optnum].kind == (*optp)) { 262 if (f->opt[optnum].kind == (*optp)) {
263 __u32 len = f->opt[optnum].length; 263 __u32 len = f->opt[optnum].length;
264 const __u8 *optend = optp + len; 264 const __u8 *optend = optp + len;
265 int loop_cont = 0;
266 265
267 fmatch = FMATCH_OK; 266 fmatch = FMATCH_OK;
268 267
@@ -275,7 +274,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
275 mss = ntohs((__force __be16)mss); 274 mss = ntohs((__force __be16)mss);
276 break; 275 break;
277 case OSFOPT_TS: 276 case OSFOPT_TS:
278 loop_cont = 1;
279 break; 277 break;
280 } 278 }
281 279
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2ec08f04b816..49d14ecad444 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb,
112 * box. 112 * box.
113 */ 113 */
114static struct sock * 114static struct sock *
115xt_socket_get_sock_v4(struct net *net, const u8 protocol, 115xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
116 const u8 protocol,
116 const __be32 saddr, const __be32 daddr, 117 const __be32 saddr, const __be32 daddr,
117 const __be16 sport, const __be16 dport, 118 const __be16 sport, const __be16 dport,
118 const struct net_device *in) 119 const struct net_device *in)
119{ 120{
120 switch (protocol) { 121 switch (protocol) {
121 case IPPROTO_TCP: 122 case IPPROTO_TCP:
122 return __inet_lookup(net, &tcp_hashinfo, 123 return __inet_lookup(net, &tcp_hashinfo, skb, doff,
123 saddr, sport, daddr, dport, 124 saddr, sport, daddr, dport,
124 in->ifindex); 125 in->ifindex);
125 case IPPROTO_UDP: 126 case IPPROTO_UDP:
@@ -148,6 +149,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
148 const struct net_device *indev) 149 const struct net_device *indev)
149{ 150{
150 const struct iphdr *iph = ip_hdr(skb); 151 const struct iphdr *iph = ip_hdr(skb);
152 struct sk_buff *data_skb = NULL;
153 int doff = 0;
151 __be32 uninitialized_var(daddr), uninitialized_var(saddr); 154 __be32 uninitialized_var(daddr), uninitialized_var(saddr);
152 __be16 uninitialized_var(dport), uninitialized_var(sport); 155 __be16 uninitialized_var(dport), uninitialized_var(sport);
153 u8 uninitialized_var(protocol); 156 u8 uninitialized_var(protocol);
@@ -169,6 +172,10 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
169 sport = hp->source; 172 sport = hp->source;
170 daddr = iph->daddr; 173 daddr = iph->daddr;
171 dport = hp->dest; 174 dport = hp->dest;
175 data_skb = (struct sk_buff *)skb;
176 doff = iph->protocol == IPPROTO_TCP ?
177 ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
178 ip_hdrlen(skb) + sizeof(*hp);
172 179
173 } else if (iph->protocol == IPPROTO_ICMP) { 180 } else if (iph->protocol == IPPROTO_ICMP) {
174 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, 181 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
@@ -198,8 +205,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
198 } 205 }
199#endif 206#endif
200 207
201 return xt_socket_get_sock_v4(net, protocol, saddr, daddr, 208 return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
202 sport, dport, indev); 209 daddr, sport, dport, indev);
203} 210}
204 211
205static bool 212static bool
@@ -318,14 +325,15 @@ extract_icmp6_fields(const struct sk_buff *skb,
318} 325}
319 326
320static struct sock * 327static struct sock *
321xt_socket_get_sock_v6(struct net *net, const u8 protocol, 328xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
329 const u8 protocol,
322 const struct in6_addr *saddr, const struct in6_addr *daddr, 330 const struct in6_addr *saddr, const struct in6_addr *daddr,
323 const __be16 sport, const __be16 dport, 331 const __be16 sport, const __be16 dport,
324 const struct net_device *in) 332 const struct net_device *in)
325{ 333{
326 switch (protocol) { 334 switch (protocol) {
327 case IPPROTO_TCP: 335 case IPPROTO_TCP:
328 return inet6_lookup(net, &tcp_hashinfo, 336 return inet6_lookup(net, &tcp_hashinfo, skb, doff,
329 saddr, sport, daddr, dport, 337 saddr, sport, daddr, dport,
330 in->ifindex); 338 in->ifindex);
331 case IPPROTO_UDP: 339 case IPPROTO_UDP:
@@ -343,6 +351,8 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
343 __be16 uninitialized_var(dport), uninitialized_var(sport); 351 __be16 uninitialized_var(dport), uninitialized_var(sport);
344 const struct in6_addr *daddr = NULL, *saddr = NULL; 352 const struct in6_addr *daddr = NULL, *saddr = NULL;
345 struct ipv6hdr *iph = ipv6_hdr(skb); 353 struct ipv6hdr *iph = ipv6_hdr(skb);
354 struct sk_buff *data_skb = NULL;
355 int doff = 0;
346 int thoff = 0, tproto; 356 int thoff = 0, tproto;
347 357
348 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); 358 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
@@ -362,6 +372,10 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
362 sport = hp->source; 372 sport = hp->source;
363 daddr = &iph->daddr; 373 daddr = &iph->daddr;
364 dport = hp->dest; 374 dport = hp->dest;
375 data_skb = (struct sk_buff *)skb;
376 doff = tproto == IPPROTO_TCP ?
377 thoff + __tcp_hdrlen((struct tcphdr *)hp) :
378 thoff + sizeof(*hp);
365 379
366 } else if (tproto == IPPROTO_ICMPV6) { 380 } else if (tproto == IPPROTO_ICMPV6) {
367 struct ipv6hdr ipv6_var; 381 struct ipv6hdr ipv6_var;
@@ -373,7 +387,7 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
373 return NULL; 387 return NULL;
374 } 388 }
375 389
376 return xt_socket_get_sock_v6(net, tproto, saddr, daddr, 390 return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
377 sport, dport, indev); 391 sport, dport, indev);
378} 392}
379 393
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f0cb92f3ddaf..ada67422234b 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -55,8 +55,8 @@ struct netlbl_domhsh_tbl {
55static DEFINE_SPINLOCK(netlbl_domhsh_lock); 55static DEFINE_SPINLOCK(netlbl_domhsh_lock);
56#define netlbl_domhsh_rcu_deref(p) \ 56#define netlbl_domhsh_rcu_deref(p) \
57 rcu_dereference_check(p, lockdep_is_held(&netlbl_domhsh_lock)) 57 rcu_dereference_check(p, lockdep_is_held(&netlbl_domhsh_lock))
58static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; 58static struct netlbl_domhsh_tbl *netlbl_domhsh;
59static struct netlbl_dom_map *netlbl_domhsh_def = NULL; 59static struct netlbl_dom_map *netlbl_domhsh_def;
60 60
61/* 61/*
62 * Domain Hash Table Helper Functions 62 * Domain Hash Table Helper Functions
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index b0380927f05f..9eaa9a1e8629 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -116,11 +116,11 @@ struct netlbl_unlhsh_walk_arg {
116static DEFINE_SPINLOCK(netlbl_unlhsh_lock); 116static DEFINE_SPINLOCK(netlbl_unlhsh_lock);
117#define netlbl_unlhsh_rcu_deref(p) \ 117#define netlbl_unlhsh_rcu_deref(p) \
118 rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock)) 118 rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock))
119static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL; 119static struct netlbl_unlhsh_tbl *netlbl_unlhsh;
120static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL; 120static struct netlbl_unlhsh_iface *netlbl_unlhsh_def;
121 121
122/* Accept unlabeled packets flag */ 122/* Accept unlabeled packets flag */
123static u8 netlabel_unlabel_acceptflg = 0; 123static u8 netlabel_unlabel_acceptflg;
124 124
125/* NetLabel Generic NETLINK unlabeled family */ 125/* NetLabel Generic NETLINK unlabeled family */
126static struct genl_family netlbl_unlabel_gnl_family = { 126static struct genl_family netlbl_unlabel_gnl_family = {
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
index 2c5e95e9bfbd..5d6e8c05b3d4 100644
--- a/net/netlink/Kconfig
+++ b/net/netlink/Kconfig
@@ -2,15 +2,6 @@
2# Netlink Sockets 2# Netlink Sockets
3# 3#
4 4
5config NETLINK_MMAP
6 bool "NETLINK: mmaped IO"
7 ---help---
8 This option enables support for memory mapped netlink IO. This
9 reduces overhead by avoiding copying data between kernel- and
10 userspace.
11
12 If unsure, say N.
13
14config NETLINK_DIAG 5config NETLINK_DIAG
15 tristate "NETLINK: socket monitoring interface" 6 tristate "NETLINK: socket monitoring interface"
16 default n 7 default n
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f1ffb34e253f..330ebd600f25 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
225 225
226 dev_hold(dev); 226 dev_hold(dev);
227 227
228 if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) 228 if (is_vmalloc_addr(skb->head))
229 nskb = netlink_to_full_skb(skb, GFP_ATOMIC); 229 nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
230 else 230 else
231 nskb = skb_clone(skb, GFP_ATOMIC); 231 nskb = skb_clone(skb, GFP_ATOMIC);
@@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk)
300 wake_up_interruptible(&nlk->wait); 300 wake_up_interruptible(&nlk->wait);
301} 301}
302 302
303#ifdef CONFIG_NETLINK_MMAP
304static bool netlink_rx_is_mmaped(struct sock *sk)
305{
306 return nlk_sk(sk)->rx_ring.pg_vec != NULL;
307}
308
309static bool netlink_tx_is_mmaped(struct sock *sk)
310{
311 return nlk_sk(sk)->tx_ring.pg_vec != NULL;
312}
313
314static __pure struct page *pgvec_to_page(const void *addr)
315{
316 if (is_vmalloc_addr(addr))
317 return vmalloc_to_page(addr);
318 else
319 return virt_to_page(addr);
320}
321
322static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
323{
324 unsigned int i;
325
326 for (i = 0; i < len; i++) {
327 if (pg_vec[i] != NULL) {
328 if (is_vmalloc_addr(pg_vec[i]))
329 vfree(pg_vec[i]);
330 else
331 free_pages((unsigned long)pg_vec[i], order);
332 }
333 }
334 kfree(pg_vec);
335}
336
337static void *alloc_one_pg_vec_page(unsigned long order)
338{
339 void *buffer;
340 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
341 __GFP_NOWARN | __GFP_NORETRY;
342
343 buffer = (void *)__get_free_pages(gfp_flags, order);
344 if (buffer != NULL)
345 return buffer;
346
347 buffer = vzalloc((1 << order) * PAGE_SIZE);
348 if (buffer != NULL)
349 return buffer;
350
351 gfp_flags &= ~__GFP_NORETRY;
352 return (void *)__get_free_pages(gfp_flags, order);
353}
354
355static void **alloc_pg_vec(struct netlink_sock *nlk,
356 struct nl_mmap_req *req, unsigned int order)
357{
358 unsigned int block_nr = req->nm_block_nr;
359 unsigned int i;
360 void **pg_vec;
361
362 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
363 if (pg_vec == NULL)
364 return NULL;
365
366 for (i = 0; i < block_nr; i++) {
367 pg_vec[i] = alloc_one_pg_vec_page(order);
368 if (pg_vec[i] == NULL)
369 goto err1;
370 }
371
372 return pg_vec;
373err1:
374 free_pg_vec(pg_vec, order, block_nr);
375 return NULL;
376}
377
378
379static void
380__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
381 unsigned int order)
382{
383 struct netlink_sock *nlk = nlk_sk(sk);
384 struct sk_buff_head *queue;
385 struct netlink_ring *ring;
386
387 queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
388 ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
389
390 spin_lock_bh(&queue->lock);
391
392 ring->frame_max = req->nm_frame_nr - 1;
393 ring->head = 0;
394 ring->frame_size = req->nm_frame_size;
395 ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
396
397 swap(ring->pg_vec_len, req->nm_block_nr);
398 swap(ring->pg_vec_order, order);
399 swap(ring->pg_vec, pg_vec);
400
401 __skb_queue_purge(queue);
402 spin_unlock_bh(&queue->lock);
403
404 WARN_ON(atomic_read(&nlk->mapped));
405
406 if (pg_vec)
407 free_pg_vec(pg_vec, order, req->nm_block_nr);
408}
409
410static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
411 bool tx_ring)
412{
413 struct netlink_sock *nlk = nlk_sk(sk);
414 struct netlink_ring *ring;
415 void **pg_vec = NULL;
416 unsigned int order = 0;
417
418 ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
419
420 if (atomic_read(&nlk->mapped))
421 return -EBUSY;
422 if (atomic_read(&ring->pending))
423 return -EBUSY;
424
425 if (req->nm_block_nr) {
426 if (ring->pg_vec != NULL)
427 return -EBUSY;
428
429 if ((int)req->nm_block_size <= 0)
430 return -EINVAL;
431 if (!PAGE_ALIGNED(req->nm_block_size))
432 return -EINVAL;
433 if (req->nm_frame_size < NL_MMAP_HDRLEN)
434 return -EINVAL;
435 if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
436 return -EINVAL;
437
438 ring->frames_per_block = req->nm_block_size /
439 req->nm_frame_size;
440 if (ring->frames_per_block == 0)
441 return -EINVAL;
442 if (ring->frames_per_block * req->nm_block_nr !=
443 req->nm_frame_nr)
444 return -EINVAL;
445
446 order = get_order(req->nm_block_size);
447 pg_vec = alloc_pg_vec(nlk, req, order);
448 if (pg_vec == NULL)
449 return -ENOMEM;
450 } else {
451 if (req->nm_frame_nr)
452 return -EINVAL;
453 }
454
455 mutex_lock(&nlk->pg_vec_lock);
456 if (atomic_read(&nlk->mapped) == 0) {
457 __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
458 mutex_unlock(&nlk->pg_vec_lock);
459 return 0;
460 }
461
462 mutex_unlock(&nlk->pg_vec_lock);
463
464 if (pg_vec)
465 free_pg_vec(pg_vec, order, req->nm_block_nr);
466
467 return -EBUSY;
468}
469
470static void netlink_mm_open(struct vm_area_struct *vma)
471{
472 struct file *file = vma->vm_file;
473 struct socket *sock = file->private_data;
474 struct sock *sk = sock->sk;
475
476 if (sk)
477 atomic_inc(&nlk_sk(sk)->mapped);
478}
479
480static void netlink_mm_close(struct vm_area_struct *vma)
481{
482 struct file *file = vma->vm_file;
483 struct socket *sock = file->private_data;
484 struct sock *sk = sock->sk;
485
486 if (sk)
487 atomic_dec(&nlk_sk(sk)->mapped);
488}
489
490static const struct vm_operations_struct netlink_mmap_ops = {
491 .open = netlink_mm_open,
492 .close = netlink_mm_close,
493};
494
495static int netlink_mmap(struct file *file, struct socket *sock,
496 struct vm_area_struct *vma)
497{
498 struct sock *sk = sock->sk;
499 struct netlink_sock *nlk = nlk_sk(sk);
500 struct netlink_ring *ring;
501 unsigned long start, size, expected;
502 unsigned int i;
503 int err = -EINVAL;
504
505 if (vma->vm_pgoff)
506 return -EINVAL;
507
508 mutex_lock(&nlk->pg_vec_lock);
509
510 expected = 0;
511 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
512 if (ring->pg_vec == NULL)
513 continue;
514 expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
515 }
516
517 if (expected == 0)
518 goto out;
519
520 size = vma->vm_end - vma->vm_start;
521 if (size != expected)
522 goto out;
523
524 start = vma->vm_start;
525 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
526 if (ring->pg_vec == NULL)
527 continue;
528
529 for (i = 0; i < ring->pg_vec_len; i++) {
530 struct page *page;
531 void *kaddr = ring->pg_vec[i];
532 unsigned int pg_num;
533
534 for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
535 page = pgvec_to_page(kaddr);
536 err = vm_insert_page(vma, start, page);
537 if (err < 0)
538 goto out;
539 start += PAGE_SIZE;
540 kaddr += PAGE_SIZE;
541 }
542 }
543 }
544
545 atomic_inc(&nlk->mapped);
546 vma->vm_ops = &netlink_mmap_ops;
547 err = 0;
548out:
549 mutex_unlock(&nlk->pg_vec_lock);
550 return err;
551}
552
553static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
554{
555#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
556 struct page *p_start, *p_end;
557
558 /* First page is flushed through netlink_{get,set}_status */
559 p_start = pgvec_to_page(hdr + PAGE_SIZE);
560 p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
561 while (p_start <= p_end) {
562 flush_dcache_page(p_start);
563 p_start++;
564 }
565#endif
566}
567
568static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
569{
570 smp_rmb();
571 flush_dcache_page(pgvec_to_page(hdr));
572 return hdr->nm_status;
573}
574
575static void netlink_set_status(struct nl_mmap_hdr *hdr,
576 enum nl_mmap_status status)
577{
578 smp_mb();
579 hdr->nm_status = status;
580 flush_dcache_page(pgvec_to_page(hdr));
581}
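
netlink_get_status()/netlink_set_status() above make nm_status the hand-off point between kernel and user space: the barrier before the store keeps the payload writes visible before the status changes, and the barrier on the read side keeps payload accesses from being reordered around the status check. A rough userspace counterpart of that ordering, sketched with C11 fences; the function names are made up and the exact fence strength is illustrative rather than a literal mirror of the kernel macros:

#include <stdatomic.h>
#include <linux/netlink.h>	/* pre-4.6 UAPI: struct nl_mmap_hdr */

/* Consumer side: load the status, then fence, so payload reads cannot be
 * hoisted above the status check.
 */
unsigned int frame_status_read(const volatile struct nl_mmap_hdr *hdr)
{
	unsigned int status = hdr->nm_status;

	atomic_thread_fence(memory_order_acquire);
	return status;
}

/* Producer side: fence, then store the status, so payload writes are
 * visible before the frame is handed over.
 */
void frame_status_write(volatile struct nl_mmap_hdr *hdr, unsigned int status)
{
	atomic_thread_fence(memory_order_release);
	hdr->nm_status = status;
}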
582
583static struct nl_mmap_hdr *
584__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
585{
586 unsigned int pg_vec_pos, frame_off;
587
588 pg_vec_pos = pos / ring->frames_per_block;
589 frame_off = pos % ring->frames_per_block;
590
591 return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
592}
593
594static struct nl_mmap_hdr *
595netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
596 enum nl_mmap_status status)
597{
598 struct nl_mmap_hdr *hdr;
599
600 hdr = __netlink_lookup_frame(ring, pos);
601 if (netlink_get_status(hdr) != status)
602 return NULL;
603
604 return hdr;
605}
606
607static struct nl_mmap_hdr *
608netlink_current_frame(const struct netlink_ring *ring,
609 enum nl_mmap_status status)
610{
611 return netlink_lookup_frame(ring, ring->head, status);
612}
613
614static void netlink_increment_head(struct netlink_ring *ring)
615{
616 ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
617}
618
619static void netlink_forward_ring(struct netlink_ring *ring)
620{
621 unsigned int head = ring->head;
622 const struct nl_mmap_hdr *hdr;
623
624 do {
625 hdr = __netlink_lookup_frame(ring, ring->head);
626 if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
627 break;
628 if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
629 break;
630 netlink_increment_head(ring);
631 } while (ring->head != head);
632}
633
634static bool netlink_has_valid_frame(struct netlink_ring *ring)
635{
636 unsigned int head = ring->head, pos = head;
637 const struct nl_mmap_hdr *hdr;
638
639 do {
640 hdr = __netlink_lookup_frame(ring, pos);
641 if (hdr->nm_status == NL_MMAP_STATUS_VALID)
642 return true;
643 pos = pos != 0 ? pos - 1 : ring->frame_max;
644 } while (pos != head);
645
646 return false;
647}
648
649static bool netlink_dump_space(struct netlink_sock *nlk)
650{
651 struct netlink_ring *ring = &nlk->rx_ring;
652 struct nl_mmap_hdr *hdr;
653 unsigned int n;
654
655 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
656 if (hdr == NULL)
657 return false;
658
659 n = ring->head + ring->frame_max / 2;
660 if (n > ring->frame_max)
661 n -= ring->frame_max;
662
663 hdr = __netlink_lookup_frame(ring, n);
664
665 return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
666}
667
668static unsigned int netlink_poll(struct file *file, struct socket *sock,
669 poll_table *wait)
670{
671 struct sock *sk = sock->sk;
672 struct netlink_sock *nlk = nlk_sk(sk);
673 unsigned int mask;
674 int err;
675
676 if (nlk->rx_ring.pg_vec != NULL) {
677 /* Memory mapped sockets don't call recvmsg(), so flow control
678 * for dumps is performed here. A dump is allowed to continue
679 * if at least half the ring is unused.
680 */
681 while (nlk->cb_running && netlink_dump_space(nlk)) {
682 err = netlink_dump(sk);
683 if (err < 0) {
684 sk->sk_err = -err;
685 sk->sk_error_report(sk);
686 break;
687 }
688 }
689 netlink_rcv_wake(sk);
690 }
691
692 mask = datagram_poll(file, sock, wait);
693
694 /* We could already have received frames in the normal receive
695 * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
696 * so if mask contains pollin/etc already, there's no point
697 * walking the ring.
698 */
699 if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
700 spin_lock_bh(&sk->sk_receive_queue.lock);
701 if (nlk->rx_ring.pg_vec) {
702 if (netlink_has_valid_frame(&nlk->rx_ring))
703 mask |= POLLIN | POLLRDNORM;
704 }
705 spin_unlock_bh(&sk->sk_receive_queue.lock);
706 }
707
708 spin_lock_bh(&sk->sk_write_queue.lock);
709 if (nlk->tx_ring.pg_vec) {
710 if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
711 mask |= POLLOUT | POLLWRNORM;
712 }
713 spin_unlock_bh(&sk->sk_write_queue.lock);
714
715 return mask;
716}
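
Putting the pieces together, a mapped receiver never calls recvmsg() for ring-delivered frames: it walks the RX ring from its own head, consuming VALID frames in place, falling back to a normal receive for frames the kernel marked COPY, and returning each slot as UNUSED. A hedged userspace sketch of that loop, assuming the pre-4.6 UAPI; the helper name, buffer size and caller-managed head are invented, and the barriers from the earlier sketch are omitted for brevity:

#include <stddef.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>	/* pre-4.6 UAPI: struct nl_mmap_hdr, NL_MMAP_* */

void drain_rx_ring(int fd, void *ring, unsigned int frame_size,
		   unsigned int frame_nr, unsigned int *head,
		   void (*deliver)(const void *msg, unsigned int len))
{
	char copybuf[16384];

	for (;;) {
		struct nl_mmap_hdr *hdr = (struct nl_mmap_hdr *)
			((char *)ring + (size_t)*head * frame_size);

		if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
			/* Payload sits in the ring right after the header. */
			deliver((char *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
		} else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
			/* Frame did not fit a slot; fetch it the normal way. */
			ssize_t n = recv(fd, copybuf, sizeof(copybuf), MSG_DONTWAIT);

			if (n > 0)
				deliver(copybuf, (unsigned int)n);
		} else {
			break;	/* UNUSED or RESERVED: ring is drained */
		}

		hdr->nm_status = NL_MMAP_STATUS_UNUSED;	/* return the slot */
		*head = (*head + 1) % frame_nr;
	}
}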
717
718static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
719{
720 return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
721}
722
723static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
724 struct netlink_ring *ring,
725 struct nl_mmap_hdr *hdr)
726{
727 unsigned int size;
728 void *data;
729
730 size = ring->frame_size - NL_MMAP_HDRLEN;
731 data = (void *)hdr + NL_MMAP_HDRLEN;
732
733 skb->head = data;
734 skb->data = data;
735 skb_reset_tail_pointer(skb);
736 skb->end = skb->tail + size;
737 skb->len = 0;
738
739 skb->destructor = netlink_skb_destructor;
740 NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
741 NETLINK_CB(skb).sk = sk;
742}
743
744static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
745 u32 dst_portid, u32 dst_group,
746 struct scm_cookie *scm)
747{
748 struct netlink_sock *nlk = nlk_sk(sk);
749 struct netlink_ring *ring;
750 struct nl_mmap_hdr *hdr;
751 struct sk_buff *skb;
752 unsigned int maxlen;
753 int err = 0, len = 0;
754
755 mutex_lock(&nlk->pg_vec_lock);
756
757 ring = &nlk->tx_ring;
758 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
759
760 do {
761 unsigned int nm_len;
762
763 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
764 if (hdr == NULL) {
765 if (!(msg->msg_flags & MSG_DONTWAIT) &&
766 atomic_read(&nlk->tx_ring.pending))
767 schedule();
768 continue;
769 }
770
771 nm_len = ACCESS_ONCE(hdr->nm_len);
772 if (nm_len > maxlen) {
773 err = -EINVAL;
774 goto out;
775 }
776
777 netlink_frame_flush_dcache(hdr, nm_len);
778
779 skb = alloc_skb(nm_len, GFP_KERNEL);
780 if (skb == NULL) {
781 err = -ENOBUFS;
782 goto out;
783 }
784 __skb_put(skb, nm_len);
785 memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
786 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
787
788 netlink_increment_head(ring);
789
790 NETLINK_CB(skb).portid = nlk->portid;
791 NETLINK_CB(skb).dst_group = dst_group;
792 NETLINK_CB(skb).creds = scm->creds;
793
794 err = security_netlink_send(sk, skb);
795 if (err) {
796 kfree_skb(skb);
797 goto out;
798 }
799
800 if (unlikely(dst_group)) {
801 atomic_inc(&skb->users);
802 netlink_broadcast(sk, skb, dst_portid, dst_group,
803 GFP_KERNEL);
804 }
805 err = netlink_unicast(sk, skb, dst_portid,
806 msg->msg_flags & MSG_DONTWAIT);
807 if (err < 0)
808 goto out;
809 len += err;
810
811 } while (hdr != NULL ||
812 (!(msg->msg_flags & MSG_DONTWAIT) &&
813 atomic_read(&nlk->tx_ring.pending)));
814
815 if (len > 0)
816 err = len;
817out:
818 mutex_unlock(&nlk->pg_vec_lock);
819 return err;
820}
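
The transmit direction works the other way round: userspace fills an UNUSED TX frame, flips it to VALID, and then issues a sendmsg() whose only iovec has a NULL base, which is the signal on which netlink_mmap_sendmsg() above used to be invoked (the detection itself is removed in the netlink_sendmsg() hunk further below). A sketch of that userspace side, again assuming the pre-4.6 UAPI; ring_send() and the caller-managed 'frame' pointer are hypothetical:

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/netlink.h>	/* pre-4.6 UAPI: struct nl_mmap_hdr, NL_MMAP_* */

/* Queue one message on the TX ring and kick the kernel.  'frame' points at
 * the next TX slot, already observed to be NL_MMAP_STATUS_UNUSED.
 */
int ring_send(int fd, struct nl_mmap_hdr *frame, const void *msg,
	      unsigned int len)
{
	struct iovec iov = { .iov_base = NULL, .iov_len = 0 };
	struct msghdr mh  = { .msg_iov = &iov, .msg_iovlen = 1 };

	memcpy((char *)frame + NL_MMAP_HDRLEN, msg, len);
	frame->nm_len = len;
	frame->nm_status = NL_MMAP_STATUS_VALID;	/* hand the slot over */

	return sendmsg(fd, &mh, 0) < 0 ? -1 : 0;
}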
821
822static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
823{
824 struct nl_mmap_hdr *hdr;
825
826 hdr = netlink_mmap_hdr(skb);
827 hdr->nm_len = skb->len;
828 hdr->nm_group = NETLINK_CB(skb).dst_group;
829 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
830 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
831 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
832 netlink_frame_flush_dcache(hdr, hdr->nm_len);
833 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
834
835 NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
836 kfree_skb(skb);
837}
838
839static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
840{
841 struct netlink_sock *nlk = nlk_sk(sk);
842 struct netlink_ring *ring = &nlk->rx_ring;
843 struct nl_mmap_hdr *hdr;
844
845 spin_lock_bh(&sk->sk_receive_queue.lock);
846 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
847 if (hdr == NULL) {
848 spin_unlock_bh(&sk->sk_receive_queue.lock);
849 kfree_skb(skb);
850 netlink_overrun(sk);
851 return;
852 }
853 netlink_increment_head(ring);
854 __skb_queue_tail(&sk->sk_receive_queue, skb);
855 spin_unlock_bh(&sk->sk_receive_queue.lock);
856
857 hdr->nm_len = skb->len;
858 hdr->nm_group = NETLINK_CB(skb).dst_group;
859 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
860 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
861 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
862 netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
863}
864
865#else /* CONFIG_NETLINK_MMAP */
866#define netlink_rx_is_mmaped(sk) false
867#define netlink_tx_is_mmaped(sk) false
868#define netlink_mmap sock_no_mmap
869#define netlink_poll datagram_poll
870#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0
871#endif /* CONFIG_NETLINK_MMAP */
872
873static void netlink_skb_destructor(struct sk_buff *skb) 303static void netlink_skb_destructor(struct sk_buff *skb)
874{ 304{
875#ifdef CONFIG_NETLINK_MMAP
876 struct nl_mmap_hdr *hdr;
877 struct netlink_ring *ring;
878 struct sock *sk;
879
880 /* If a packet from the kernel to userspace was freed because of an
881 * error without being delivered to userspace, the kernel must reset
882 * the status. In the direction userspace to kernel, the status is
883 * always reset here after the packet was processed and freed.
884 */
885 if (netlink_skb_is_mmaped(skb)) {
886 hdr = netlink_mmap_hdr(skb);
887 sk = NETLINK_CB(skb).sk;
888
889 if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
890 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
891 ring = &nlk_sk(sk)->tx_ring;
892 } else {
893 if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
894 hdr->nm_len = 0;
895 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
896 }
897 ring = &nlk_sk(sk)->rx_ring;
898 }
899
900 WARN_ON(atomic_read(&ring->pending) == 0);
901 atomic_dec(&ring->pending);
902 sock_put(sk);
903
904 skb->head = NULL;
905 }
906#endif
907 if (is_vmalloc_addr(skb->head)) { 305 if (is_vmalloc_addr(skb->head)) {
908 if (!skb->cloned || 306 if (!skb->cloned ||
909 !atomic_dec_return(&(skb_shinfo(skb)->dataref))) 307 !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
@@ -937,18 +335,6 @@ static void netlink_sock_destruct(struct sock *sk)
937 } 335 }
938 336
939 skb_queue_purge(&sk->sk_receive_queue); 337 skb_queue_purge(&sk->sk_receive_queue);
940#ifdef CONFIG_NETLINK_MMAP
941 if (1) {
942 struct nl_mmap_req req;
943
944 memset(&req, 0, sizeof(req));
945 if (nlk->rx_ring.pg_vec)
946 __netlink_set_ring(sk, &req, false, NULL, 0);
947 memset(&req, 0, sizeof(req));
948 if (nlk->tx_ring.pg_vec)
949 __netlink_set_ring(sk, &req, true, NULL, 0);
950 }
951#endif /* CONFIG_NETLINK_MMAP */
952 338
953 if (!sock_flag(sk, SOCK_DEAD)) { 339 if (!sock_flag(sk, SOCK_DEAD)) {
954 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 340 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -1194,9 +580,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
1194 mutex_init(nlk->cb_mutex); 580 mutex_init(nlk->cb_mutex);
1195 } 581 }
1196 init_waitqueue_head(&nlk->wait); 582 init_waitqueue_head(&nlk->wait);
1197#ifdef CONFIG_NETLINK_MMAP
1198 mutex_init(&nlk->pg_vec_lock);
1199#endif
1200 583
1201 sk->sk_destruct = netlink_sock_destruct; 584 sk->sk_destruct = netlink_sock_destruct;
1202 sk->sk_protocol = protocol; 585 sk->sk_protocol = protocol;
@@ -1305,7 +688,7 @@ static int netlink_release(struct socket *sock)
1305 688
1306 skb_queue_purge(&sk->sk_write_queue); 689 skb_queue_purge(&sk->sk_write_queue);
1307 690
1308 if (nlk->portid) { 691 if (nlk->portid && nlk->bound) {
1309 struct netlink_notify n = { 692 struct netlink_notify n = {
1310 .net = sock_net(sk), 693 .net = sock_net(sk),
1311 .protocol = sk->sk_protocol, 694 .protocol = sk->sk_protocol,
@@ -1650,6 +1033,14 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
1650 return 0; 1033 return 0;
1651} 1034}
1652 1035
1036static int netlink_ioctl(struct socket *sock, unsigned int cmd,
1037 unsigned long arg)
1038{
1039 /* try to hand this ioctl down to the NIC drivers.
1040 */
1041 return -ENOIOCTLCMD;
1042}
1043
1653static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) 1044static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
1654{ 1045{
1655 struct sock *sock; 1046 struct sock *sock;
@@ -1728,8 +1119,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1728 nlk = nlk_sk(sk); 1119 nlk = nlk_sk(sk);
1729 1120
1730 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1121 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1731 test_bit(NETLINK_S_CONGESTED, &nlk->state)) && 1122 test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
1732 !netlink_skb_is_mmaped(skb)) {
1733 DECLARE_WAITQUEUE(wait, current); 1123 DECLARE_WAITQUEUE(wait, current);
1734 if (!*timeo) { 1124 if (!*timeo) {
1735 if (!ssk || netlink_is_kernel(ssk)) 1125 if (!ssk || netlink_is_kernel(ssk))
@@ -1767,14 +1157,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1767 1157
1768 netlink_deliver_tap(skb); 1158 netlink_deliver_tap(skb);
1769 1159
1770#ifdef CONFIG_NETLINK_MMAP 1160 skb_queue_tail(&sk->sk_receive_queue, skb);
1771 if (netlink_skb_is_mmaped(skb))
1772 netlink_queue_mmaped_skb(sk, skb);
1773 else if (netlink_rx_is_mmaped(sk))
1774 netlink_ring_set_copied(sk, skb);
1775 else
1776#endif /* CONFIG_NETLINK_MMAP */
1777 skb_queue_tail(&sk->sk_receive_queue, skb);
1778 sk->sk_data_ready(sk); 1161 sk->sk_data_ready(sk);
1779 return len; 1162 return len;
1780} 1163}
@@ -1798,9 +1181,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
1798 int delta; 1181 int delta;
1799 1182
1800 WARN_ON(skb->sk != NULL); 1183 WARN_ON(skb->sk != NULL);
1801 if (netlink_skb_is_mmaped(skb))
1802 return skb;
1803
1804 delta = skb->end - skb->tail; 1184 delta = skb->end - skb->tail;
1805 if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) 1185 if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
1806 return skb; 1186 return skb;
@@ -1876,79 +1256,6 @@ retry:
1876} 1256}
1877EXPORT_SYMBOL(netlink_unicast); 1257EXPORT_SYMBOL(netlink_unicast);
1878 1258
1879struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
1880 unsigned int ldiff, u32 dst_portid,
1881 gfp_t gfp_mask)
1882{
1883#ifdef CONFIG_NETLINK_MMAP
1884 unsigned int maxlen, linear_size;
1885 struct sock *sk = NULL;
1886 struct sk_buff *skb;
1887 struct netlink_ring *ring;
1888 struct nl_mmap_hdr *hdr;
1889
1890 sk = netlink_getsockbyportid(ssk, dst_portid);
1891 if (IS_ERR(sk))
1892 goto out;
1893
1894 ring = &nlk_sk(sk)->rx_ring;
1895 /* fast-path without atomic ops for common case: non-mmaped receiver */
1896 if (ring->pg_vec == NULL)
1897 goto out_put;
1898
1899 /* We need to account the full linear size needed as a ring
1900 * slot cannot have non-linear parts.
1901 */
1902 linear_size = size + ldiff;
1903 if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
1904 goto out_put;
1905
1906 skb = alloc_skb_head(gfp_mask);
1907 if (skb == NULL)
1908 goto err1;
1909
1910 spin_lock_bh(&sk->sk_receive_queue.lock);
1911 /* check again under lock */
1912 if (ring->pg_vec == NULL)
1913 goto out_free;
1914
1915 /* check again under lock */
1916 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
1917 if (maxlen < linear_size)
1918 goto out_free;
1919
1920 netlink_forward_ring(ring);
1921 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
1922 if (hdr == NULL)
1923 goto err2;
1924
1925 netlink_ring_setup_skb(skb, sk, ring, hdr);
1926 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
1927 atomic_inc(&ring->pending);
1928 netlink_increment_head(ring);
1929
1930 spin_unlock_bh(&sk->sk_receive_queue.lock);
1931 return skb;
1932
1933err2:
1934 kfree_skb(skb);
1935 spin_unlock_bh(&sk->sk_receive_queue.lock);
1936 netlink_overrun(sk);
1937err1:
1938 sock_put(sk);
1939 return NULL;
1940
1941out_free:
1942 kfree_skb(skb);
1943 spin_unlock_bh(&sk->sk_receive_queue.lock);
1944out_put:
1945 sock_put(sk);
1946out:
1947#endif
1948 return alloc_skb(size, gfp_mask);
1949}
1950EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
1951
1952int netlink_has_listeners(struct sock *sk, unsigned int group) 1259int netlink_has_listeners(struct sock *sk, unsigned int group)
1953{ 1260{
1954 int res = 0; 1261 int res = 0;
@@ -2225,8 +1532,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
2225 if (level != SOL_NETLINK) 1532 if (level != SOL_NETLINK)
2226 return -ENOPROTOOPT; 1533 return -ENOPROTOOPT;
2227 1534
2228 if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && 1535 if (optlen >= sizeof(int) &&
2229 optlen >= sizeof(int) &&
2230 get_user(val, (unsigned int __user *)optval)) 1536 get_user(val, (unsigned int __user *)optval))
2231 return -EFAULT; 1537 return -EFAULT;
2232 1538
@@ -2279,25 +1585,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
2279 } 1585 }
2280 err = 0; 1586 err = 0;
2281 break; 1587 break;
2282#ifdef CONFIG_NETLINK_MMAP
2283 case NETLINK_RX_RING:
2284 case NETLINK_TX_RING: {
2285 struct nl_mmap_req req;
2286
2287 /* Rings might consume more memory than queue limits, require
2288 * CAP_NET_ADMIN.
2289 */
2290 if (!capable(CAP_NET_ADMIN))
2291 return -EPERM;
2292 if (optlen < sizeof(req))
2293 return -EINVAL;
2294 if (copy_from_user(&req, optval, sizeof(req)))
2295 return -EFAULT;
2296 err = netlink_set_ring(sk, &req,
2297 optname == NETLINK_TX_RING);
2298 break;
2299 }
2300#endif /* CONFIG_NETLINK_MMAP */
2301 case NETLINK_LISTEN_ALL_NSID: 1588 case NETLINK_LISTEN_ALL_NSID:
2302 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) 1589 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
2303 return -EPERM; 1590 return -EPERM;
@@ -2467,18 +1754,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2467 smp_rmb(); 1754 smp_rmb();
2468 } 1755 }
2469 1756
2470 /* It's a really convoluted way for userland to ask for mmaped
2471 * sendmsg(), but that's what we've got...
2472 */
2473 if (netlink_tx_is_mmaped(sk) &&
2474 iter_is_iovec(&msg->msg_iter) &&
2475 msg->msg_iter.nr_segs == 1 &&
2476 msg->msg_iter.iov->iov_base == NULL) {
2477 err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
2478 &scm);
2479 goto out;
2480 }
2481
2482 err = -EMSGSIZE; 1757 err = -EMSGSIZE;
2483 if (len > sk->sk_sndbuf - 32) 1758 if (len > sk->sk_sndbuf - 32)
2484 goto out; 1759 goto out;
@@ -2794,8 +2069,7 @@ static int netlink_dump(struct sock *sk)
2794 goto errout_skb; 2069 goto errout_skb;
2795 } 2070 }
2796 2071
2797 if (!netlink_rx_is_mmaped(sk) && 2072 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2798 atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2799 goto errout_skb; 2073 goto errout_skb;
2800 2074
2801 /* NLMSG_GOODSIZE is small to avoid high order allocations being 2075 /* NLMSG_GOODSIZE is small to avoid high order allocations being
@@ -2808,15 +2082,12 @@ static int netlink_dump(struct sock *sk)
2808 2082
2809 if (alloc_min_size < nlk->max_recvmsg_len) { 2083 if (alloc_min_size < nlk->max_recvmsg_len) {
2810 alloc_size = nlk->max_recvmsg_len; 2084 alloc_size = nlk->max_recvmsg_len;
2811 skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, 2085 skb = alloc_skb(alloc_size, GFP_KERNEL |
2812 GFP_KERNEL | 2086 __GFP_NOWARN | __GFP_NORETRY);
2813 __GFP_NOWARN |
2814 __GFP_NORETRY);
2815 } 2087 }
2816 if (!skb) { 2088 if (!skb) {
2817 alloc_size = alloc_min_size; 2089 alloc_size = alloc_min_size;
2818 skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, 2090 skb = alloc_skb(alloc_size, GFP_KERNEL);
2819 GFP_KERNEL);
2820 } 2091 }
2821 if (!skb) 2092 if (!skb)
2822 goto errout_skb; 2093 goto errout_skb;
@@ -2831,8 +2102,7 @@ static int netlink_dump(struct sock *sk)
2831 * reasonable static buffer based on the expected largest dump of a 2102 * reasonable static buffer based on the expected largest dump of a
2832 * single netdev. The outcome is MSG_TRUNC error. 2103 * single netdev. The outcome is MSG_TRUNC error.
2833 */ 2104 */
2834 if (!netlink_rx_is_mmaped(sk)) 2105 skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2835 skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2836 netlink_skb_set_owner_r(skb, sk); 2106 netlink_skb_set_owner_r(skb, sk);
2837 2107
2838 len = cb->dump(skb, cb); 2108 len = cb->dump(skb, cb);
@@ -2884,16 +2154,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2884 struct netlink_sock *nlk; 2154 struct netlink_sock *nlk;
2885 int ret; 2155 int ret;
2886 2156
2887 /* Memory mapped dump requests need to be copied to avoid looping 2157 atomic_inc(&skb->users);
2888 * on the pending state in netlink_mmap_sendmsg() while the CB hold
2889 * a reference to the skb.
2890 */
2891 if (netlink_skb_is_mmaped(skb)) {
2892 skb = skb_copy(skb, GFP_KERNEL);
2893 if (skb == NULL)
2894 return -ENOBUFS;
2895 } else
2896 atomic_inc(&skb->users);
2897 2158
2898 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); 2159 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2899 if (sk == NULL) { 2160 if (sk == NULL) {
@@ -2966,8 +2227,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
2966 if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) 2227 if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
2967 payload += nlmsg_len(nlh); 2228 payload += nlmsg_len(nlh);
2968 2229
2969 skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), 2230 skb = nlmsg_new(payload, GFP_KERNEL);
2970 NETLINK_CB(in_skb).portid, GFP_KERNEL);
2971 if (!skb) { 2231 if (!skb) {
2972 struct sock *sk; 2232 struct sock *sk;
2973 2233
@@ -3241,15 +2501,15 @@ static const struct proto_ops netlink_ops = {
3241 .socketpair = sock_no_socketpair, 2501 .socketpair = sock_no_socketpair,
3242 .accept = sock_no_accept, 2502 .accept = sock_no_accept,
3243 .getname = netlink_getname, 2503 .getname = netlink_getname,
3244 .poll = netlink_poll, 2504 .poll = datagram_poll,
3245 .ioctl = sock_no_ioctl, 2505 .ioctl = netlink_ioctl,
3246 .listen = sock_no_listen, 2506 .listen = sock_no_listen,
3247 .shutdown = sock_no_shutdown, 2507 .shutdown = sock_no_shutdown,
3248 .setsockopt = netlink_setsockopt, 2508 .setsockopt = netlink_setsockopt,
3249 .getsockopt = netlink_getsockopt, 2509 .getsockopt = netlink_getsockopt,
3250 .sendmsg = netlink_sendmsg, 2510 .sendmsg = netlink_sendmsg,
3251 .recvmsg = netlink_recvmsg, 2511 .recvmsg = netlink_recvmsg,
3252 .mmap = netlink_mmap, 2512 .mmap = sock_no_mmap,
3253 .sendpage = sock_no_sendpage, 2513 .sendpage = sock_no_sendpage,
3254}; 2514};
3255 2515
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 14437d9b1965..e68ef9ccd703 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -44,12 +44,6 @@ struct netlink_sock {
44 int (*netlink_bind)(struct net *net, int group); 44 int (*netlink_bind)(struct net *net, int group);
45 void (*netlink_unbind)(struct net *net, int group); 45 void (*netlink_unbind)(struct net *net, int group);
46 struct module *module; 46 struct module *module;
47#ifdef CONFIG_NETLINK_MMAP
48 struct mutex pg_vec_lock;
49 struct netlink_ring rx_ring;
50 struct netlink_ring tx_ring;
51 atomic_t mapped;
52#endif /* CONFIG_NETLINK_MMAP */
53 47
54 struct rhash_head node; 48 struct rhash_head node;
55 struct rcu_head rcu; 49 struct rcu_head rcu;
@@ -60,15 +54,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
60 return container_of(sk, struct netlink_sock, sk); 54 return container_of(sk, struct netlink_sock, sk);
61} 55}
62 56
63static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
64{
65#ifdef CONFIG_NETLINK_MMAP
66 return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
67#else
68 return false;
69#endif /* CONFIG_NETLINK_MMAP */
70}
71
72struct netlink_table { 57struct netlink_table {
73 struct rhashtable hash; 58 struct rhashtable hash;
74 struct hlist_head mc_list; 59 struct hlist_head mc_list;
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 3ee63a3cff30..8dd836a8dd60 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -8,41 +8,6 @@
8 8
9#include "af_netlink.h" 9#include "af_netlink.h"
10 10
11#ifdef CONFIG_NETLINK_MMAP
12static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
13 struct sk_buff *nlskb)
14{
15 struct netlink_diag_ring ndr;
16
17 ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
18 ndr.ndr_block_nr = ring->pg_vec_len;
19 ndr.ndr_frame_size = ring->frame_size;
20 ndr.ndr_frame_nr = ring->frame_max + 1;
21
22 return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
23}
24
25static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
26{
27 struct netlink_sock *nlk = nlk_sk(sk);
28 int ret;
29
30 mutex_lock(&nlk->pg_vec_lock);
31 ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
32 if (!ret)
33 ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
34 nlskb);
35 mutex_unlock(&nlk->pg_vec_lock);
36
37 return ret;
38}
39#else
40static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
41{
42 return 0;
43}
44#endif
45
46static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) 11static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
47{ 12{
48 struct netlink_sock *nlk = nlk_sk(sk); 13 struct netlink_sock *nlk = nlk_sk(sk);
@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
87 sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) 52 sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
88 goto out_nlmsg_trim; 53 goto out_nlmsg_trim;
89 54
90 if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
91 sk_diag_put_rings_cfg(sk, skb))
92 goto out_nlmsg_trim;
93
94 nlmsg_end(skb, nlh); 55 nlmsg_end(skb, nlh);
95 return 0; 56 return 0;
96 57
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f830326b3b1d..a09132a69869 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -463,26 +463,6 @@ int genl_unregister_family(struct genl_family *family)
463EXPORT_SYMBOL(genl_unregister_family); 463EXPORT_SYMBOL(genl_unregister_family);
464 464
465/** 465/**
466 * genlmsg_new_unicast - Allocate generic netlink message for unicast
467 * @payload: size of the message payload
468 * @info: information on destination
469 * @flags: the type of memory to allocate
470 *
471 * Allocates a new sk_buff large enough to cover the specified payload
472 * plus required Netlink headers. Will check receiving socket for
473 * memory mapped i/o capability and use it if enabled. Will fall back
474 * to non-mapped skb if message size exceeds the frame size of the ring.
475 */
476struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info,
477 gfp_t flags)
478{
479 size_t len = nlmsg_total_size(genlmsg_total_size(payload));
480
481 return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags);
482}
483EXPORT_SYMBOL_GPL(genlmsg_new_unicast);
484
485/**
486 * genlmsg_put - Add generic netlink header to netlink message 466 * genlmsg_put - Add generic netlink header to netlink message
487 * @skb: socket buffer holding the message 467 * @skb: socket buffer holding the message
488 * @portid: netlink portid the message is addressed to 468 * @portid: netlink portid the message is addressed to
@@ -580,6 +560,10 @@ static int genl_family_rcv_msg(struct genl_family *family,
580 !netlink_capable(skb, CAP_NET_ADMIN)) 560 !netlink_capable(skb, CAP_NET_ADMIN))
581 return -EPERM; 561 return -EPERM;
582 562
563 if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
564 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
565 return -EPERM;
566
583 if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { 567 if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
584 int rc; 568 int rc;
585 569
@@ -638,7 +622,6 @@ static int genl_family_rcv_msg(struct genl_family *family,
638 info.genlhdr = nlmsg_data(nlh); 622 info.genlhdr = nlmsg_data(nlh);
639 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; 623 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
640 info.attrs = attrbuf; 624 info.attrs = attrbuf;
641 info.dst_sk = skb->sk;
642 genl_info_net_set(&info, net); 625 genl_info_net_set(&info, net);
643 memset(&info.user_ptr, 0, sizeof(info.user_ptr)); 626 memset(&info.user_ptr, 0, sizeof(info.user_ptr));
644 627
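
Besides dropping genlmsg_new_unicast(), this hunk starts honouring the new GENL_UNS_ADMIN_PERM flag: ops carrying it require CAP_NET_ADMIN in the sending socket's network namespace, whereas the older GENL_ADMIN_PERM keeps requiring it in the initial namespace. A minimal in-tree sketch of an op using the flag; the command number, handler and array name are made-up placeholders:

#include <net/genetlink.h>

/* Illustrative only: EXAMPLE_CMD_SET, example_set_doit and example_genl_ops
 * are invented names, not part of this patch.
 */
enum { EXAMPLE_CMD_SET = 1 };

static int example_set_doit(struct sk_buff *skb, struct genl_info *info)
{
	return 0;
}

static const struct genl_ops example_genl_ops[] = {
	{
		.cmd	= EXAMPLE_CMD_SET,
		.flags	= GENL_UNS_ADMIN_PERM,	/* CAP_NET_ADMIN in the socket's netns */
		.doit	= example_set_doit,
	},
};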
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 3621a902cb6e..3425532c39f7 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -663,7 +663,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
663 return -ENOBUFS; 663 return -ENOBUFS;
664 } 664 }
665 665
666 msg_data = kzalloc(len, GFP_KERNEL); 666 msg_data = kmalloc(len, GFP_USER | __GFP_NOWARN);
667 if (msg_data == NULL) 667 if (msg_data == NULL)
668 return -ENOMEM; 668 return -ENOMEM;
669 669
@@ -729,7 +729,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
729 if (local == NULL) 729 if (local == NULL)
730 return -ENODEV; 730 return -ENODEV;
731 731
732 msg_data = kzalloc(len, GFP_KERNEL); 732 msg_data = kmalloc(len, GFP_USER | __GFP_NOWARN);
733 if (msg_data == NULL) 733 if (msg_data == NULL)
734 return -ENOMEM; 734 return -ENOMEM;
735 735
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ecf0a0196f18..b9edf5fae6ae 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -509,6 +509,11 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
509 memset(llcp_addr, 0, sizeof(*llcp_addr)); 509 memset(llcp_addr, 0, sizeof(*llcp_addr));
510 *len = sizeof(struct sockaddr_nfc_llcp); 510 *len = sizeof(struct sockaddr_nfc_llcp);
511 511
512 lock_sock(sk);
513 if (!llcp_sock->dev) {
514 release_sock(sk);
515 return -EBADFD;
516 }
512 llcp_addr->sa_family = AF_NFC; 517 llcp_addr->sa_family = AF_NFC;
513 llcp_addr->dev_idx = llcp_sock->dev->idx; 518 llcp_addr->dev_idx = llcp_sock->dev->idx;
514 llcp_addr->target_idx = llcp_sock->target_idx; 519 llcp_addr->target_idx = llcp_sock->target_idx;
@@ -518,6 +523,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
518 llcp_addr->service_name_len = llcp_sock->service_name_len; 523 llcp_addr->service_name_len = llcp_sock->service_name_len;
519 memcpy(llcp_addr->service_name, llcp_sock->service_name, 524 memcpy(llcp_addr->service_name, llcp_sock->service_name,
520 llcp_addr->service_name_len); 525 llcp_addr->service_name_len);
526 release_sock(sk);
521 527
522 return 0; 528 return 0;
523} 529}
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 21d8875673a4..c468eabd6943 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -171,14 +171,7 @@ static int nci_uart_tty_open(struct tty_struct *tty)
171 tty->disc_data = NULL; 171 tty->disc_data = NULL;
172 tty->receive_room = 65536; 172 tty->receive_room = 65536;
173 173
174 /* Flush any pending characters in the driver and line discipline. */ 174 /* Flush any pending characters in the driver */
175
176 /* FIXME: why is this needed. Note don't use ldisc_ref here as the
177 * open path is before the ldisc is referencable.
178 */
179
180 if (tty->ldisc->ops->flush_buffer)
181 tty->ldisc->ops->flush_buffer(tty);
182 tty_driver_flush_buffer(tty); 175 tty_driver_flush_buffer(tty);
183 176
184 return 0; 177 return 0;
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index d143aa9f6654..ce947292ae77 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -6,10 +6,14 @@ config OPENVSWITCH
6 tristate "Open vSwitch" 6 tristate "Open vSwitch"
7 depends on INET 7 depends on INET
8 depends on !NF_CONNTRACK || \ 8 depends on !NF_CONNTRACK || \
9 (NF_CONNTRACK && (!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6)) 9 (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
10 (!NF_NAT || NF_NAT) && \
11 (!NF_NAT_IPV4 || NF_NAT_IPV4) && \
12 (!NF_NAT_IPV6 || NF_NAT_IPV6)))
10 select LIBCRC32C 13 select LIBCRC32C
11 select MPLS 14 select MPLS
12 select NET_MPLS_GSO 15 select NET_MPLS_GSO
16 select DST_CACHE
13 ---help--- 17 ---help---
14 Open vSwitch is a multilayer Ethernet switch targeted at virtualized 18 Open vSwitch is a multilayer Ethernet switch targeted at virtualized
15 environments. In addition to supporting a variety of features 19 environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 2d59df521915..879185fe183f 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
158 new_mpls_lse = (__be32 *)skb_mpls_header(skb); 158 new_mpls_lse = (__be32 *)skb_mpls_header(skb);
159 *new_mpls_lse = mpls->mpls_lse; 159 *new_mpls_lse = mpls->mpls_lse;
160 160
161 if (skb->ip_summed == CHECKSUM_COMPLETE) 161 skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
162 skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
163 MPLS_HLEN, 0));
164 162
165 hdr = eth_hdr(skb); 163 hdr = eth_hdr(skb);
166 hdr->h_proto = mpls->mpls_ethertype; 164 hdr->h_proto = mpls->mpls_ethertype;
@@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
280 ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, 278 ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
281 mask->eth_dst); 279 mask->eth_dst);
282 280
283 ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); 281 skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
284 282
285 ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); 283 ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
286 ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); 284 ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
@@ -463,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
463 mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); 461 mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
464 462
465 if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { 463 if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
466 set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, 464 set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
467 true); 465 true);
468 memcpy(&flow_key->ipv6.addr.src, masked, 466 memcpy(&flow_key->ipv6.addr.src, masked,
469 sizeof(flow_key->ipv6.addr.src)); 467 sizeof(flow_key->ipv6.addr.src));
@@ -485,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
485 NULL, &flags) 483 NULL, &flags)
486 != NEXTHDR_ROUTING); 484 != NEXTHDR_ROUTING);
487 485
488 set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, 486 set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
489 recalc_csum); 487 recalc_csum);
490 memcpy(&flow_key->ipv6.addr.dst, masked, 488 memcpy(&flow_key->ipv6.addr.dst, masked,
491 sizeof(flow_key->ipv6.addr.dst)); 489 sizeof(flow_key->ipv6.addr.dst));
@@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
639 /* Reconstruct the MAC header. */ 637 /* Reconstruct the MAC header. */
640 skb_push(skb, data->l2_len); 638 skb_push(skb, data->l2_len);
641 memcpy(skb->data, &data->l2_data, data->l2_len); 639 memcpy(skb->data, &data->l2_data, data->l2_len);
642 ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); 640 skb_postpush_rcsum(skb, skb->data, data->l2_len);
643 skb_reset_mac_header(skb); 641 skb_reset_mac_header(skb);
644 642
645 ovs_vport_send(vport, skb); 643 ovs_vport_send(vport, skb);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index ee6ff8ffc12d..10c84d882881 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -13,21 +13,31 @@
13 13
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/openvswitch.h> 15#include <linux/openvswitch.h>
16#include <linux/tcp.h>
17#include <linux/udp.h>
18#include <linux/sctp.h>
16#include <net/ip.h> 19#include <net/ip.h>
17#include <net/netfilter/nf_conntrack_core.h> 20#include <net/netfilter/nf_conntrack_core.h>
18#include <net/netfilter/nf_conntrack_helper.h> 21#include <net/netfilter/nf_conntrack_helper.h>
19#include <net/netfilter/nf_conntrack_labels.h> 22#include <net/netfilter/nf_conntrack_labels.h>
23#include <net/netfilter/nf_conntrack_seqadj.h>
20#include <net/netfilter/nf_conntrack_zones.h> 24#include <net/netfilter/nf_conntrack_zones.h>
21#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 25#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
22 26
27#ifdef CONFIG_NF_NAT_NEEDED
28#include <linux/netfilter/nf_nat.h>
29#include <net/netfilter/nf_nat_core.h>
30#include <net/netfilter/nf_nat_l3proto.h>
31#endif
32
23#include "datapath.h" 33#include "datapath.h"
24#include "conntrack.h" 34#include "conntrack.h"
25#include "flow.h" 35#include "flow.h"
26#include "flow_netlink.h" 36#include "flow_netlink.h"
27 37
28struct ovs_ct_len_tbl { 38struct ovs_ct_len_tbl {
29 size_t maxlen; 39 int maxlen;
30 size_t minlen; 40 int minlen;
31}; 41};
32 42
33/* Metadata mark for masked write to conntrack mark */ 43/* Metadata mark for masked write to conntrack mark */
@@ -42,15 +52,25 @@ struct md_labels {
42 struct ovs_key_ct_labels mask; 52 struct ovs_key_ct_labels mask;
43}; 53};
44 54
55enum ovs_ct_nat {
56 OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */
57 OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
58 OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
59};
60
45/* Conntrack action context for execution. */ 61/* Conntrack action context for execution. */
46struct ovs_conntrack_info { 62struct ovs_conntrack_info {
47 struct nf_conntrack_helper *helper; 63 struct nf_conntrack_helper *helper;
48 struct nf_conntrack_zone zone; 64 struct nf_conntrack_zone zone;
49 struct nf_conn *ct; 65 struct nf_conn *ct;
50 u8 commit : 1; 66 u8 commit : 1;
67 u8 nat : 3; /* enum ovs_ct_nat */
51 u16 family; 68 u16 family;
52 struct md_mark mark; 69 struct md_mark mark;
53 struct md_labels labels; 70 struct md_labels labels;
71#ifdef CONFIG_NF_NAT_NEEDED
72 struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */
73#endif
54}; 74};
55 75
56static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); 76static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
@@ -75,7 +95,6 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
75 switch (ctinfo) { 95 switch (ctinfo) {
76 case IP_CT_ESTABLISHED_REPLY: 96 case IP_CT_ESTABLISHED_REPLY:
77 case IP_CT_RELATED_REPLY: 97 case IP_CT_RELATED_REPLY:
78 case IP_CT_NEW_REPLY:
79 ct_state |= OVS_CS_F_REPLY_DIR; 98 ct_state |= OVS_CS_F_REPLY_DIR;
80 break; 99 break;
81 default: 100 default:
@@ -92,7 +111,6 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
92 ct_state |= OVS_CS_F_RELATED; 111 ct_state |= OVS_CS_F_RELATED;
93 break; 112 break;
94 case IP_CT_NEW: 113 case IP_CT_NEW:
95 case IP_CT_NEW_REPLY:
96 ct_state |= OVS_CS_F_NEW; 114 ct_state |= OVS_CS_F_NEW;
97 break; 115 break;
98 default: 116 default:
@@ -139,12 +157,15 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
139 ovs_ct_get_labels(ct, &key->ct.labels); 157 ovs_ct_get_labels(ct, &key->ct.labels);
140} 158}
141 159
142/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has 160/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
143 * previously sent the packet to conntrack via the ct action. 161 * previously sent the packet to conntrack via the ct action. If
162 * 'keep_nat_flags' is true, the existing NAT flags retained, else they are
163 * initialized from the connection status.
144 */ 164 */
145static void ovs_ct_update_key(const struct sk_buff *skb, 165static void ovs_ct_update_key(const struct sk_buff *skb,
146 const struct ovs_conntrack_info *info, 166 const struct ovs_conntrack_info *info,
147 struct sw_flow_key *key, bool post_ct) 167 struct sw_flow_key *key, bool post_ct,
168 bool keep_nat_flags)
148{ 169{
149 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; 170 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
150 enum ip_conntrack_info ctinfo; 171 enum ip_conntrack_info ctinfo;
@@ -154,10 +175,22 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
154 ct = nf_ct_get(skb, &ctinfo); 175 ct = nf_ct_get(skb, &ctinfo);
155 if (ct) { 176 if (ct) {
156 state = ovs_ct_get_state(ctinfo); 177 state = ovs_ct_get_state(ctinfo);
178 /* All unconfirmed entries are NEW connections. */
157 if (!nf_ct_is_confirmed(ct)) 179 if (!nf_ct_is_confirmed(ct))
158 state |= OVS_CS_F_NEW; 180 state |= OVS_CS_F_NEW;
181 /* OVS persists the related flag for the duration of the
182 * connection.
183 */
159 if (ct->master) 184 if (ct->master)
160 state |= OVS_CS_F_RELATED; 185 state |= OVS_CS_F_RELATED;
186 if (keep_nat_flags) {
187 state |= key->ct.state & OVS_CS_F_NAT_MASK;
188 } else {
189 if (ct->status & IPS_SRC_NAT)
190 state |= OVS_CS_F_SRC_NAT;
191 if (ct->status & IPS_DST_NAT)
192 state |= OVS_CS_F_DST_NAT;
193 }
161 zone = nf_ct_zone(ct); 194 zone = nf_ct_zone(ct);
162 } else if (post_ct) { 195 } else if (post_ct) {
163 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; 196 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
@@ -167,9 +200,12 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
167 __ovs_ct_update_key(key, state, zone, ct); 200 __ovs_ct_update_key(key, state, zone, ct);
168} 201}
169 202
203/* This is called to initialize CT key fields possibly coming in from the local
204 * stack.
205 */
170void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) 206void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
171{ 207{
172 ovs_ct_update_key(skb, NULL, key, false); 208 ovs_ct_update_key(skb, NULL, key, false, false);
173} 209}
174 210
175int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) 211int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
@@ -201,7 +237,6 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
201 struct nf_conn *ct; 237 struct nf_conn *ct;
202 u32 new_mark; 238 u32 new_mark;
203 239
204
205 /* The connection could be invalid, in which case set_mark is no-op. */ 240 /* The connection could be invalid, in which case set_mark is no-op. */
206 ct = nf_ct_get(skb, &ctinfo); 241 ct = nf_ct_get(skb, &ctinfo);
207 if (!ct) 242 if (!ct)
@@ -259,6 +294,7 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
259 enum ip_conntrack_info ctinfo; 294 enum ip_conntrack_info ctinfo;
260 unsigned int protoff; 295 unsigned int protoff;
261 struct nf_conn *ct; 296 struct nf_conn *ct;
297 int err;
262 298
263 ct = nf_ct_get(skb, &ctinfo); 299 ct = nf_ct_get(skb, &ctinfo);
264 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 300 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
@@ -295,7 +331,18 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
295 return NF_DROP; 331 return NF_DROP;
296 } 332 }
297 333
298 return helper->help(skb, protoff, ct, ctinfo); 334 err = helper->help(skb, protoff, ct, ctinfo);
335 if (err != NF_ACCEPT)
336 return err;
337
338 /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
 339 * FTP with NAT) adjusting the TCP payload size when mangling IP
340 * addresses and/or port numbers in the text-based control connection.
341 */
342 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
343 !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
344 return NF_DROP;
345 return NF_ACCEPT;
299} 346}
300 347
301/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 348/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
@@ -320,6 +367,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
320 } else if (key->eth.type == htons(ETH_P_IPV6)) { 367 } else if (key->eth.type == htons(ETH_P_IPV6)) {
321 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 368 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
322 369
370 skb_orphan(skb);
323 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 371 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
324 err = nf_ct_frag6_gather(net, skb, user); 372 err = nf_ct_frag6_gather(net, skb, user);
325 if (err) 373 if (err)
@@ -352,14 +400,101 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
352 return __nf_ct_expect_find(net, zone, &tuple); 400 return __nf_ct_expect_find(net, zone, &tuple);
353} 401}
354 402
403/* This replicates logic from nf_conntrack_core.c that is not exported. */
404static enum ip_conntrack_info
405ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
406{
407 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
408
409 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
410 return IP_CT_ESTABLISHED_REPLY;
411 /* Once we've had two way comms, always ESTABLISHED. */
412 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
413 return IP_CT_ESTABLISHED;
414 if (test_bit(IPS_EXPECTED_BIT, &ct->status))
415 return IP_CT_RELATED;
416 return IP_CT_NEW;
417}
418
419/* Find an existing connection which this packet belongs to without
420 * re-attributing statistics or modifying the connection state. This allows an
421 * skb->nfct lost due to an upcall to be recovered during actions execution.
422 *
423 * Must be called with rcu_read_lock.
424 *
425 * On success, populates skb->nfct and skb->nfctinfo, and returns the
426 * connection. Returns NULL if there is no existing entry.
427 */
428static struct nf_conn *
429ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
430 u8 l3num, struct sk_buff *skb)
431{
432 struct nf_conntrack_l3proto *l3proto;
433 struct nf_conntrack_l4proto *l4proto;
434 struct nf_conntrack_tuple tuple;
435 struct nf_conntrack_tuple_hash *h;
436 enum ip_conntrack_info ctinfo;
437 struct nf_conn *ct;
438 unsigned int dataoff;
439 u8 protonum;
440
441 l3proto = __nf_ct_l3proto_find(l3num);
442 if (!l3proto) {
443 pr_debug("ovs_ct_find_existing: Can't get l3proto\n");
444 return NULL;
445 }
446 if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
447 &protonum) <= 0) {
448 pr_debug("ovs_ct_find_existing: Can't get protonum\n");
449 return NULL;
450 }
451 l4proto = __nf_ct_l4proto_find(l3num, protonum);
452 if (!l4proto) {
453 pr_debug("ovs_ct_find_existing: Can't get l4proto\n");
454 return NULL;
455 }
456 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
457 protonum, net, &tuple, l3proto, l4proto)) {
458 pr_debug("ovs_ct_find_existing: Can't get tuple\n");
459 return NULL;
460 }
461
462 /* look for tuple match */
463 h = nf_conntrack_find_get(net, zone, &tuple);
464 if (!h)
465 return NULL; /* Not found. */
466
467 ct = nf_ct_tuplehash_to_ctrack(h);
468
469 ctinfo = ovs_ct_get_info(h);
470 if (ctinfo == IP_CT_NEW) {
471 /* This should not happen. */
472 WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct);
473 }
474 skb->nfct = &ct->ct_general;
475 skb->nfctinfo = ctinfo;
476 return ct;
477}
478
355/* Determine whether skb->nfct is equal to the result of conntrack lookup. */ 479/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
356static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, 480static bool skb_nfct_cached(struct net *net,
357 const struct ovs_conntrack_info *info) 481 const struct sw_flow_key *key,
482 const struct ovs_conntrack_info *info,
483 struct sk_buff *skb)
358{ 484{
359 enum ip_conntrack_info ctinfo; 485 enum ip_conntrack_info ctinfo;
360 struct nf_conn *ct; 486 struct nf_conn *ct;
361 487
362 ct = nf_ct_get(skb, &ctinfo); 488 ct = nf_ct_get(skb, &ctinfo);
489 /* If no ct, check if we have evidence that an existing conntrack entry
490 * might be found for this skb. This happens when we lose a skb->nfct
491 * due to an upcall. If the connection was not confirmed, it is not
492 * cached and needs to be run through conntrack again.
493 */
494 if (!ct && key->ct.state & OVS_CS_F_TRACKED &&
495 !(key->ct.state & OVS_CS_F_INVALID) &&
496 key->ct.zone == info->zone.id)
497 ct = ovs_ct_find_existing(net, &info->zone, info->family, skb);
363 if (!ct) 498 if (!ct)
364 return false; 499 return false;
365 if (!net_eq(net, read_pnet(&ct->ct_net))) 500 if (!net_eq(net, read_pnet(&ct->ct_net)))
@@ -377,6 +512,207 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
377 return true; 512 return true;
378} 513}
379 514
515#ifdef CONFIG_NF_NAT_NEEDED
516/* Modelled after nf_nat_ipv[46]_fn().
517 * range is only used for new, uninitialized NAT state.
518 * Returns either NF_ACCEPT or NF_DROP.
519 */
520static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
521 enum ip_conntrack_info ctinfo,
522 const struct nf_nat_range *range,
523 enum nf_nat_manip_type maniptype)
524{
525 int hooknum, nh_off, err = NF_ACCEPT;
526
527 nh_off = skb_network_offset(skb);
528 skb_pull(skb, nh_off);
529
530 /* See HOOK2MANIP(). */
531 if (maniptype == NF_NAT_MANIP_SRC)
532 hooknum = NF_INET_LOCAL_IN; /* Source NAT */
533 else
534 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
535
536 switch (ctinfo) {
537 case IP_CT_RELATED:
538 case IP_CT_RELATED_REPLY:
539 if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
540 skb->protocol == htons(ETH_P_IP) &&
541 ip_hdr(skb)->protocol == IPPROTO_ICMP) {
542 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
543 hooknum))
544 err = NF_DROP;
545 goto push;
546 } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
547 skb->protocol == htons(ETH_P_IPV6)) {
548 __be16 frag_off;
549 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
550 int hdrlen = ipv6_skip_exthdr(skb,
551 sizeof(struct ipv6hdr),
552 &nexthdr, &frag_off);
553
554 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
555 if (!nf_nat_icmpv6_reply_translation(skb, ct,
556 ctinfo,
557 hooknum,
558 hdrlen))
559 err = NF_DROP;
560 goto push;
561 }
562 }
563 /* Non-ICMP, fall thru to initialize if needed. */
564 case IP_CT_NEW:
565 /* Seen it before? This can happen for loopback, retrans,
566 * or local packets.
567 */
568 if (!nf_nat_initialized(ct, maniptype)) {
569 /* Initialize according to the NAT action. */
570 err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
571 /* Action is set up to establish a new
572 * mapping.
573 */
574 ? nf_nat_setup_info(ct, range, maniptype)
575 : nf_nat_alloc_null_binding(ct, hooknum);
576 if (err != NF_ACCEPT)
577 goto push;
578 }
579 break;
580
581 case IP_CT_ESTABLISHED:
582 case IP_CT_ESTABLISHED_REPLY:
583 break;
584
585 default:
586 err = NF_DROP;
587 goto push;
588 }
589
590 err = nf_nat_packet(ct, ctinfo, hooknum, skb);
591push:
592 skb_push(skb, nh_off);
593
594 return err;
595}
596
597static void ovs_nat_update_key(struct sw_flow_key *key,
598 const struct sk_buff *skb,
599 enum nf_nat_manip_type maniptype)
600{
601 if (maniptype == NF_NAT_MANIP_SRC) {
602 __be16 src;
603
604 key->ct.state |= OVS_CS_F_SRC_NAT;
605 if (key->eth.type == htons(ETH_P_IP))
606 key->ipv4.addr.src = ip_hdr(skb)->saddr;
607 else if (key->eth.type == htons(ETH_P_IPV6))
608 memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
609 sizeof(key->ipv6.addr.src));
610 else
611 return;
612
613 if (key->ip.proto == IPPROTO_UDP)
614 src = udp_hdr(skb)->source;
615 else if (key->ip.proto == IPPROTO_TCP)
616 src = tcp_hdr(skb)->source;
617 else if (key->ip.proto == IPPROTO_SCTP)
618 src = sctp_hdr(skb)->source;
619 else
620 return;
621
622 key->tp.src = src;
623 } else {
624 __be16 dst;
625
626 key->ct.state |= OVS_CS_F_DST_NAT;
627 if (key->eth.type == htons(ETH_P_IP))
628 key->ipv4.addr.dst = ip_hdr(skb)->daddr;
629 else if (key->eth.type == htons(ETH_P_IPV6))
630 memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
631 sizeof(key->ipv6.addr.dst));
632 else
633 return;
634
635 if (key->ip.proto == IPPROTO_UDP)
636 dst = udp_hdr(skb)->dest;
637 else if (key->ip.proto == IPPROTO_TCP)
638 dst = tcp_hdr(skb)->dest;
639 else if (key->ip.proto == IPPROTO_SCTP)
640 dst = sctp_hdr(skb)->dest;
641 else
642 return;
643
644 key->tp.dst = dst;
645 }
646}
647
648/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
649static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
650 const struct ovs_conntrack_info *info,
651 struct sk_buff *skb, struct nf_conn *ct,
652 enum ip_conntrack_info ctinfo)
653{
654 enum nf_nat_manip_type maniptype;
655 int err;
656
657 if (nf_ct_is_untracked(ct)) {
658 /* A NAT action may only be performed on tracked packets. */
659 return NF_ACCEPT;
660 }
661
662 /* Add NAT extension if not confirmed yet. */
663 if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
664 return NF_ACCEPT; /* Can't NAT. */
665
666 /* Determine NAT type.
667 * Check if the NAT type can be deduced from the tracked connection.
668 * Make sure new expected connections (IP_CT_RELATED) are NATted only
669 * when committing.
670 */
671 if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
672 ct->status & IPS_NAT_MASK &&
673 (ctinfo != IP_CT_RELATED || info->commit)) {
674 /* NAT an established or related connection like before. */
675 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
676 /* This is the REPLY direction for a connection
677 * for which NAT was applied in the forward
678 * direction. Do the reverse NAT.
679 */
680 maniptype = ct->status & IPS_SRC_NAT
681 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
682 else
683 maniptype = ct->status & IPS_SRC_NAT
684 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
685 } else if (info->nat & OVS_CT_SRC_NAT) {
686 maniptype = NF_NAT_MANIP_SRC;
687 } else if (info->nat & OVS_CT_DST_NAT) {
688 maniptype = NF_NAT_MANIP_DST;
689 } else {
690 return NF_ACCEPT; /* Connection is not NATed. */
691 }
692 err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
693
694 /* Mark NAT done if successful and update the flow key. */
695 if (err == NF_ACCEPT)
696 ovs_nat_update_key(key, skb, maniptype);
697
698 return err;
699}
700#else /* !CONFIG_NF_NAT_NEEDED */
701static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
702 const struct ovs_conntrack_info *info,
703 struct sk_buff *skb, struct nf_conn *ct,
704 enum ip_conntrack_info ctinfo)
705{
706 return NF_ACCEPT;
707}
708#endif
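
For connections that already carry NAT state, ovs_ct_nat() above picks the manip type by direction: a connection that was source-NATed in its original direction needs its destination rewritten for reply-direction packets, and the other way round. The same decision restated as a tiny self-contained C model, with plain booleans standing in for ct->status and ctinfo:

#include <stdbool.h>

enum manip { MANIP_SRC, MANIP_DST };

/* Mirror of the maniptype selection for connections with existing NAT
 * state: reply-direction packets undo the forward-direction mangling.
 */
enum manip choose_manip(bool conn_src_nat, bool reply_dir)
{
	if (reply_dir)
		return conn_src_nat ? MANIP_DST : MANIP_SRC;
	return conn_src_nat ? MANIP_SRC : MANIP_DST;
}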
709
710/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
711 * not done already. Update key with new CT state after passing the packet
712 * through conntrack.
713 * Note that if the packet is deemed invalid by conntrack, skb->nfct will be
714 * set to NULL and 0 will be returned.
715 */
380static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 716static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
381 const struct ovs_conntrack_info *info, 717 const struct ovs_conntrack_info *info,
382 struct sk_buff *skb) 718 struct sk_buff *skb)
@@ -386,8 +722,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
386 * actually run the packet through conntrack twice unless it's for a 722 * actually run the packet through conntrack twice unless it's for a
387 * different zone. 723 * different zone.
388 */ 724 */
389 if (!skb_nfct_cached(net, skb, info)) { 725 bool cached = skb_nfct_cached(net, key, info, skb);
726 enum ip_conntrack_info ctinfo;
727 struct nf_conn *ct;
728
729 if (!cached) {
390 struct nf_conn *tmpl = info->ct; 730 struct nf_conn *tmpl = info->ct;
731 int err;
391 732
392 /* Associate skb with specified zone. */ 733 /* Associate skb with specified zone. */
393 if (tmpl) { 734 if (tmpl) {
@@ -398,17 +739,66 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
398 skb->nfctinfo = IP_CT_NEW; 739 skb->nfctinfo = IP_CT_NEW;
399 } 740 }
400 741
401 if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING, 742 /* Repeat if requested, see nf_iterate(). */
402 skb) != NF_ACCEPT) 743 do {
744 err = nf_conntrack_in(net, info->family,
745 NF_INET_PRE_ROUTING, skb);
746 } while (err == NF_REPEAT);
747
748 if (err != NF_ACCEPT)
403 return -ENOENT; 749 return -ENOENT;
404 750
405 if (ovs_ct_helper(skb, info->family) != NF_ACCEPT) { 751 /* Clear CT state NAT flags to mark that we have not yet done
406 WARN_ONCE(1, "helper rejected packet"); 752 * NAT after the nf_conntrack_in() call. We can actually clear
753 * the whole state, as it will be re-initialized below.
754 */
755 key->ct.state = 0;
756
757 /* Update the key, but keep the NAT flags. */
758 ovs_ct_update_key(skb, info, key, true, true);
759 }
760
761 ct = nf_ct_get(skb, &ctinfo);
762 if (ct) {
763 /* Packets starting a new connection must be NATted before the
764 * helper, so that the helper knows about the NAT. We enforce
765 * this by delaying both NAT and helper calls for unconfirmed
766 * connections until the committing CT action. For later
767 * packets NAT and Helper may be called in either order.
768 *
769 * NAT will be done only if the CT action has NAT, and only
770 * once per packet (per zone), as guarded by the NAT bits in
771 * the key->ct.state.
772 */
773 if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) &&
774 (nf_ct_is_confirmed(ct) || info->commit) &&
775 ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
407 return -EINVAL; 776 return -EINVAL;
408 } 777 }
409 }
410 778
411 ovs_ct_update_key(skb, info, key, true); 779 /* Userspace may decide to perform a ct lookup without a helper
780 * specified followed by a (recirculate and) commit with one.
781 * Therefore, for unconfirmed connections which we will commit,
782 * we need to attach the helper here.
783 */
784 if (!nf_ct_is_confirmed(ct) && info->commit &&
785 info->helper && !nfct_help(ct)) {
786 int err = __nf_ct_try_assign_helper(ct, info->ct,
787 GFP_ATOMIC);
788 if (err)
789 return err;
790 }
791
792 /* Call the helper only if:
793 * - nf_conntrack_in() was executed above ("!cached") for a
794 * confirmed connection, or
795 * - When committing an unconfirmed connection.
796 */
797 if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
798 ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
799 return -EINVAL;
800 }
801 }
412 802
413 return 0; 803 return 0;
414} 804}
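
The hunk above wraps nf_conntrack_in() in a retry loop (re-running it while the verdict is NF_REPEAT) and defers NAT and helper invocation for unconfirmed connections until commit time. A minimal, illustrative userspace sketch of that retry pattern follows; conntrack_in_once() and the verdict enum are hypothetical stand-ins, not the kernel API.

/* Illustrative only: models the "repeat until a final verdict" loop the
 * patch adds around nf_conntrack_in().  conntrack_in_once() is a made-up
 * stand-in for the real call. */
#include <stdio.h>

enum verdict { V_ACCEPT, V_DROP, V_REPEAT };

static enum verdict conntrack_in_once(int *attempts)
{
	/* Pretend the first pass asks to be re-run (e.g. a helper rewrote
	 * the packet) and the second pass accepts it. */
	return (*attempts)++ ? V_ACCEPT : V_REPEAT;
}

int main(void)
{
	int attempts = 0;
	enum verdict v;

	do {
		v = conntrack_in_once(&attempts);
	} while (v == V_REPEAT);

	printf("final verdict %s after %d pass(es)\n",
	       v == V_ACCEPT ? "ACCEPT" : "DROP", attempts);
	return v == V_ACCEPT ? 0 : 1;
}
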
@@ -420,19 +810,24 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
420{ 810{
421 struct nf_conntrack_expect *exp; 811 struct nf_conntrack_expect *exp;
422 812
813 /* If we pass an expected packet through nf_conntrack_in() the
814 * expectation is typically removed, but the packet could still be
815 * lost in upcall processing. To prevent this from happening we
816 * perform an explicit expectation lookup. Expected connections are
817 * always new, and will be passed through conntrack only when they are
818 * committed, as it is OK to remove the expectation at that time.
819 */
423 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); 820 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
424 if (exp) { 821 if (exp) {
425 u8 state; 822 u8 state;
426 823
824 /* NOTE: New connections are NATted and Helped only when
825 * committed, so we are not calling into NAT here.
826 */
427 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; 827 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
428 __ovs_ct_update_key(key, state, &info->zone, exp->master); 828 __ovs_ct_update_key(key, state, &info->zone, exp->master);
429 } else { 829 } else
430 int err; 830 return __ovs_ct_lookup(net, key, info, skb);
431
432 err = __ovs_ct_lookup(net, key, info, skb);
433 if (err)
434 return err;
435 }
436 831
437 return 0; 832 return 0;
438} 833}
@@ -442,21 +837,12 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
442 const struct ovs_conntrack_info *info, 837 const struct ovs_conntrack_info *info,
443 struct sk_buff *skb) 838 struct sk_buff *skb)
444{ 839{
445 u8 state;
446 int err; 840 int err;
447 841
448 state = key->ct.state;
449 if (key->ct.zone == info->zone.id &&
450 ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
451 /* Previous lookup has shown that this connection is already
452 * tracked and committed. Skip committing.
453 */
454 return 0;
455 }
456
457 err = __ovs_ct_lookup(net, key, info, skb); 842 err = __ovs_ct_lookup(net, key, info, skb);
458 if (err) 843 if (err)
459 return err; 844 return err;
845 /* This is a no-op if the connection has already been confirmed. */
460 if (nf_conntrack_confirm(skb) != NF_ACCEPT) 846 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
461 return -EINVAL; 847 return -EINVAL;
462 848
@@ -541,6 +927,136 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
541 return 0; 927 return 0;
542} 928}
543 929
930#ifdef CONFIG_NF_NAT_NEEDED
931static int parse_nat(const struct nlattr *attr,
932 struct ovs_conntrack_info *info, bool log)
933{
934 struct nlattr *a;
935 int rem;
936 bool have_ip_max = false;
937 bool have_proto_max = false;
938 bool ip_vers = (info->family == NFPROTO_IPV6);
939
940 nla_for_each_nested(a, attr, rem) {
941 static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
942 [OVS_NAT_ATTR_SRC] = {0, 0},
943 [OVS_NAT_ATTR_DST] = {0, 0},
944 [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr),
945 sizeof(struct in6_addr)},
946 [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr),
947 sizeof(struct in6_addr)},
948 [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)},
949 [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)},
950 [OVS_NAT_ATTR_PERSISTENT] = {0, 0},
951 [OVS_NAT_ATTR_PROTO_HASH] = {0, 0},
952 [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0},
953 };
954 int type = nla_type(a);
955
956 if (type > OVS_NAT_ATTR_MAX) {
957 OVS_NLERR(log,
958 "Unknown NAT attribute (type=%d, max=%d).\n",
959 type, OVS_NAT_ATTR_MAX);
960 return -EINVAL;
961 }
962
963 if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
964 OVS_NLERR(log,
965 "NAT attribute type %d has unexpected length (%d != %d).\n",
966 type, nla_len(a),
967 ovs_nat_attr_lens[type][ip_vers]);
968 return -EINVAL;
969 }
970
971 switch (type) {
972 case OVS_NAT_ATTR_SRC:
973 case OVS_NAT_ATTR_DST:
974 if (info->nat) {
975 OVS_NLERR(log,
976 "Only one type of NAT may be specified.\n"
977 );
978 return -ERANGE;
979 }
980 info->nat |= OVS_CT_NAT;
981 info->nat |= ((type == OVS_NAT_ATTR_SRC)
982 ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT);
983 break;
984
985 case OVS_NAT_ATTR_IP_MIN:
986 nla_memcpy(&info->range.min_addr, a,
987 sizeof(info->range.min_addr));
988 info->range.flags |= NF_NAT_RANGE_MAP_IPS;
989 break;
990
991 case OVS_NAT_ATTR_IP_MAX:
992 have_ip_max = true;
993 nla_memcpy(&info->range.max_addr, a,
994 sizeof(info->range.max_addr));
995 info->range.flags |= NF_NAT_RANGE_MAP_IPS;
996 break;
997
998 case OVS_NAT_ATTR_PROTO_MIN:
999 info->range.min_proto.all = htons(nla_get_u16(a));
1000 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
1001 break;
1002
1003 case OVS_NAT_ATTR_PROTO_MAX:
1004 have_proto_max = true;
1005 info->range.max_proto.all = htons(nla_get_u16(a));
1006 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
1007 break;
1008
1009 case OVS_NAT_ATTR_PERSISTENT:
1010 info->range.flags |= NF_NAT_RANGE_PERSISTENT;
1011 break;
1012
1013 case OVS_NAT_ATTR_PROTO_HASH:
1014 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM;
1015 break;
1016
1017 case OVS_NAT_ATTR_PROTO_RANDOM:
1018 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY;
1019 break;
1020
1021 default:
1022 OVS_NLERR(log, "Unknown nat attribute (%d).\n", type);
1023 return -EINVAL;
1024 }
1025 }
1026
1027 if (rem > 0) {
1028 OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem);
1029 return -EINVAL;
1030 }
1031 if (!info->nat) {
1032 /* Do not allow flags if no type is given. */
1033 if (info->range.flags) {
1034 OVS_NLERR(log,
1035 "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n"
1036 );
1037 return -EINVAL;
1038 }
1039 info->nat = OVS_CT_NAT; /* NAT existing connections. */
1040 } else if (!info->commit) {
1041 OVS_NLERR(log,
1042 "NAT attributes may be specified only when CT COMMIT flag is also specified.\n"
1043 );
1044 return -EINVAL;
1045 }
1046 /* Allow missing IP_MAX. */
1047 if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) {
1048 memcpy(&info->range.max_addr, &info->range.min_addr,
1049 sizeof(info->range.max_addr));
1050 }
1051 /* Allow missing PROTO_MAX. */
1052 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
1053 !have_proto_max) {
1054 info->range.max_proto.all = info->range.min_proto.all;
1055 }
1056 return 0;
1057}
1058#endif
1059
544static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { 1060static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
545 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1061 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
546 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), 1062 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
@@ -550,7 +1066,11 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
550 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), 1066 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels),
551 .maxlen = sizeof(struct md_labels) }, 1067 .maxlen = sizeof(struct md_labels) },
552 [OVS_CT_ATTR_HELPER] = { .minlen = 1, 1068 [OVS_CT_ATTR_HELPER] = { .minlen = 1,
553 .maxlen = NF_CT_HELPER_NAME_LEN } 1069 .maxlen = NF_CT_HELPER_NAME_LEN },
1070#ifdef CONFIG_NF_NAT_NEEDED
1071 /* NAT length is checked when parsing the nested attributes. */
1072 [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX },
1073#endif
554}; 1074};
555 1075
556static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, 1076static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
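
parse_nat() above checks every nested OVS_NAT_ATTR_* against a per-address-family length table and, when only a minimum address or port is supplied, reuses it as the maximum. The following standalone sketch models only that defaulting step; struct nat_range here is a simplified stand-in, not the kernel's nf_nat_range.

/* Illustrative model of the range-defaulting at the end of parse_nat():
 * a missing IP_MAX or PROTO_MAX falls back to the corresponding minimum. */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <arpa/inet.h>

struct nat_range {                        /* simplified stand-in */
	struct in_addr min_addr, max_addr;
	unsigned int min_port, max_port;  /* host byte order here */
};

static void finish_range(struct nat_range *r, bool have_ip_max,
			 bool have_port_max)
{
	if (!have_ip_max)
		memcpy(&r->max_addr, &r->min_addr, sizeof(r->max_addr));
	if (!have_port_max)
		r->max_port = r->min_port;
}

int main(void)
{
	struct nat_range r = { .min_port = 4000 };
	char b1[INET_ADDRSTRLEN], b2[INET_ADDRSTRLEN];

	inet_pton(AF_INET, "10.0.0.1", &r.min_addr);
	finish_range(&r, false, false);   /* only minimums were supplied */

	printf("range %s-%s, ports %u-%u\n",
	       inet_ntop(AF_INET, &r.min_addr, b1, sizeof(b1)),
	       inet_ntop(AF_INET, &r.max_addr, b2, sizeof(b2)),
	       r.min_port, r.max_port);
	return 0;
}
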
@@ -617,6 +1137,15 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
617 return -EINVAL; 1137 return -EINVAL;
618 } 1138 }
619 break; 1139 break;
1140#ifdef CONFIG_NF_NAT_NEEDED
1141 case OVS_CT_ATTR_NAT: {
1142 int err = parse_nat(a, info, log);
1143
1144 if (err)
1145 return err;
1146 break;
1147 }
1148#endif
620 default: 1149 default:
621 OVS_NLERR(log, "Unknown conntrack attr (%d)", 1150 OVS_NLERR(log, "Unknown conntrack attr (%d)",
622 type); 1151 type);
@@ -704,6 +1233,74 @@ err_free_ct:
704 return err; 1233 return err;
705} 1234}
706 1235
1236#ifdef CONFIG_NF_NAT_NEEDED
1237static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
1238 struct sk_buff *skb)
1239{
1240 struct nlattr *start;
1241
1242 start = nla_nest_start(skb, OVS_CT_ATTR_NAT);
1243 if (!start)
1244 return false;
1245
1246 if (info->nat & OVS_CT_SRC_NAT) {
1247 if (nla_put_flag(skb, OVS_NAT_ATTR_SRC))
1248 return false;
1249 } else if (info->nat & OVS_CT_DST_NAT) {
1250 if (nla_put_flag(skb, OVS_NAT_ATTR_DST))
1251 return false;
1252 } else {
1253 goto out;
1254 }
1255
1256 if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
1257 if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
1258 info->family == NFPROTO_IPV4) {
1259 if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
1260 info->range.min_addr.ip) ||
1261 (info->range.max_addr.ip
1262 != info->range.min_addr.ip &&
1263 (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
1264 info->range.max_addr.ip))))
1265 return false;
1266 } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
1267 info->family == NFPROTO_IPV6) {
1268 if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
1269 &info->range.min_addr.in6) ||
1270 (memcmp(&info->range.max_addr.in6,
1271 &info->range.min_addr.in6,
1272 sizeof(info->range.max_addr.in6)) &&
1273 (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX,
1274 &info->range.max_addr.in6))))
1275 return false;
1276 } else {
1277 return false;
1278 }
1279 }
1280 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
1281 (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
1282 ntohs(info->range.min_proto.all)) ||
1283 (info->range.max_proto.all != info->range.min_proto.all &&
1284 nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
1285 ntohs(info->range.max_proto.all)))))
1286 return false;
1287
1288 if (info->range.flags & NF_NAT_RANGE_PERSISTENT &&
1289 nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT))
1290 return false;
1291 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM &&
1292 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH))
1293 return false;
1294 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY &&
1295 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM))
1296 return false;
1297out:
1298 nla_nest_end(skb, start);
1299
1300 return true;
1301}
1302#endif
1303
707int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, 1304int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
708 struct sk_buff *skb) 1305 struct sk_buff *skb)
709{ 1306{
@@ -732,7 +1329,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
732 ct_info->helper->name)) 1329 ct_info->helper->name))
733 return -EMSGSIZE; 1330 return -EMSGSIZE;
734 } 1331 }
735 1332#ifdef CONFIG_NF_NAT_NEEDED
1333 if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
1334 return -EMSGSIZE;
1335#endif
736 nla_nest_end(skb, start); 1336 nla_nest_end(skb, start);
737 1337
738 return 0; 1338 return 0;
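
When ovs_ct_nat_to_attr() dumps the NAT configuration back to userspace it emits the maximum address or port only when it differs from the minimum. A tiny sketch of that rule, assuming a hypothetical emit_u16() helper rather than the kernel's nla_put_* calls:

/* Illustrative only: mirrors the "emit the maximum only when it differs
 * from the minimum" rule used when dumping a NAT range. */
#include <stdio.h>
#include <stdint.h>

static void emit_u16(const char *name, uint16_t v)
{
	printf("%s=%u\n", name, (unsigned int)v);
}

static void dump_port_range(uint16_t min_port, uint16_t max_port)
{
	emit_u16("PROTO_MIN", min_port);
	if (max_port != min_port)          /* skip redundant attribute */
		emit_u16("PROTO_MAX", max_port);
}

int main(void)
{
	dump_port_range(32768, 32768);     /* single port: only PROTO_MIN */
	dump_port_range(32768, 61000);     /* real range: both attributes */
	return 0;
}
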
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index a7544f405c16..8f6230bd6183 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -37,7 +37,8 @@ void ovs_ct_free_action(const struct nlattr *a);
37 37
38#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ 38#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
39 OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ 39 OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
40 OVS_CS_F_INVALID | OVS_CS_F_TRACKED) 40 OVS_CS_F_INVALID | OVS_CS_F_TRACKED | \
41 OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
41#else 42#else
42#include <linux/errno.h> 43#include <linux/errno.h>
43 44
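
The conntrack.h change widens CT_SUPPORTED_MASK so that the new source/destination NAT bits count as supported connection-state flags. A small sketch of the usual "reject any bit outside the mask" check; the flag values below mirror the OVS uapi at the time of this change and should be treated as an assumption of the example.

/* Illustrative check of a conntrack state word against the widened mask. */
#include <stdio.h>

#define OVS_CS_F_NEW         0x01
#define OVS_CS_F_ESTABLISHED 0x02
#define OVS_CS_F_RELATED     0x04
#define OVS_CS_F_REPLY_DIR   0x08
#define OVS_CS_F_INVALID     0x10
#define OVS_CS_F_TRACKED     0x20
#define OVS_CS_F_SRC_NAT     0x40
#define OVS_CS_F_DST_NAT     0x80

#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
			   OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
			   OVS_CS_F_INVALID | OVS_CS_F_TRACKED | \
			   OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)

int main(void)
{
	unsigned int state = OVS_CS_F_TRACKED | OVS_CS_F_ESTABLISHED |
			     OVS_CS_F_SRC_NAT;

	/* A state word is acceptable only if every set bit is supported. */
	printf("state 0x%02x %s\n", state,
	       (state & ~CT_SUPPORTED_MASK) ? "unsupported" : "supported");
	return 0;
}
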
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index deadfdab1bc3..0cc66a4e492d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -422,10 +422,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
422 struct sk_buff *nskb = NULL; 422 struct sk_buff *nskb = NULL;
423 struct sk_buff *user_skb = NULL; /* to be queued to userspace */ 423 struct sk_buff *user_skb = NULL; /* to be queued to userspace */
424 struct nlattr *nla; 424 struct nlattr *nla;
425 struct genl_info info = {
426 .dst_sk = ovs_dp_get_net(dp)->genl_sock,
427 .snd_portid = upcall_info->portid,
428 };
429 size_t len; 425 size_t len;
430 unsigned int hlen; 426 unsigned int hlen;
431 int err, dp_ifindex; 427 int err, dp_ifindex;
@@ -466,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
466 hlen = skb->len; 462 hlen = skb->len;
467 463
468 len = upcall_msg_size(upcall_info, hlen); 464 len = upcall_msg_size(upcall_info, hlen);
469 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); 465 user_skb = genlmsg_new(len, GFP_ATOMIC);
470 if (!user_skb) { 466 if (!user_skb) {
471 err = -ENOMEM; 467 err = -ENOMEM;
472 goto out; 468 goto out;
@@ -654,7 +650,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
654 650
655static const struct genl_ops dp_packet_genl_ops[] = { 651static const struct genl_ops dp_packet_genl_ops[] = {
656 { .cmd = OVS_PACKET_CMD_EXECUTE, 652 { .cmd = OVS_PACKET_CMD_EXECUTE,
657 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 653 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
658 .policy = packet_policy, 654 .policy = packet_policy,
659 .doit = ovs_packet_cmd_execute 655 .doit = ovs_packet_cmd_execute
660 } 656 }
@@ -876,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
876 return NULL; 872 return NULL;
877 873
878 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); 874 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
879 skb = genlmsg_new_unicast(len, info, GFP_KERNEL); 875 skb = genlmsg_new(len, GFP_KERNEL);
880 if (!skb) 876 if (!skb)
881 return ERR_PTR(-ENOMEM); 877 return ERR_PTR(-ENOMEM);
882 878
@@ -1100,26 +1096,32 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1100 struct sw_flow_match match; 1096 struct sw_flow_match match;
1101 struct sw_flow_id sfid; 1097 struct sw_flow_id sfid;
1102 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); 1098 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1103 int error; 1099 int error = 0;
1104 bool log = !a[OVS_FLOW_ATTR_PROBE]; 1100 bool log = !a[OVS_FLOW_ATTR_PROBE];
1105 bool ufid_present; 1101 bool ufid_present;
1106 1102
1107 /* Extract key. */
1108 error = -EINVAL;
1109 if (!a[OVS_FLOW_ATTR_KEY]) {
1110 OVS_NLERR(log, "Flow key attribute not present in set flow.");
1111 goto error;
1112 }
1113
1114 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); 1103 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1115 ovs_match_init(&match, &key, &mask); 1104 if (a[OVS_FLOW_ATTR_KEY]) {
1116 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], 1105 ovs_match_init(&match, &key, &mask);
1117 a[OVS_FLOW_ATTR_MASK], log); 1106 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1107 a[OVS_FLOW_ATTR_MASK], log);
1108 } else if (!ufid_present) {
1109 OVS_NLERR(log,
1110 "Flow set message rejected, Key attribute missing.");
1111 error = -EINVAL;
1112 }
1118 if (error) 1113 if (error)
1119 goto error; 1114 goto error;
1120 1115
1121 /* Validate actions. */ 1116 /* Validate actions. */
1122 if (a[OVS_FLOW_ATTR_ACTIONS]) { 1117 if (a[OVS_FLOW_ATTR_ACTIONS]) {
1118 if (!a[OVS_FLOW_ATTR_KEY]) {
1119 OVS_NLERR(log,
1120 "Flow key attribute not present in set flow.");
1121 error = -EINVAL;
1122 goto error;
1123 }
1124
1123 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, 1125 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
1124 &mask, log); 1126 &mask, log);
1125 if (IS_ERR(acts)) { 1127 if (IS_ERR(acts)) {
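
The ovs_flow_cmd_set() hunk relaxes validation: a flow identified only by its UFID may omit the key, but supplying new actions still requires the key (it is needed for masking). A compact decision sketch of that rule, with made-up booleans standing in for the netlink attributes:

/* Sketch of the relaxed validation in ovs_flow_cmd_set(). */
#include <stdio.h>
#include <stdbool.h>

static int validate_flow_set(bool has_key, bool has_ufid, bool has_actions)
{
	if (!has_key && !has_ufid)
		return -1;              /* nothing identifies the flow */
	if (has_actions && !has_key)
		return -1;              /* actions need the key */
	return 0;
}

int main(void)
{
	printf("ufid only:      %d\n", validate_flow_set(false, true, false));
	printf("ufid + actions: %d\n", validate_flow_set(false, true, true));
	printf("key + actions:  %d\n", validate_flow_set(true, false, true));
	return 0;
}
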
@@ -1391,12 +1393,12 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1391 1393
1392static const struct genl_ops dp_flow_genl_ops[] = { 1394static const struct genl_ops dp_flow_genl_ops[] = {
1393 { .cmd = OVS_FLOW_CMD_NEW, 1395 { .cmd = OVS_FLOW_CMD_NEW,
1394 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1396 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1395 .policy = flow_policy, 1397 .policy = flow_policy,
1396 .doit = ovs_flow_cmd_new 1398 .doit = ovs_flow_cmd_new
1397 }, 1399 },
1398 { .cmd = OVS_FLOW_CMD_DEL, 1400 { .cmd = OVS_FLOW_CMD_DEL,
1399 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1401 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1400 .policy = flow_policy, 1402 .policy = flow_policy,
1401 .doit = ovs_flow_cmd_del 1403 .doit = ovs_flow_cmd_del
1402 }, 1404 },
@@ -1407,7 +1409,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
1407 .dumpit = ovs_flow_cmd_dump 1409 .dumpit = ovs_flow_cmd_dump
1408 }, 1410 },
1409 { .cmd = OVS_FLOW_CMD_SET, 1411 { .cmd = OVS_FLOW_CMD_SET,
1410 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1412 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1411 .policy = flow_policy, 1413 .policy = flow_policy,
1412 .doit = ovs_flow_cmd_set, 1414 .doit = ovs_flow_cmd_set,
1413 }, 1415 },
@@ -1481,9 +1483,9 @@ error:
1481 return -EMSGSIZE; 1483 return -EMSGSIZE;
1482} 1484}
1483 1485
1484static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) 1486static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1485{ 1487{
1486 return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); 1488 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1487} 1489}
1488 1490
1489/* Called with rcu_read_lock or ovs_mutex. */ 1491/* Called with rcu_read_lock or ovs_mutex. */
@@ -1536,7 +1538,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1536 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1538 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1537 goto err; 1539 goto err;
1538 1540
1539 reply = ovs_dp_cmd_alloc_info(info); 1541 reply = ovs_dp_cmd_alloc_info();
1540 if (!reply) 1542 if (!reply)
1541 return -ENOMEM; 1543 return -ENOMEM;
1542 1544
@@ -1657,7 +1659,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1657 struct datapath *dp; 1659 struct datapath *dp;
1658 int err; 1660 int err;
1659 1661
1660 reply = ovs_dp_cmd_alloc_info(info); 1662 reply = ovs_dp_cmd_alloc_info();
1661 if (!reply) 1663 if (!reply)
1662 return -ENOMEM; 1664 return -ENOMEM;
1663 1665
@@ -1690,7 +1692,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1690 struct datapath *dp; 1692 struct datapath *dp;
1691 int err; 1693 int err;
1692 1694
1693 reply = ovs_dp_cmd_alloc_info(info); 1695 reply = ovs_dp_cmd_alloc_info();
1694 if (!reply) 1696 if (!reply)
1695 return -ENOMEM; 1697 return -ENOMEM;
1696 1698
@@ -1723,7 +1725,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1723 struct datapath *dp; 1725 struct datapath *dp;
1724 int err; 1726 int err;
1725 1727
1726 reply = ovs_dp_cmd_alloc_info(info); 1728 reply = ovs_dp_cmd_alloc_info();
1727 if (!reply) 1729 if (!reply)
1728 return -ENOMEM; 1730 return -ENOMEM;
1729 1731
@@ -1777,12 +1779,12 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1777 1779
1778static const struct genl_ops dp_datapath_genl_ops[] = { 1780static const struct genl_ops dp_datapath_genl_ops[] = {
1779 { .cmd = OVS_DP_CMD_NEW, 1781 { .cmd = OVS_DP_CMD_NEW,
1780 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1782 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1781 .policy = datapath_policy, 1783 .policy = datapath_policy,
1782 .doit = ovs_dp_cmd_new 1784 .doit = ovs_dp_cmd_new
1783 }, 1785 },
1784 { .cmd = OVS_DP_CMD_DEL, 1786 { .cmd = OVS_DP_CMD_DEL,
1785 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1787 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1786 .policy = datapath_policy, 1788 .policy = datapath_policy,
1787 .doit = ovs_dp_cmd_del 1789 .doit = ovs_dp_cmd_del
1788 }, 1790 },
@@ -1793,7 +1795,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
1793 .dumpit = ovs_dp_cmd_dump 1795 .dumpit = ovs_dp_cmd_dump
1794 }, 1796 },
1795 { .cmd = OVS_DP_CMD_SET, 1797 { .cmd = OVS_DP_CMD_SET,
1796 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1798 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1797 .policy = datapath_policy, 1799 .policy = datapath_policy,
1798 .doit = ovs_dp_cmd_set, 1800 .doit = ovs_dp_cmd_set,
1799 }, 1801 },
@@ -1912,6 +1914,29 @@ static struct vport *lookup_vport(struct net *net,
1912 return ERR_PTR(-EINVAL); 1914 return ERR_PTR(-EINVAL);
1913} 1915}
1914 1916
1917/* Called with ovs_mutex */
1918static void update_headroom(struct datapath *dp)
1919{
1920 unsigned dev_headroom, max_headroom = 0;
1921 struct net_device *dev;
1922 struct vport *vport;
1923 int i;
1924
1925 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1926 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1927 dev = vport->dev;
1928 dev_headroom = netdev_get_fwd_headroom(dev);
1929 if (dev_headroom > max_headroom)
1930 max_headroom = dev_headroom;
1931 }
1932 }
1933
1934 dp->max_headroom = max_headroom;
1935 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1936 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
1937 netdev_set_rx_headroom(vport->dev, max_headroom);
1938}
1939
1915static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) 1940static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1916{ 1941{
1917 struct nlattr **a = info->attrs; 1942 struct nlattr **a = info->attrs;
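
update_headroom() above is a two-pass algorithm: find the largest forwarding headroom over all vports, then program every vport with that maximum so forwarded packets never need a reallocation. A standalone model of the same two passes, with a plain array standing in for the datapath's hash buckets:

/* Standalone model of update_headroom(): compute the max, then apply it. */
#include <stdio.h>

struct fake_port {
	const char *name;
	unsigned int fwd_headroom;   /* what this port would like */
	unsigned int rx_headroom;    /* what we program on it */
};

static void update_headroom(struct fake_port *ports, int n)
{
	unsigned int max_headroom = 0;
	int i;

	for (i = 0; i < n; i++)
		if (ports[i].fwd_headroom > max_headroom)
			max_headroom = ports[i].fwd_headroom;

	for (i = 0; i < n; i++)
		ports[i].rx_headroom = max_headroom;
}

int main(void)
{
	struct fake_port ports[] = {
		{ "internal0", 0, 0 },
		{ "vxlan0", 50, 0 },     /* tunnel port wants extra room */
		{ "eth0", 16, 0 },
	};
	int i, n = sizeof(ports) / sizeof(ports[0]);

	update_headroom(ports, n);
	for (i = 0; i < n; i++)
		printf("%-9s rx_headroom=%u\n", ports[i].name,
		       ports[i].rx_headroom);
	return 0;
}
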
@@ -1977,6 +2002,12 @@ restart:
1977 2002
1978 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2003 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1979 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 2004 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2005
2006 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2007 update_headroom(dp);
2008 else
2009 netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2010
1980 BUG_ON(err < 0); 2011 BUG_ON(err < 0);
1981 ovs_unlock(); 2012 ovs_unlock();
1982 2013
@@ -2043,8 +2074,10 @@ exit_unlock_free:
2043 2074
2044static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) 2075static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2045{ 2076{
2077 bool must_update_headroom = false;
2046 struct nlattr **a = info->attrs; 2078 struct nlattr **a = info->attrs;
2047 struct sk_buff *reply; 2079 struct sk_buff *reply;
2080 struct datapath *dp;
2048 struct vport *vport; 2081 struct vport *vport;
2049 int err; 2082 int err;
2050 2083
@@ -2066,7 +2099,16 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2066 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2099 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2067 info->snd_seq, 0, OVS_VPORT_CMD_DEL); 2100 info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2068 BUG_ON(err < 0); 2101 BUG_ON(err < 0);
2102
2103 /* the vport deletion may trigger dp headroom update */
2104 dp = vport->dp;
2105 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2106 must_update_headroom = true;
2107 netdev_reset_rx_headroom(vport->dev);
2069 ovs_dp_detach_port(vport); 2108 ovs_dp_detach_port(vport);
2109
2110 if (must_update_headroom)
2111 update_headroom(dp);
2070 ovs_unlock(); 2112 ovs_unlock();
2071 2113
2072 ovs_notify(&dp_vport_genl_family, reply, info); 2114 ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2158,12 +2200,12 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2158 2200
2159static const struct genl_ops dp_vport_genl_ops[] = { 2201static const struct genl_ops dp_vport_genl_ops[] = {
2160 { .cmd = OVS_VPORT_CMD_NEW, 2202 { .cmd = OVS_VPORT_CMD_NEW,
2161 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2203 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2162 .policy = vport_policy, 2204 .policy = vport_policy,
2163 .doit = ovs_vport_cmd_new 2205 .doit = ovs_vport_cmd_new
2164 }, 2206 },
2165 { .cmd = OVS_VPORT_CMD_DEL, 2207 { .cmd = OVS_VPORT_CMD_DEL,
2166 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2208 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2167 .policy = vport_policy, 2209 .policy = vport_policy,
2168 .doit = ovs_vport_cmd_del 2210 .doit = ovs_vport_cmd_del
2169 }, 2211 },
@@ -2174,7 +2216,7 @@ static const struct genl_ops dp_vport_genl_ops[] = {
2174 .dumpit = ovs_vport_cmd_dump 2216 .dumpit = ovs_vport_cmd_dump
2175 }, 2217 },
2176 { .cmd = OVS_VPORT_CMD_SET, 2218 { .cmd = OVS_VPORT_CMD_SET,
2177 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2219 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2178 .policy = vport_policy, 2220 .policy = vport_policy,
2179 .doit = ovs_vport_cmd_set, 2221 .doit = ovs_vport_cmd_set,
2180 }, 2222 },
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 67bdecd9fdc1..427e39a045cf 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -68,6 +68,8 @@ struct dp_stats_percpu {
68 * ovs_mutex and RCU. 68 * ovs_mutex and RCU.
69 * @stats_percpu: Per-CPU datapath statistics. 69 * @stats_percpu: Per-CPU datapath statistics.
70 * @net: Reference to net namespace. 70 * @net: Reference to net namespace.
71 * @max_headroom: the maximum headroom of all vports in this datapath; it will
72 * be used by all the internal vports in this dp.
71 * 73 *
72 * Context: See the comment on locking at the top of datapath.c for additional 74 * Context: See the comment on locking at the top of datapath.c for additional
73 * locking information. 75 * locking information.
@@ -89,6 +91,8 @@ struct datapath {
89 possible_net_t net; 91 possible_net_t net;
90 92
91 u32 user_features; 93 u32 user_features;
94
95 u32 max_headroom;
92}; 96};
93 97
94/** 98/**
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1d055c559eaf..03378e75a67c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -55,7 +55,7 @@ struct ovs_tunnel_info {
55 FIELD_SIZEOF(struct sw_flow_key, recirc_id)) 55 FIELD_SIZEOF(struct sw_flow_key, recirc_id))
56 56
57struct sw_flow_key { 57struct sw_flow_key {
58 u8 tun_opts[255]; 58 u8 tun_opts[IP_TUNNEL_OPTS_MAX];
59 u8 tun_opts_len; 59 u8 tun_opts_len;
60 struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */ 60 struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */
61 struct { 61 struct {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d1bd4a45ca2d..689c17264221 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1959,6 +1959,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1959 if (!tun_dst) 1959 if (!tun_dst)
1960 return -ENOMEM; 1960 return -ENOMEM;
1961 1961
1962 err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
1963 if (err) {
1964 dst_release((struct dst_entry *)tun_dst);
1965 return err;
1966 }
1967
1962 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 1968 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
1963 sizeof(*ovs_tun), log); 1969 sizeof(*ovs_tun), log);
1964 if (IS_ERR(a)) { 1970 if (IS_ERR(a)) {
@@ -2038,9 +2044,6 @@ static int validate_set(const struct nlattr *a,
2038 break; 2044 break;
2039 2045
2040 case OVS_KEY_ATTR_TUNNEL: 2046 case OVS_KEY_ATTR_TUNNEL:
2041 if (eth_p_mpls(eth_type))
2042 return -EINVAL;
2043
2044 if (masked) 2047 if (masked)
2045 return -EINVAL; /* Masked tunnel set not supported. */ 2048 return -EINVAL; /* Masked tunnel set not supported. */
2046 2049
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 30ab8e127288..1a1fcec88695 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -132,6 +132,6 @@ static void __exit ovs_geneve_tnl_exit(void)
132module_init(ovs_geneve_tnl_init); 132module_init(ovs_geneve_tnl_init);
133module_exit(ovs_geneve_tnl_exit); 133module_exit(ovs_geneve_tnl_exit);
134 134
135MODULE_DESCRIPTION("OVS: Geneve swiching port"); 135MODULE_DESCRIPTION("OVS: Geneve switching port");
136MODULE_LICENSE("GPL"); 136MODULE_LICENSE("GPL");
137MODULE_ALIAS("vport-type-5"); 137MODULE_ALIAS("vport-type-5");
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index ec76398a792f..7c8b90bf0e54 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -138,6 +138,11 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
138 return stats; 138 return stats;
139} 139}
140 140
141static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
142{
143 dev->needed_headroom = new_hr;
144}
145
141static const struct net_device_ops internal_dev_netdev_ops = { 146static const struct net_device_ops internal_dev_netdev_ops = {
142 .ndo_open = internal_dev_open, 147 .ndo_open = internal_dev_open,
143 .ndo_stop = internal_dev_stop, 148 .ndo_stop = internal_dev_stop,
@@ -145,6 +150,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {
145 .ndo_set_mac_address = eth_mac_addr, 150 .ndo_set_mac_address = eth_mac_addr,
146 .ndo_change_mtu = internal_dev_change_mtu, 151 .ndo_change_mtu = internal_dev_change_mtu,
147 .ndo_get_stats64 = internal_get_stats, 152 .ndo_get_stats64 = internal_get_stats,
153 .ndo_set_rx_headroom = internal_set_rx_headroom,
148}; 154};
149 155
150static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { 156static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -158,7 +164,8 @@ static void do_setup(struct net_device *netdev)
158 netdev->netdev_ops = &internal_dev_netdev_ops; 164 netdev->netdev_ops = &internal_dev_netdev_ops;
159 165
160 netdev->priv_flags &= ~IFF_TX_SKB_SHARING; 166 netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
161 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH; 167 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
168 IFF_PHONY_HEADROOM;
162 netdev->destructor = internal_dev_destructor; 169 netdev->destructor = internal_dev_destructor;
163 netdev->ethtool_ops = &internal_dev_ethtool_ops; 170 netdev->ethtool_ops = &internal_dev_ethtool_ops;
164 netdev->rtnl_link_ops = &internal_dev_link_ops; 171 netdev->rtnl_link_ops = &internal_dev_link_ops;
@@ -199,6 +206,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
199 err = -ENOMEM; 206 err = -ENOMEM;
200 goto error_free_netdev; 207 goto error_free_netdev;
201 } 208 }
209 vport->dev->needed_headroom = vport->dp->max_headroom;
202 210
203 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); 211 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
204 internal_dev = internal_dev_priv(vport->dev); 212 internal_dev = internal_dev_priv(vport->dev);
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6a6adf314363..4e3972344aa6 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb)
58 return; 58 return;
59 59
60 skb_push(skb, ETH_HLEN); 60 skb_push(skb, ETH_HLEN);
61 ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); 61 skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
62 ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); 62 ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
63 return; 63 return;
64error: 64error:
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index c10899cb9040..f01f28a567ad 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -185,13 +185,6 @@ static inline struct vport *vport_from_priv(void *priv)
185int ovs_vport_receive(struct vport *, struct sk_buff *, 185int ovs_vport_receive(struct vport *, struct sk_buff *,
186 const struct ip_tunnel_info *); 186 const struct ip_tunnel_info *);
187 187
188static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
189 const void *start, unsigned int len)
190{
191 if (skb->ip_summed == CHECKSUM_COMPLETE)
192 skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
193}
194
195static inline const char *ovs_vport_name(struct vport *vport) 188static inline const char *ovs_vport_name(struct vport *vport)
196{ 189{
197 return vport->dev->name; 190 return vport->dev->name;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 992396aa635c..18d0becbc46d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -557,9 +557,8 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
557{ 557{
558 struct net_device *dev; 558 struct net_device *dev;
559 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; 559 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
560 struct ethtool_cmd ecmd; 560 struct ethtool_link_ksettings ecmd;
561 int err; 561 int err;
562 u32 speed;
563 562
564 rtnl_lock(); 563 rtnl_lock();
565 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); 564 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
@@ -567,19 +566,19 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
567 rtnl_unlock(); 566 rtnl_unlock();
568 return DEFAULT_PRB_RETIRE_TOV; 567 return DEFAULT_PRB_RETIRE_TOV;
569 } 568 }
570 err = __ethtool_get_settings(dev, &ecmd); 569 err = __ethtool_get_link_ksettings(dev, &ecmd);
571 speed = ethtool_cmd_speed(&ecmd);
572 rtnl_unlock(); 570 rtnl_unlock();
573 if (!err) { 571 if (!err) {
574 /* 572 /*
575 * If the link speed is so slow you don't really 573 * If the link speed is so slow you don't really
576 * need to worry about perf anyways 574 * need to worry about perf anyways
577 */ 575 */
578 if (speed < SPEED_1000 || speed == SPEED_UNKNOWN) { 576 if (ecmd.base.speed < SPEED_1000 ||
577 ecmd.base.speed == SPEED_UNKNOWN) {
579 return DEFAULT_PRB_RETIRE_TOV; 578 return DEFAULT_PRB_RETIRE_TOV;
580 } else { 579 } else {
581 msec = 1; 580 msec = 1;
582 div = speed / 1000; 581 div = ecmd.base.speed / 1000;
583 } 582 }
584 } 583 }
585 584
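
The af_packet hunk above switches prb_calc_retire_blk_tmo() to ethtool_link_ksettings and keeps the same speed-dependent branch: links below 1 Gb/s fall back to the default retire timeout, faster links derive a divisor of speed/1000. A minimal worked version of just that branch (the rest of the timeout calculation lies outside this hunk and is not reproduced):

/* Illustrative only: the speed-dependent branch shown above. */
#include <stdio.h>

#define DEFAULT_DIV 0   /* stand-in for "use DEFAULT_PRB_RETIRE_TOV" */

static unsigned int speed_to_div(unsigned int speed_mbps)
{
	if (speed_mbps < 1000)          /* slow link: keep the default */
		return DEFAULT_DIV;
	return speed_mbps / 1000;       /* e.g. 10000 Mb/s -> 10 */
}

int main(void)
{
	printf("100 Mb/s -> div %u\n", speed_to_div(100));
	printf("10 Gb/s  -> div %u\n", speed_to_div(10000));
	return 0;
}
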
@@ -1916,6 +1915,10 @@ retry:
1916 goto retry; 1915 goto retry;
1917 } 1916 }
1918 1917
1918 if (!dev_validate_header(dev, skb->data, len)) {
1919 err = -EINVAL;
1920 goto out_unlock;
1921 }
1919 if (len > (dev->mtu + dev->hard_header_len + extra_len) && 1922 if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1920 !packet_extra_vlan_len_allowed(dev, skb)) { 1923 !packet_extra_vlan_len_allowed(dev, skb)) {
1921 err = -EMSGSIZE; 1924 err = -EMSGSIZE;
@@ -1960,6 +1963,64 @@ static unsigned int run_filter(struct sk_buff *skb,
1960 return res; 1963 return res;
1961} 1964}
1962 1965
1966static int __packet_rcv_vnet(const struct sk_buff *skb,
1967 struct virtio_net_hdr *vnet_hdr)
1968{
1969 *vnet_hdr = (const struct virtio_net_hdr) { 0 };
1970
1971 if (skb_is_gso(skb)) {
1972 struct skb_shared_info *sinfo = skb_shinfo(skb);
1973
1974 /* This is a hint as to how much should be linear. */
1975 vnet_hdr->hdr_len =
1976 __cpu_to_virtio16(vio_le(), skb_headlen(skb));
1977 vnet_hdr->gso_size =
1978 __cpu_to_virtio16(vio_le(), sinfo->gso_size);
1979
1980 if (sinfo->gso_type & SKB_GSO_TCPV4)
1981 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1982 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1983 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1984 else if (sinfo->gso_type & SKB_GSO_UDP)
1985 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
1986 else if (sinfo->gso_type & SKB_GSO_FCOE)
1987 return -EINVAL;
1988 else
1989 BUG();
1990
1991 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1992 vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1993 } else
1994 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1995
1996 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1997 vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1998 vnet_hdr->csum_start = __cpu_to_virtio16(vio_le(),
1999 skb_checksum_start_offset(skb));
2000 vnet_hdr->csum_offset = __cpu_to_virtio16(vio_le(),
2001 skb->csum_offset);
2002 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
2003 vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
2004 } /* else everything is zero */
2005
2006 return 0;
2007}
2008
2009static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
2010 size_t *len)
2011{
2012 struct virtio_net_hdr vnet_hdr;
2013
2014 if (*len < sizeof(vnet_hdr))
2015 return -EINVAL;
2016 *len -= sizeof(vnet_hdr);
2017
2018 if (__packet_rcv_vnet(skb, &vnet_hdr))
2019 return -EINVAL;
2020
2021 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
2022}
2023
1963/* 2024/*
1964 * This function makes lazy skb cloning in hope that most of packets 2025 * This function makes lazy skb cloning in hope that most of packets
1965 * are discarded by BPF. 2026 * are discarded by BPF.
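
__packet_rcv_vnet() above translates the skb's GSO and checksum state into a struct virtio_net_hdr for PACKET_VNET_HDR receivers. The userspace sketch below models only the GSO-type mapping; it assumes the kernel uapi headers are installed for <linux/virtio_net.h>, and the EX_GSO_* bits are local stand-ins for the kernel's SKB_GSO_* flags.

/* Userspace sketch of the GSO-type mapping done on the receive path. */
#include <stdio.h>
#include <linux/virtio_net.h>

#define EX_GSO_TCPV4  0x1      /* local stand-ins for SKB_GSO_* bits */
#define EX_GSO_UDP    0x2
#define EX_GSO_TCPV6  0x4
#define EX_GSO_ECN    0x8

static int gso_to_vnet(unsigned int gso_type, struct virtio_net_hdr *h)
{
	if (gso_type & EX_GSO_TCPV4)
		h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
	else if (gso_type & EX_GSO_TCPV6)
		h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
	else if (gso_type & EX_GSO_UDP)
		h->gso_type = VIRTIO_NET_HDR_GSO_UDP;
	else
		return -1;                      /* unknown type: reject */

	if (gso_type & EX_GSO_ECN)
		h->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	return 0;
}

int main(void)
{
	struct virtio_net_hdr h = { 0 };

	if (gso_to_vnet(EX_GSO_TCPV6 | EX_GSO_ECN, &h) == 0)
		printf("vnet gso_type 0x%x\n", h.gso_type);
	return 0;
}
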
@@ -2148,7 +2209,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2148 unsigned int maclen = skb_network_offset(skb); 2209 unsigned int maclen = skb_network_offset(skb);
2149 netoff = TPACKET_ALIGN(po->tp_hdrlen + 2210 netoff = TPACKET_ALIGN(po->tp_hdrlen +
2150 (maclen < 16 ? 16 : maclen)) + 2211 (maclen < 16 ? 16 : maclen)) +
2151 po->tp_reserve; 2212 po->tp_reserve;
2213 if (po->has_vnet_hdr)
2214 netoff += sizeof(struct virtio_net_hdr);
2152 macoff = netoff - maclen; 2215 macoff = netoff - maclen;
2153 } 2216 }
2154 if (po->tp_version <= TPACKET_V2) { 2217 if (po->tp_version <= TPACKET_V2) {
@@ -2185,7 +2248,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2185 h.raw = packet_current_rx_frame(po, skb, 2248 h.raw = packet_current_rx_frame(po, skb,
2186 TP_STATUS_KERNEL, (macoff+snaplen)); 2249 TP_STATUS_KERNEL, (macoff+snaplen));
2187 if (!h.raw) 2250 if (!h.raw)
2188 goto ring_is_full; 2251 goto drop_n_account;
2189 if (po->tp_version <= TPACKET_V2) { 2252 if (po->tp_version <= TPACKET_V2) {
2190 packet_increment_rx_head(po, &po->rx_ring); 2253 packet_increment_rx_head(po, &po->rx_ring);
2191 /* 2254 /*
@@ -2204,6 +2267,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2204 } 2267 }
2205 spin_unlock(&sk->sk_receive_queue.lock); 2268 spin_unlock(&sk->sk_receive_queue.lock);
2206 2269
2270 if (po->has_vnet_hdr) {
2271 if (__packet_rcv_vnet(skb, h.raw + macoff -
2272 sizeof(struct virtio_net_hdr))) {
2273 spin_lock(&sk->sk_receive_queue.lock);
2274 goto drop_n_account;
2275 }
2276 }
2277
2207 skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 2278 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
2208 2279
2209 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) 2280 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
@@ -2299,7 +2370,7 @@ drop:
2299 kfree_skb(skb); 2370 kfree_skb(skb);
2300 return 0; 2371 return 0;
2301 2372
2302ring_is_full: 2373drop_n_account:
2303 po->stats.stats1.tp_drops++; 2374 po->stats.stats1.tp_drops++;
2304 spin_unlock(&sk->sk_receive_queue.lock); 2375 spin_unlock(&sk->sk_receive_queue.lock);
2305 2376
@@ -2326,18 +2397,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
2326 sock_wfree(skb); 2397 sock_wfree(skb);
2327} 2398}
2328 2399
2329static bool ll_header_truncated(const struct net_device *dev, int len)
2330{
2331 /* net device doesn't like empty head */
2332 if (unlikely(len < dev->hard_header_len)) {
2333 net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
2334 current->comm, len, dev->hard_header_len);
2335 return true;
2336 }
2337
2338 return false;
2339}
2340
2341static void tpacket_set_protocol(const struct net_device *dev, 2400static void tpacket_set_protocol(const struct net_device *dev,
2342 struct sk_buff *skb) 2401 struct sk_buff *skb)
2343{ 2402{
@@ -2347,15 +2406,92 @@ static void tpacket_set_protocol(const struct net_device *dev,
2347 } 2406 }
2348} 2407}
2349 2408
2409static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2410{
2411 unsigned short gso_type = 0;
2412
2413 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2414 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2415 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
2416 __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
2417 vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
2418 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2419 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
2420
2421 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2422 return -EINVAL;
2423
2424 if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2425 switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2426 case VIRTIO_NET_HDR_GSO_TCPV4:
2427 gso_type = SKB_GSO_TCPV4;
2428 break;
2429 case VIRTIO_NET_HDR_GSO_TCPV6:
2430 gso_type = SKB_GSO_TCPV6;
2431 break;
2432 case VIRTIO_NET_HDR_GSO_UDP:
2433 gso_type = SKB_GSO_UDP;
2434 break;
2435 default:
2436 return -EINVAL;
2437 }
2438
2439 if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
2440 gso_type |= SKB_GSO_TCP_ECN;
2441
2442 if (vnet_hdr->gso_size == 0)
2443 return -EINVAL;
2444 }
2445
2446 vnet_hdr->gso_type = gso_type; /* changes type, temporary storage */
2447 return 0;
2448}
2449
2450static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2451 struct virtio_net_hdr *vnet_hdr)
2452{
2453 int n;
2454
2455 if (*len < sizeof(*vnet_hdr))
2456 return -EINVAL;
2457 *len -= sizeof(*vnet_hdr);
2458
2459 n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter);
2460 if (n != sizeof(*vnet_hdr))
2461 return -EFAULT;
2462
2463 return __packet_snd_vnet_parse(vnet_hdr, *len);
2464}
2465
2466static int packet_snd_vnet_gso(struct sk_buff *skb,
2467 struct virtio_net_hdr *vnet_hdr)
2468{
2469 if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2470 u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start);
2471 u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset);
2472
2473 if (!skb_partial_csum_set(skb, s, o))
2474 return -EINVAL;
2475 }
2476
2477 skb_shinfo(skb)->gso_size =
2478 __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size);
2479 skb_shinfo(skb)->gso_type = vnet_hdr->gso_type;
2480
2481 /* Header must be checked, and gso_segs computed. */
2482 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2483 skb_shinfo(skb)->gso_segs = 0;
2484 return 0;
2485}
2486
2350static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 2487static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2351 void *frame, struct net_device *dev, int size_max, 2488 void *frame, struct net_device *dev, void *data, int tp_len,
2352 __be16 proto, unsigned char *addr, int hlen) 2489 __be16 proto, unsigned char *addr, int hlen, int copylen)
2353{ 2490{
2354 union tpacket_uhdr ph; 2491 union tpacket_uhdr ph;
2355 int to_write, offset, len, tp_len, nr_frags, len_max; 2492 int to_write, offset, len, nr_frags, len_max;
2356 struct socket *sock = po->sk.sk_socket; 2493 struct socket *sock = po->sk.sk_socket;
2357 struct page *page; 2494 struct page *page;
2358 void *data;
2359 int err; 2495 int err;
2360 2496
2361 ph.raw = frame; 2497 ph.raw = frame;
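
__packet_snd_vnet_parse() in the hunk above raises the header-length hint so it covers the 2-byte checksum at csum_start + csum_offset, then rejects the frame if the hint exceeds the data actually supplied. A minimal model of those two checks, using plain host-order integers instead of the virtio16 fields:

/* Minimal model of the sanity checks in __packet_snd_vnet_parse(). */
#include <stdio.h>

static int check_vnet_hdr(unsigned int csum_start, unsigned int csum_offset,
			  unsigned int hdr_len, unsigned int data_len)
{
	/* The 2-byte checksum written at csum_start + csum_offset must fit
	 * inside the linear header hint. */
	if (csum_start + csum_offset + 2 > hdr_len)
		hdr_len = csum_start + csum_offset + 2;

	if (hdr_len > data_len)
		return -1;              /* header claims more than we got */
	return (int)hdr_len;
}

int main(void)
{
	/* TCP over IPv4: checksum 16 bytes into a header starting at 34. */
	printf("ok:  hdr_len=%d\n", check_vnet_hdr(34, 16, 40, 1500));
	printf("bad: %d\n", check_vnet_hdr(34, 16, 40, 20));
	return 0;
}
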
@@ -2367,51 +2503,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2367 sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); 2503 sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
2368 skb_shinfo(skb)->destructor_arg = ph.raw; 2504 skb_shinfo(skb)->destructor_arg = ph.raw;
2369 2505
2370 switch (po->tp_version) {
2371 case TPACKET_V2:
2372 tp_len = ph.h2->tp_len;
2373 break;
2374 default:
2375 tp_len = ph.h1->tp_len;
2376 break;
2377 }
2378 if (unlikely(tp_len > size_max)) {
2379 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
2380 return -EMSGSIZE;
2381 }
2382
2383 skb_reserve(skb, hlen); 2506 skb_reserve(skb, hlen);
2384 skb_reset_network_header(skb); 2507 skb_reset_network_header(skb);
2385 2508
2386 if (unlikely(po->tp_tx_has_off)) {
2387 int off_min, off_max, off;
2388 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2389 off_max = po->tx_ring.frame_size - tp_len;
2390 if (sock->type == SOCK_DGRAM) {
2391 switch (po->tp_version) {
2392 case TPACKET_V2:
2393 off = ph.h2->tp_net;
2394 break;
2395 default:
2396 off = ph.h1->tp_net;
2397 break;
2398 }
2399 } else {
2400 switch (po->tp_version) {
2401 case TPACKET_V2:
2402 off = ph.h2->tp_mac;
2403 break;
2404 default:
2405 off = ph.h1->tp_mac;
2406 break;
2407 }
2408 }
2409 if (unlikely((off < off_min) || (off_max < off)))
2410 return -EINVAL;
2411 data = ph.raw + off;
2412 } else {
2413 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
2414 }
2415 to_write = tp_len; 2509 to_write = tp_len;
2416 2510
2417 if (sock->type == SOCK_DGRAM) { 2511 if (sock->type == SOCK_DGRAM) {
@@ -2419,20 +2513,21 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2419 NULL, tp_len); 2513 NULL, tp_len);
2420 if (unlikely(err < 0)) 2514 if (unlikely(err < 0))
2421 return -EINVAL; 2515 return -EINVAL;
2422 } else if (dev->hard_header_len) { 2516 } else if (copylen) {
2423 if (ll_header_truncated(dev, tp_len)) 2517 int hdrlen = min_t(int, copylen, tp_len);
2424 return -EINVAL;
2425 2518
2426 skb_push(skb, dev->hard_header_len); 2519 skb_push(skb, dev->hard_header_len);
2427 err = skb_store_bits(skb, 0, data, 2520 skb_put(skb, copylen - dev->hard_header_len);
2428 dev->hard_header_len); 2521 err = skb_store_bits(skb, 0, data, hdrlen);
2429 if (unlikely(err)) 2522 if (unlikely(err))
2430 return err; 2523 return err;
2524 if (!dev_validate_header(dev, skb->data, hdrlen))
2525 return -EINVAL;
2431 if (!skb->protocol) 2526 if (!skb->protocol)
2432 tpacket_set_protocol(dev, skb); 2527 tpacket_set_protocol(dev, skb);
2433 2528
2434 data += dev->hard_header_len; 2529 data += hdrlen;
2435 to_write -= dev->hard_header_len; 2530 to_write -= hdrlen;
2436 } 2531 }
2437 2532
2438 offset = offset_in_page(data); 2533 offset = offset_in_page(data);
@@ -2469,10 +2564,66 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2469 return tp_len; 2564 return tp_len;
2470} 2565}
2471 2566
2567static int tpacket_parse_header(struct packet_sock *po, void *frame,
2568 int size_max, void **data)
2569{
2570 union tpacket_uhdr ph;
2571 int tp_len, off;
2572
2573 ph.raw = frame;
2574
2575 switch (po->tp_version) {
2576 case TPACKET_V2:
2577 tp_len = ph.h2->tp_len;
2578 break;
2579 default:
2580 tp_len = ph.h1->tp_len;
2581 break;
2582 }
2583 if (unlikely(tp_len > size_max)) {
2584 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
2585 return -EMSGSIZE;
2586 }
2587
2588 if (unlikely(po->tp_tx_has_off)) {
2589 int off_min, off_max;
2590
2591 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2592 off_max = po->tx_ring.frame_size - tp_len;
2593 if (po->sk.sk_type == SOCK_DGRAM) {
2594 switch (po->tp_version) {
2595 case TPACKET_V2:
2596 off = ph.h2->tp_net;
2597 break;
2598 default:
2599 off = ph.h1->tp_net;
2600 break;
2601 }
2602 } else {
2603 switch (po->tp_version) {
2604 case TPACKET_V2:
2605 off = ph.h2->tp_mac;
2606 break;
2607 default:
2608 off = ph.h1->tp_mac;
2609 break;
2610 }
2611 }
2612 if (unlikely((off < off_min) || (off_max < off)))
2613 return -EINVAL;
2614 } else {
2615 off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2616 }
2617
2618 *data = frame + off;
2619 return tp_len;
2620}
2621
2472static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) 2622static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2473{ 2623{
2474 struct sk_buff *skb; 2624 struct sk_buff *skb;
2475 struct net_device *dev; 2625 struct net_device *dev;
2626 struct virtio_net_hdr *vnet_hdr = NULL;
2476 __be16 proto; 2627 __be16 proto;
2477 int err, reserve = 0; 2628 int err, reserve = 0;
2478 void *ph; 2629 void *ph;
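
tpacket_parse_header() above moves the PACKET_TX_HAS_OFF offset validation out of tpacket_fill_skb(): a user-supplied data offset must leave room for the frame header in front of it and for tp_len bytes behind it. A small sketch of that bounds check with example ring geometry:

/* Illustrative bounds check matching tpacket_parse_header(). */
#include <stdio.h>

static int check_tx_offset(int off, int hdr_room, int frame_size, int tp_len)
{
	int off_min = hdr_room;              /* tp_hdrlen - sizeof(sockaddr_ll) */
	int off_max = frame_size - tp_len;

	if (off < off_min || off > off_max)
		return -1;
	return off;
}

int main(void)
{
	/* 2048-byte frames, 32 bytes of header room, 1400-byte packet. */
	printf("off 64:   %d\n", check_tx_offset(64, 32, 2048, 1400));
	printf("off 1000: %d\n", check_tx_offset(1000, 32, 2048, 1400));
	return 0;
}
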
@@ -2480,9 +2631,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2480 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); 2631 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2481 int tp_len, size_max; 2632 int tp_len, size_max;
2482 unsigned char *addr; 2633 unsigned char *addr;
2634 void *data;
2483 int len_sum = 0; 2635 int len_sum = 0;
2484 int status = TP_STATUS_AVAILABLE; 2636 int status = TP_STATUS_AVAILABLE;
2485 int hlen, tlen; 2637 int hlen, tlen, copylen = 0;
2486 2638
2487 mutex_lock(&po->pg_vec_lock); 2639 mutex_lock(&po->pg_vec_lock);
2488 2640
@@ -2515,7 +2667,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2515 size_max = po->tx_ring.frame_size 2667 size_max = po->tx_ring.frame_size
2516 - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); 2668 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2517 2669
2518 if (size_max > dev->mtu + reserve + VLAN_HLEN) 2670 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2519 size_max = dev->mtu + reserve + VLAN_HLEN; 2671 size_max = dev->mtu + reserve + VLAN_HLEN;
2520 2672
2521 do { 2673 do {
@@ -2527,11 +2679,30 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2527 continue; 2679 continue;
2528 } 2680 }
2529 2681
2682 skb = NULL;
2683 tp_len = tpacket_parse_header(po, ph, size_max, &data);
2684 if (tp_len < 0)
2685 goto tpacket_error;
2686
2530 status = TP_STATUS_SEND_REQUEST; 2687 status = TP_STATUS_SEND_REQUEST;
2531 hlen = LL_RESERVED_SPACE(dev); 2688 hlen = LL_RESERVED_SPACE(dev);
2532 tlen = dev->needed_tailroom; 2689 tlen = dev->needed_tailroom;
2690 if (po->has_vnet_hdr) {
2691 vnet_hdr = data;
2692 data += sizeof(*vnet_hdr);
2693 tp_len -= sizeof(*vnet_hdr);
2694 if (tp_len < 0 ||
2695 __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
2696 tp_len = -EINVAL;
2697 goto tpacket_error;
2698 }
2699 copylen = __virtio16_to_cpu(vio_le(),
2700 vnet_hdr->hdr_len);
2701 }
2702 copylen = max_t(int, copylen, dev->hard_header_len);
2533 skb = sock_alloc_send_skb(&po->sk, 2703 skb = sock_alloc_send_skb(&po->sk,
2534 hlen + tlen + sizeof(struct sockaddr_ll), 2704 hlen + tlen + sizeof(struct sockaddr_ll) +
2705 (copylen - dev->hard_header_len),
2535 !need_wait, &err); 2706 !need_wait, &err);
2536 2707
2537 if (unlikely(skb == NULL)) { 2708 if (unlikely(skb == NULL)) {
@@ -2540,14 +2711,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2540 err = len_sum; 2711 err = len_sum;
2541 goto out_status; 2712 goto out_status;
2542 } 2713 }
2543 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, 2714 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
2544 addr, hlen); 2715 addr, hlen, copylen);
2545 if (likely(tp_len >= 0) && 2716 if (likely(tp_len >= 0) &&
2546 tp_len > dev->mtu + reserve && 2717 tp_len > dev->mtu + reserve &&
2718 !po->has_vnet_hdr &&
2547 !packet_extra_vlan_len_allowed(dev, skb)) 2719 !packet_extra_vlan_len_allowed(dev, skb))
2548 tp_len = -EMSGSIZE; 2720 tp_len = -EMSGSIZE;
2549 2721
2550 if (unlikely(tp_len < 0)) { 2722 if (unlikely(tp_len < 0)) {
2723tpacket_error:
2551 if (po->tp_loss) { 2724 if (po->tp_loss) {
2552 __packet_set_status(po, ph, 2725 __packet_set_status(po, ph,
2553 TP_STATUS_AVAILABLE); 2726 TP_STATUS_AVAILABLE);
@@ -2561,6 +2734,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2561 } 2734 }
2562 } 2735 }
2563 2736
2737 if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
2738 tp_len = -EINVAL;
2739 goto tpacket_error;
2740 }
2741
2564 packet_pick_tx_queue(dev, skb); 2742 packet_pick_tx_queue(dev, skb);
2565 2743
2566 skb->destructor = tpacket_destruct_skb; 2744 skb->destructor = tpacket_destruct_skb;
@@ -2643,12 +2821,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2643 struct sockcm_cookie sockc; 2821 struct sockcm_cookie sockc;
2644 struct virtio_net_hdr vnet_hdr = { 0 }; 2822 struct virtio_net_hdr vnet_hdr = { 0 };
2645 int offset = 0; 2823 int offset = 0;
2646 int vnet_hdr_len;
2647 struct packet_sock *po = pkt_sk(sk); 2824 struct packet_sock *po = pkt_sk(sk);
2648 unsigned short gso_type = 0;
2649 int hlen, tlen; 2825 int hlen, tlen;
2650 int extra_len = 0; 2826 int extra_len = 0;
2651 ssize_t n;
2652 2827
2653 /* 2828 /*
2654 * Get and verify the address. 2829 * Get and verify the address.
@@ -2686,53 +2861,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2686 if (sock->type == SOCK_RAW) 2861 if (sock->type == SOCK_RAW)
2687 reserve = dev->hard_header_len; 2862 reserve = dev->hard_header_len;
2688 if (po->has_vnet_hdr) { 2863 if (po->has_vnet_hdr) {
2689 vnet_hdr_len = sizeof(vnet_hdr); 2864 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2690 2865 if (err)
2691 err = -EINVAL;
2692 if (len < vnet_hdr_len)
2693 goto out_unlock;
2694
2695 len -= vnet_hdr_len;
2696
2697 err = -EFAULT;
2698 n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &msg->msg_iter);
2699 if (n != vnet_hdr_len)
2700 goto out_unlock;
2701
2702 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2703 (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
2704 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 >
2705 __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len)))
2706 vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(),
2707 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
2708 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2);
2709
2710 err = -EINVAL;
2711 if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len)
2712 goto out_unlock; 2866 goto out_unlock;
2713
2714 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2715 switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2716 case VIRTIO_NET_HDR_GSO_TCPV4:
2717 gso_type = SKB_GSO_TCPV4;
2718 break;
2719 case VIRTIO_NET_HDR_GSO_TCPV6:
2720 gso_type = SKB_GSO_TCPV6;
2721 break;
2722 case VIRTIO_NET_HDR_GSO_UDP:
2723 gso_type = SKB_GSO_UDP;
2724 break;
2725 default:
2726 goto out_unlock;
2727 }
2728
2729 if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
2730 gso_type |= SKB_GSO_TCP_ECN;
2731
2732 if (vnet_hdr.gso_size == 0)
2733 goto out_unlock;
2734
2735 }
2736 } 2867 }
2737 2868
2738 if (unlikely(sock_flag(sk, SOCK_NOFCS))) { 2869 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2744,7 +2875,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2744 } 2875 }
2745 2876
2746 err = -EMSGSIZE; 2877 err = -EMSGSIZE;
2747 if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) 2878 if (!vnet_hdr.gso_type &&
2879 (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2748 goto out_unlock; 2880 goto out_unlock;
2749 2881
2750 err = -ENOBUFS; 2882 err = -ENOBUFS;
@@ -2763,9 +2895,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2763 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); 2895 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
2764 if (unlikely(offset < 0)) 2896 if (unlikely(offset < 0))
2765 goto out_free; 2897 goto out_free;
2766 } else {
2767 if (ll_header_truncated(dev, len))
2768 goto out_free;
2769 } 2898 }
2770 2899
2771 /* Returns -EFAULT on error */ 2900 /* Returns -EFAULT on error */
@@ -2773,9 +2902,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2773 if (err) 2902 if (err)
2774 goto out_free; 2903 goto out_free;
2775 2904
2905 if (sock->type == SOCK_RAW &&
2906 !dev_validate_header(dev, skb->data, len)) {
2907 err = -EINVAL;
2908 goto out_free;
2909 }
2910
2776 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 2911 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2777 2912
2778 if (!gso_type && (len > dev->mtu + reserve + extra_len) && 2913 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
2779 !packet_extra_vlan_len_allowed(dev, skb)) { 2914 !packet_extra_vlan_len_allowed(dev, skb)) {
2780 err = -EMSGSIZE; 2915 err = -EMSGSIZE;
2781 goto out_free; 2916 goto out_free;
@@ -2789,24 +2924,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2789 packet_pick_tx_queue(dev, skb); 2924 packet_pick_tx_queue(dev, skb);
2790 2925
2791 if (po->has_vnet_hdr) { 2926 if (po->has_vnet_hdr) {
2792 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2927 err = packet_snd_vnet_gso(skb, &vnet_hdr);
2793 u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start); 2928 if (err)
2794 u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset); 2929 goto out_free;
2795 if (!skb_partial_csum_set(skb, s, o)) { 2930 len += sizeof(vnet_hdr);
2796 err = -EINVAL;
2797 goto out_free;
2798 }
2799 }
2800
2801 skb_shinfo(skb)->gso_size =
2802 __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size);
2803 skb_shinfo(skb)->gso_type = gso_type;
2804
2805 /* Header must be checked, and gso_segs computed. */
2806 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2807 skb_shinfo(skb)->gso_segs = 0;
2808
2809 len += vnet_hdr_len;
2810 } 2931 }
2811 2932
2812 skb_probe_transport_header(skb, reserve); 2933 skb_probe_transport_header(skb, reserve);
@@ -3177,51 +3298,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3177 packet_rcv_has_room(pkt_sk(sk), NULL); 3298 packet_rcv_has_room(pkt_sk(sk), NULL);
3178 3299
3179 if (pkt_sk(sk)->has_vnet_hdr) { 3300 if (pkt_sk(sk)->has_vnet_hdr) {
3180 struct virtio_net_hdr vnet_hdr = { 0 }; 3301 err = packet_rcv_vnet(msg, skb, &len);
3181 3302 if (err)
3182 err = -EINVAL;
3183 vnet_hdr_len = sizeof(vnet_hdr);
3184 if (len < vnet_hdr_len)
3185 goto out_free;
3186
3187 len -= vnet_hdr_len;
3188
3189 if (skb_is_gso(skb)) {
3190 struct skb_shared_info *sinfo = skb_shinfo(skb);
3191
3192 /* This is a hint as to how much should be linear. */
3193 vnet_hdr.hdr_len =
3194 __cpu_to_virtio16(vio_le(), skb_headlen(skb));
3195 vnet_hdr.gso_size =
3196 __cpu_to_virtio16(vio_le(), sinfo->gso_size);
3197 if (sinfo->gso_type & SKB_GSO_TCPV4)
3198 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
3199 else if (sinfo->gso_type & SKB_GSO_TCPV6)
3200 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
3201 else if (sinfo->gso_type & SKB_GSO_UDP)
3202 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
3203 else if (sinfo->gso_type & SKB_GSO_FCOE)
3204 goto out_free;
3205 else
3206 BUG();
3207 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
3208 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
3209 } else
3210 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
3211
3212 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3213 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
3214 vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(),
3215 skb_checksum_start_offset(skb));
3216 vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(),
3217 skb->csum_offset);
3218 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
3219 vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
3220 } /* else everything is zero */
3221
3222 err = memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_len);
3223 if (err < 0)
3224 goto out_free; 3303 goto out_free;
3304 vnet_hdr_len = sizeof(struct virtio_net_hdr);
3225 } 3305 }
3226 3306
3227 /* You lose any data beyond the buffer you gave. If it worries 3307 /* You lose any data beyond the buffer you gave. If it worries
@@ -3441,6 +3521,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
3441 i->ifindex = mreq->mr_ifindex; 3521 i->ifindex = mreq->mr_ifindex;
3442 i->alen = mreq->mr_alen; 3522 i->alen = mreq->mr_alen;
3443 memcpy(i->addr, mreq->mr_address, i->alen); 3523 memcpy(i->addr, mreq->mr_address, i->alen);
3524 memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
3444 i->count = 1; 3525 i->count = 1;
3445 i->next = po->mclist; 3526 i->next = po->mclist;
3446 po->mclist = i; 3527 po->mclist = i;
@@ -3552,8 +3633,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
3552 } 3633 }
3553 if (optlen < len) 3634 if (optlen < len)
3554 return -EINVAL; 3635 return -EINVAL;
3555 if (pkt_sk(sk)->has_vnet_hdr)
3556 return -EINVAL;
3557 if (copy_from_user(&req_u.req, optval, len)) 3636 if (copy_from_user(&req_u.req, optval, len))
3558 return -EFAULT; 3637 return -EFAULT;
3559 return packet_set_ring(sk, &req_u, 0, 3638 return packet_set_ring(sk, &req_u, 0,
@@ -4073,7 +4152,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4073 4152
4074 /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ 4153 /* Opening a Tx-ring is NOT supported in TPACKET_V3 */
4075 if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { 4154 if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
4076 WARN(1, "Tx-ring is not supported.\n"); 4155 net_warn_ratelimited("Tx-ring is not supported.\n");
4077 goto out; 4156 goto out;
4078 } 4157 }
4079 4158
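
The af_packet hunks above fold the open-coded virtio_net_hdr handling into shared helpers (packet_snd_vnet_gso() on the send side, packet_rcv_vnet() on receive). For context, below is a minimal userspace sketch — not part of this patch — of how a PACKET_VNET_HDR sender hands such a header to packet_snd(); the interface name and the "no offload" header values are illustrative only.

/* Minimal sketch (not from this patch): a PF_PACKET sender that prepends a
 * struct virtio_net_hdr, which packet_snd() parses via the new helpers. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <linux/virtio_net.h>
#include <net/if.h>
#include <arpa/inet.h>

int send_with_vnet_hdr(const char *ifname, const void *frame, size_t len)
{
	int one = 1;
	struct sockaddr_ll sll = { .sll_family = AF_PACKET };
	struct virtio_net_hdr vh = { .gso_type = VIRTIO_NET_HDR_GSO_NONE };
	char buf[sizeof(vh) + 2048];
	int fd;

	if (len > sizeof(buf) - sizeof(vh))
		return -1;
	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0)
		return -1;
	/* Tell af_packet that every send() payload starts with a virtio_net_hdr */
	if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)) < 0)
		goto err;
	sll.sll_ifindex = if_nametoindex(ifname);
	sll.sll_protocol = htons(ETH_P_ALL);
	if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0)
		goto err;

	memcpy(buf, &vh, sizeof(vh));
	memcpy(buf + sizeof(vh), frame, len);
	if (send(fd, buf, sizeof(vh) + len, 0) < 0)
		goto err;
	close(fd);
	return 0;
err:
	close(fd);
	return -1;
}
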
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index d575ef4e9aa6..ffd5f2297584 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -140,13 +140,15 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
140 rcu_read_unlock(); 140 rcu_read_unlock();
141} 141}
142 142
143void pn_sock_hash(struct sock *sk) 143int pn_sock_hash(struct sock *sk)
144{ 144{
145 struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject); 145 struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
146 146
147 mutex_lock(&pnsocks.lock); 147 mutex_lock(&pnsocks.lock);
148 sk_add_node_rcu(sk, hlist); 148 sk_add_node_rcu(sk, hlist);
149 mutex_unlock(&pnsocks.lock); 149 mutex_unlock(&pnsocks.lock);
150
151 return 0;
150} 152}
151EXPORT_SYMBOL(pn_sock_hash); 153EXPORT_SYMBOL(pn_sock_hash);
152 154
@@ -200,7 +202,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
200 pn->resource = spn->spn_resource; 202 pn->resource = spn->spn_resource;
201 203
202 /* Enable RX on the socket */ 204 /* Enable RX on the socket */
203 sk->sk_prot->hash(sk); 205 err = sk->sk_prot->hash(sk);
204out_port: 206out_port:
205 mutex_unlock(&port_mutex); 207 mutex_unlock(&port_mutex);
206out: 208out:
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index f2c670ba7b9b..bffde4b46c5d 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -4,14 +4,13 @@ config RDS
4 depends on INET 4 depends on INET
5 ---help--- 5 ---help---
6 The RDS (Reliable Datagram Sockets) protocol provides reliable, 6 The RDS (Reliable Datagram Sockets) protocol provides reliable,
7 sequenced delivery of datagrams over Infiniband, iWARP, 7 sequenced delivery of datagrams over Infiniband or TCP.
8 or TCP.
9 8
10config RDS_RDMA 9config RDS_RDMA
11 tristate "RDS over Infiniband and iWARP" 10 tristate "RDS over Infiniband"
12 depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS 11 depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
13 ---help--- 12 ---help---
14 Allow RDS to use Infiniband and iWARP as a transport. 13 Allow RDS to use Infiniband as a transport.
15 This transport supports RDMA operations. 14 This transport supports RDMA operations.
16 15
17config RDS_TCP 16config RDS_TCP
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 56d3f6023ced..0e72bec1529f 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -6,9 +6,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
6obj-$(CONFIG_RDS_RDMA) += rds_rdma.o 6obj-$(CONFIG_RDS_RDMA) += rds_rdma.o
7rds_rdma-y := rdma_transport.o \ 7rds_rdma-y := rdma_transport.o \
8 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ 8 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \
9 ib_sysctl.o ib_rdma.o \ 9 ib_sysctl.o ib_rdma.o ib_fmr.o ib_frmr.o
10 iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \
11 iw_sysctl.o iw_rdma.o
12 10
13 11
14obj-$(CONFIG_RDS_TCP) += rds_tcp.o 12obj-$(CONFIG_RDS_TCP) += rds_tcp.o
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b5476aebd68d..6beaeb1138f3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -277,6 +277,27 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval,
277 return rs->rs_transport ? 0 : -ENOPROTOOPT; 277 return rs->rs_transport ? 0 : -ENOPROTOOPT;
278} 278}
279 279
280static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
281 int optlen)
282{
283 int val, valbool;
284
285 if (optlen != sizeof(int))
286 return -EFAULT;
287
288 if (get_user(val, (int __user *)optval))
289 return -EFAULT;
290
291 valbool = val ? 1 : 0;
292
293 if (valbool)
294 sock_set_flag(sk, SOCK_RCVTSTAMP);
295 else
296 sock_reset_flag(sk, SOCK_RCVTSTAMP);
297
298 return 0;
299}
300
280static int rds_setsockopt(struct socket *sock, int level, int optname, 301static int rds_setsockopt(struct socket *sock, int level, int optname,
281 char __user *optval, unsigned int optlen) 302 char __user *optval, unsigned int optlen)
282{ 303{
@@ -312,6 +333,11 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
312 ret = rds_set_transport(rs, optval, optlen); 333 ret = rds_set_transport(rs, optval, optlen);
313 release_sock(sock->sk); 334 release_sock(sock->sk);
314 break; 335 break;
336 case SO_TIMESTAMP:
337 lock_sock(sock->sk);
338 ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
339 release_sock(sock->sk);
340 break;
315 default: 341 default:
316 ret = -ENOPROTOOPT; 342 ret = -ENOPROTOOPT;
317 } 343 }
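
The new SO_TIMESTAMP case lets an RDS application request receive timestamps through rds_enable_recvtstamp(). The sketch below shows the conventional SO_TIMESTAMP/SCM_TIMESTAMP receive recipe this enables; it is generic socket code, not taken from this patch, and the PF_RDS socket setup and buffer sizes are elided/arbitrary.

/* Sketch only: enable SO_TIMESTAMP on an already-connected RDS socket and
 * read the SCM_TIMESTAMP control message on the next receive. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>

static int print_rx_timestamp(int fd)
{
	int on = 1;
	char data[2048];
	char cbuf[CMSG_SPACE(sizeof(struct timeval))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) < 0)
		return -1;
	if (recvmsg(fd, &msg, 0) < 0)
		return -1;
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_TIMESTAMP) {
			struct timeval tv;

			memcpy(&tv, CMSG_DATA(cm), sizeof(tv));
			printf("received at %ld.%06ld\n",
			       (long)tv.tv_sec, (long)tv.tv_usec);
		}
	}
	return 0;
}
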
diff --git a/net/rds/cong.c b/net/rds/cong.c
index e6144b8246fd..6641bcf7c185 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -299,7 +299,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
299 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; 299 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
300 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; 300 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
301 301
302 __set_bit_le(off, (void *)map->m_page_addrs[i]); 302 set_bit_le(off, (void *)map->m_page_addrs[i]);
303} 303}
304 304
305void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) 305void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
@@ -313,7 +313,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
313 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; 313 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
314 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; 314 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
315 315
316 __clear_bit_le(off, (void *)map->m_page_addrs[i]); 316 clear_bit_le(off, (void *)map->m_page_addrs[i]);
317} 317}
318 318
319static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) 319static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
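
The cong.c hunks replace the non-atomic __set_bit_le()/__clear_bit_le() with their atomic counterparts: a plain bit update is a load-modify-store, so two contexts touching different bits of the same word can silently lose one update. The tiny userspace model below (C11 atomics, names invented here) only contrasts the two operations; the lost update itself would of course need real concurrency to show up.

/* Illustrative model only (not kernel code): plain RMW vs. atomic RMW on a
 * shared bitmap word. */
#include <stdatomic.h>
#include <stdio.h>

static unsigned long plain_word;
static _Atomic unsigned long atomic_word;

static void plain_set_bit(int nr)	/* racy: load, OR, store */
{
	plain_word |= 1UL << nr;
}

static void atomic_set_bit(int nr)	/* safe: single atomic RMW */
{
	atomic_fetch_or(&atomic_word, 1UL << nr);
}

int main(void)
{
	/* With two CPUs doing this concurrently, the plain version may end
	 * up with only one of the two bits set; the atomic one cannot. */
	plain_set_bit(3);
	plain_set_bit(7);
	atomic_set_bit(3);
	atomic_set_bit(7);
	printf("plain=%#lx atomic=%#lx\n", plain_word, atomic_word);
	return 0;
}
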
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 9481d55ff6cb..b5342fddaf98 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -42,15 +42,16 @@
42 42
43#include "rds.h" 43#include "rds.h"
44#include "ib.h" 44#include "ib.h"
45#include "ib_mr.h"
45 46
46unsigned int rds_ib_fmr_1m_pool_size = RDS_FMR_1M_POOL_SIZE; 47unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
47unsigned int rds_ib_fmr_8k_pool_size = RDS_FMR_8K_POOL_SIZE; 48unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
48unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 49unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
49 50
50module_param(rds_ib_fmr_1m_pool_size, int, 0444); 51module_param(rds_ib_mr_1m_pool_size, int, 0444);
51MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1M fmr per HCA"); 52MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
52module_param(rds_ib_fmr_8k_pool_size, int, 0444); 53module_param(rds_ib_mr_8k_pool_size, int, 0444);
53MODULE_PARM_DESC(rds_ib_fmr_8k_pool_size, " Max number of 8K fmr per HCA"); 54MODULE_PARM_DESC(rds_ib_mr_8k_pool_size, " Max number of 8K mr per HCA");
54module_param(rds_ib_retry_count, int, 0444); 55module_param(rds_ib_retry_count, int, 0444);
55MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 56MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
56 57
@@ -139,14 +140,20 @@ static void rds_ib_add_one(struct ib_device *device)
139 rds_ibdev->max_wrs = device->attrs.max_qp_wr; 140 rds_ibdev->max_wrs = device->attrs.max_qp_wr;
140 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); 141 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
141 142
143 rds_ibdev->has_fr = (device->attrs.device_cap_flags &
144 IB_DEVICE_MEM_MGT_EXTENSIONS);
145 rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
146 device->map_phys_fmr && device->unmap_fmr);
147 rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr);
148
142 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; 149 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
143 rds_ibdev->max_1m_fmrs = device->attrs.max_mr ? 150 rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
144 min_t(unsigned int, (device->attrs.max_mr / 2), 151 min_t(unsigned int, (device->attrs.max_mr / 2),
145 rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size; 152 rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size;
146 153
147 rds_ibdev->max_8k_fmrs = device->attrs.max_mr ? 154 rds_ibdev->max_8k_mrs = device->attrs.max_mr ?
148 min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE), 155 min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
149 rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size; 156 rds_ib_mr_8k_pool_size) : rds_ib_mr_8k_pool_size;
150 157
151 rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; 158 rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
152 rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; 159 rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
@@ -172,10 +179,14 @@ static void rds_ib_add_one(struct ib_device *device)
172 goto put_dev; 179 goto put_dev;
173 } 180 }
174 181
175 rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n", 182 rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n",
176 device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, 183 device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
177 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs, 184 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs,
178 rds_ibdev->max_8k_fmrs); 185 rds_ibdev->max_8k_mrs);
186
187 pr_info("RDS/IB: %s: %s supported and preferred\n",
188 device->name,
189 rds_ibdev->use_fastreg ? "FRMR" : "FMR");
179 190
180 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 191 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
181 INIT_LIST_HEAD(&rds_ibdev->conn_list); 192 INIT_LIST_HEAD(&rds_ibdev->conn_list);
@@ -364,7 +375,7 @@ void rds_ib_exit(void)
364 rds_ib_sysctl_exit(); 375 rds_ib_sysctl_exit();
365 rds_ib_recv_exit(); 376 rds_ib_recv_exit();
366 rds_trans_unregister(&rds_ib_transport); 377 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit(); 378 rds_ib_mr_exit();
368} 379}
369 380
370struct rds_transport rds_ib_transport = { 381struct rds_transport rds_ib_transport = {
@@ -400,13 +411,13 @@ int rds_ib_init(void)
400 411
401 INIT_LIST_HEAD(&rds_ib_devices); 412 INIT_LIST_HEAD(&rds_ib_devices);
402 413
403 ret = rds_ib_fmr_init(); 414 ret = rds_ib_mr_init();
404 if (ret) 415 if (ret)
405 goto out; 416 goto out;
406 417
407 ret = ib_register_client(&rds_ib_client); 418 ret = ib_register_client(&rds_ib_client);
408 if (ret) 419 if (ret)
409 goto out_fmr_exit; 420 goto out_mr_exit;
410 421
411 ret = rds_ib_sysctl_init(); 422 ret = rds_ib_sysctl_init();
412 if (ret) 423 if (ret)
@@ -430,8 +441,8 @@ out_sysctl:
430 rds_ib_sysctl_exit(); 441 rds_ib_sysctl_exit();
431out_ibreg: 442out_ibreg:
432 rds_ib_unregister_client(); 443 rds_ib_unregister_client();
433out_fmr_exit: 444out_mr_exit:
434 rds_ib_fmr_exit(); 445 rds_ib_mr_exit();
435out: 446out:
436 return ret; 447 return ret;
437} 448}
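
rds_ib_add_one() above now probes the HCA once: FRMR support is inferred from IB_DEVICE_MEM_MGT_EXTENSIONS, FMR support from the presence of the FMR verbs, and fast registration is preferred only when FMR is unavailable. A tiny stand-alone model of that selection follows; the struct and field names are invented for the sketch.

/* Stand-alone model of the MR-mode selection in rds_ib_add_one(). */
#include <stdbool.h>
#include <stdio.h>

struct hca_caps {
	bool mem_mgt_extensions;	/* models IB_DEVICE_MEM_MGT_EXTENSIONS */
	bool has_fmr_verbs;		/* models alloc_fmr/map_phys_fmr/... */
};

static const char *pick_mr_mode(const struct hca_caps *c)
{
	bool has_fr = c->mem_mgt_extensions;
	bool has_fmr = c->has_fmr_verbs;
	bool use_fastreg = has_fr && !has_fmr;

	return use_fastreg ? "FRMR" : "FMR";
}

int main(void)
{
	struct hca_caps fmr_only = { .mem_mgt_extensions = false, .has_fmr_verbs = true };
	struct hca_caps fr_only  = { .mem_mgt_extensions = true,  .has_fmr_verbs = false };

	printf("fmr-only HCA: %s\n", pick_mr_mode(&fmr_only));	/* FMR */
	printf("fr-only HCA:  %s\n", pick_mr_mode(&fr_only));	/* FRMR */
	return 0;
}
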
diff --git a/net/rds/ib.h b/net/rds/ib.h
index b3fdebb57460..627fb79aee65 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -9,17 +9,12 @@
9#include "rds.h" 9#include "rds.h"
10#include "rdma_transport.h" 10#include "rdma_transport.h"
11 11
12#define RDS_FMR_1M_POOL_SIZE (8192 / 2)
13#define RDS_FMR_1M_MSG_SIZE 256
14#define RDS_FMR_8K_MSG_SIZE 2
15#define RDS_MR_8K_SCALE (256 / (RDS_FMR_8K_MSG_SIZE + 1))
16#define RDS_FMR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2))
17
18#define RDS_IB_MAX_SGE 8 12#define RDS_IB_MAX_SGE 8
19#define RDS_IB_RECV_SGE 2 13#define RDS_IB_RECV_SGE 2
20 14
21#define RDS_IB_DEFAULT_RECV_WR 1024 15#define RDS_IB_DEFAULT_RECV_WR 1024
22#define RDS_IB_DEFAULT_SEND_WR 256 16#define RDS_IB_DEFAULT_SEND_WR 256
17#define RDS_IB_DEFAULT_FR_WR 512
23 18
24#define RDS_IB_DEFAULT_RETRY_COUNT 2 19#define RDS_IB_DEFAULT_RETRY_COUNT 2
25 20
@@ -28,7 +23,6 @@
28#define RDS_IB_RECYCLE_BATCH_COUNT 32 23#define RDS_IB_RECYCLE_BATCH_COUNT 32
29 24
30#define RDS_IB_WC_MAX 32 25#define RDS_IB_WC_MAX 32
31#define RDS_IB_SEND_OP BIT_ULL(63)
32 26
33extern struct rw_semaphore rds_ib_devices_lock; 27extern struct rw_semaphore rds_ib_devices_lock;
34extern struct list_head rds_ib_devices; 28extern struct list_head rds_ib_devices;
@@ -129,6 +123,9 @@ struct rds_ib_connection {
129 struct ib_wc i_send_wc[RDS_IB_WC_MAX]; 123 struct ib_wc i_send_wc[RDS_IB_WC_MAX];
130 struct ib_wc i_recv_wc[RDS_IB_WC_MAX]; 124 struct ib_wc i_recv_wc[RDS_IB_WC_MAX];
131 125
126 /* To control the number of wrs from fastreg */
127 atomic_t i_fastreg_wrs;
128
132 /* interrupt handling */ 129 /* interrupt handling */
133 struct tasklet_struct i_send_tasklet; 130 struct tasklet_struct i_send_tasklet;
134 struct tasklet_struct i_recv_tasklet; 131 struct tasklet_struct i_recv_tasklet;
@@ -207,12 +204,16 @@ struct rds_ib_device {
207 struct list_head conn_list; 204 struct list_head conn_list;
208 struct ib_device *dev; 205 struct ib_device *dev;
209 struct ib_pd *pd; 206 struct ib_pd *pd;
210 unsigned int max_fmrs; 207 bool has_fmr;
208 bool has_fr;
209 bool use_fastreg;
210
211 unsigned int max_mrs;
211 struct rds_ib_mr_pool *mr_1m_pool; 212 struct rds_ib_mr_pool *mr_1m_pool;
212 struct rds_ib_mr_pool *mr_8k_pool; 213 struct rds_ib_mr_pool *mr_8k_pool;
213 unsigned int fmr_max_remaps; 214 unsigned int fmr_max_remaps;
214 unsigned int max_8k_fmrs; 215 unsigned int max_8k_mrs;
215 unsigned int max_1m_fmrs; 216 unsigned int max_1m_mrs;
216 int max_sge; 217 int max_sge;
217 unsigned int max_wrs; 218 unsigned int max_wrs;
218 unsigned int max_initiator_depth; 219 unsigned int max_initiator_depth;
@@ -266,6 +267,8 @@ struct rds_ib_statistics {
266 uint64_t s_ib_rdma_mr_1m_pool_flush; 267 uint64_t s_ib_rdma_mr_1m_pool_flush;
267 uint64_t s_ib_rdma_mr_1m_pool_wait; 268 uint64_t s_ib_rdma_mr_1m_pool_wait;
268 uint64_t s_ib_rdma_mr_1m_pool_depleted; 269 uint64_t s_ib_rdma_mr_1m_pool_depleted;
270 uint64_t s_ib_rdma_mr_8k_reused;
271 uint64_t s_ib_rdma_mr_1m_reused;
269 uint64_t s_ib_atomic_cswp; 272 uint64_t s_ib_atomic_cswp;
270 uint64_t s_ib_atomic_fadd; 273 uint64_t s_ib_atomic_fadd;
271}; 274};
@@ -317,8 +320,6 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
317void rds_ib_dev_put(struct rds_ib_device *rds_ibdev); 320void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
318extern struct ib_client rds_ib_client; 321extern struct ib_client rds_ib_client;
319 322
320extern unsigned int rds_ib_fmr_1m_pool_size;
321extern unsigned int rds_ib_fmr_8k_pool_size;
322extern unsigned int rds_ib_retry_count; 323extern unsigned int rds_ib_retry_count;
323 324
324extern spinlock_t ib_nodev_conns_lock; 325extern spinlock_t ib_nodev_conns_lock;
@@ -348,17 +349,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
348void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 349void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
349void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 350void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
350void rds_ib_destroy_nodev_conns(void); 351void rds_ib_destroy_nodev_conns(void);
351struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, 352void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
352 int npages);
353void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
354void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
355void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
356 struct rds_sock *rs, u32 *key_ret);
357void rds_ib_sync_mr(void *trans_private, int dir);
358void rds_ib_free_mr(void *trans_private, int invalidate);
359void rds_ib_flush_mrs(void);
360int rds_ib_fmr_init(void);
361void rds_ib_fmr_exit(void);
362 353
363/* ib_recv.c */ 354/* ib_recv.c */
364int rds_ib_recv_init(void); 355int rds_ib_recv_init(void);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index da5a7fb98c77..310cabce2311 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -194,7 +194,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
194 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); 194 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
195 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); 195 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
196 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); 196 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
197 dp->dp_ack_seq = rds_ib_piggyb_ack(ic); 197 dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic));
198 198
199 /* Advertise flow control */ 199 /* Advertise flow control */
200 if (ic->i_flowctl) { 200 if (ic->i_flowctl) {
@@ -236,12 +236,10 @@ static void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context)
236 tasklet_schedule(&ic->i_recv_tasklet); 236 tasklet_schedule(&ic->i_recv_tasklet);
237} 237}
238 238
239static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq, 239static void poll_scq(struct rds_ib_connection *ic, struct ib_cq *cq,
240 struct ib_wc *wcs, 240 struct ib_wc *wcs)
241 struct rds_ib_ack_state *ack_state)
242{ 241{
243 int nr; 242 int nr, i;
244 int i;
245 struct ib_wc *wc; 243 struct ib_wc *wc;
246 244
247 while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) { 245 while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
@@ -251,10 +249,12 @@ static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
251 (unsigned long long)wc->wr_id, wc->status, 249 (unsigned long long)wc->wr_id, wc->status,
252 wc->byte_len, be32_to_cpu(wc->ex.imm_data)); 250 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
253 251
254 if (wc->wr_id & RDS_IB_SEND_OP) 252 if (wc->wr_id <= ic->i_send_ring.w_nr ||
253 wc->wr_id == RDS_IB_ACK_WR_ID)
255 rds_ib_send_cqe_handler(ic, wc); 254 rds_ib_send_cqe_handler(ic, wc);
256 else 255 else
257 rds_ib_recv_cqe_handler(ic, wc, ack_state); 256 rds_ib_mr_cqe_handler(ic, wc);
257
258 } 258 }
259 } 259 }
260} 260}
@@ -263,14 +263,12 @@ static void rds_ib_tasklet_fn_send(unsigned long data)
263{ 263{
264 struct rds_ib_connection *ic = (struct rds_ib_connection *)data; 264 struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
265 struct rds_connection *conn = ic->conn; 265 struct rds_connection *conn = ic->conn;
266 struct rds_ib_ack_state state;
267 266
268 rds_ib_stats_inc(s_ib_tasklet_call); 267 rds_ib_stats_inc(s_ib_tasklet_call);
269 268
270 memset(&state, 0, sizeof(state)); 269 poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
271 poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state);
272 ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); 270 ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
273 poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state); 271 poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
274 272
275 if (rds_conn_up(conn) && 273 if (rds_conn_up(conn) &&
276 (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 274 (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
@@ -278,6 +276,25 @@ static void rds_ib_tasklet_fn_send(unsigned long data)
278 rds_send_xmit(ic->conn); 276 rds_send_xmit(ic->conn);
279} 277}
280 278
279static void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
280 struct ib_wc *wcs,
281 struct rds_ib_ack_state *ack_state)
282{
283 int nr, i;
284 struct ib_wc *wc;
285
286 while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
287 for (i = 0; i < nr; i++) {
288 wc = wcs + i;
289 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
290 (unsigned long long)wc->wr_id, wc->status,
291 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
292
293 rds_ib_recv_cqe_handler(ic, wc, ack_state);
294 }
295 }
296}
297
281static void rds_ib_tasklet_fn_recv(unsigned long data) 298static void rds_ib_tasklet_fn_recv(unsigned long data)
282{ 299{
283 struct rds_ib_connection *ic = (struct rds_ib_connection *)data; 300 struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
@@ -291,9 +308,9 @@ static void rds_ib_tasklet_fn_recv(unsigned long data)
291 rds_ib_stats_inc(s_ib_tasklet_call); 308 rds_ib_stats_inc(s_ib_tasklet_call);
292 309
293 memset(&state, 0, sizeof(state)); 310 memset(&state, 0, sizeof(state));
294 poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); 311 poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
295 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); 312 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
296 poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); 313 poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
297 314
298 if (state.ack_next_valid) 315 if (state.ack_next_valid)
299 rds_ib_set_ack(ic, state.ack_next, state.ack_required); 316 rds_ib_set_ack(ic, state.ack_next, state.ack_required);
@@ -351,7 +368,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
351 struct ib_qp_init_attr attr; 368 struct ib_qp_init_attr attr;
352 struct ib_cq_init_attr cq_attr = {}; 369 struct ib_cq_init_attr cq_attr = {};
353 struct rds_ib_device *rds_ibdev; 370 struct rds_ib_device *rds_ibdev;
354 int ret; 371 int ret, fr_queue_space;
355 372
356 /* 373 /*
357 * It's normal to see a null device if an incoming connection races 374 * It's normal to see a null device if an incoming connection races
@@ -361,6 +378,12 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
361 if (!rds_ibdev) 378 if (!rds_ibdev)
362 return -EOPNOTSUPP; 379 return -EOPNOTSUPP;
363 380
381 /* The fr_queue_space is currently set to 512, to add extra space on
382 * completion queue and send queue. This extra space is used for FRMR
383 * registration and invalidation work requests
384 */
385 fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0);
386
364 /* add the conn now so that connection establishment has the dev */ 387 /* add the conn now so that connection establishment has the dev */
365 rds_ib_add_conn(rds_ibdev, conn); 388 rds_ib_add_conn(rds_ibdev, conn);
366 389
@@ -372,7 +395,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
372 /* Protection domain and memory range */ 395 /* Protection domain and memory range */
373 ic->i_pd = rds_ibdev->pd; 396 ic->i_pd = rds_ibdev->pd;
374 397
375 cq_attr.cqe = ic->i_send_ring.w_nr + 1; 398 cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1;
376 399
377 ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send, 400 ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
378 rds_ib_cq_event_handler, conn, 401 rds_ib_cq_event_handler, conn,
@@ -412,7 +435,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
412 attr.event_handler = rds_ib_qp_event_handler; 435 attr.event_handler = rds_ib_qp_event_handler;
413 attr.qp_context = conn; 436 attr.qp_context = conn;
414 /* + 1 to allow for the single ack message */ 437 /* + 1 to allow for the single ack message */
415 attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; 438 attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1;
416 attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; 439 attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
417 attr.cap.max_send_sge = rds_ibdev->max_sge; 440 attr.cap.max_send_sge = rds_ibdev->max_sge;
418 attr.cap.max_recv_sge = RDS_IB_RECV_SGE; 441 attr.cap.max_recv_sge = RDS_IB_RECV_SGE;
@@ -420,6 +443,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
420 attr.qp_type = IB_QPT_RC; 443 attr.qp_type = IB_QPT_RC;
421 attr.send_cq = ic->i_send_cq; 444 attr.send_cq = ic->i_send_cq;
422 attr.recv_cq = ic->i_recv_cq; 445 attr.recv_cq = ic->i_recv_cq;
446 atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
423 447
424 /* 448 /*
425 * XXX this can fail if max_*_wr is too large? Are we supposed 449 * XXX this can fail if max_*_wr is too large? Are we supposed
@@ -739,7 +763,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
739 */ 763 */
740 wait_event(rds_ib_ring_empty_wait, 764 wait_event(rds_ib_ring_empty_wait,
741 rds_ib_ring_empty(&ic->i_recv_ring) && 765 rds_ib_ring_empty(&ic->i_recv_ring) &&
742 (atomic_read(&ic->i_signaled_sends) == 0)); 766 (atomic_read(&ic->i_signaled_sends) == 0) &&
767 (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
743 tasklet_kill(&ic->i_send_tasklet); 768 tasklet_kill(&ic->i_send_tasklet);
744 tasklet_kill(&ic->i_recv_tasklet); 769 tasklet_kill(&ic->i_recv_tasklet);
745 770
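
The poll_scq() change above drops the old RDS_IB_SEND_OP high-bit tag: send-ring completions carry small wr_id values (ring indices, or the ack sentinel), while MR registration/invalidation work requests stash the rds_ib_mr pointer in wr_id, so the handler can tell them apart by magnitude alone. Below is a minimal stand-alone model of that cookie scheme; the types, the sentinel value, and the ring size are stand-ins, not the kernel definitions.

/* Model of the wr_id demultiplexing used in poll_scq() above.  "ring_size"
 * plays the role of ic->i_send_ring.w_nr, ACK_WR_ID that of RDS_IB_ACK_WR_ID,
 * and struct mr_ctx stands in for struct rds_ib_mr. */
#include <stdint.h>
#include <stdio.h>

#define ACK_WR_ID (~(uint64_t)0)

struct mr_ctx { const char *name; };

static void handle_send_completion(uint64_t wr_id)
{
	printf("send/ack completion, wr_id=%llu\n", (unsigned long long)wr_id);
}

static void handle_mr_completion(struct mr_ctx *mr)
{
	printf("MR completion for %s\n", mr->name);
}

static void dispatch(uint64_t wr_id, uint64_t ring_size)
{
	/* Ring slots and the ack WR use small (or sentinel) ids; MR work
	 * requests encode a pointer, which is always larger than the ring
	 * size, so magnitude alone distinguishes the two. */
	if (wr_id <= ring_size || wr_id == ACK_WR_ID)
		handle_send_completion(wr_id);
	else
		handle_mr_completion((struct mr_ctx *)(uintptr_t)wr_id);
}

int main(void)
{
	struct mr_ctx frmr = { .name = "frmr-0" };

	dispatch(5, 256);			/* ring slot */
	dispatch(ACK_WR_ID, 256);		/* ack WR */
	dispatch((uintptr_t)&frmr, 256);	/* MR work request */
	return 0;
}
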
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
new file mode 100644
index 000000000000..4fe8f4fec4ee
--- /dev/null
+++ b/net/rds/ib_fmr.c
@@ -0,0 +1,248 @@
1/*
2 * Copyright (c) 2016 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "ib_mr.h"
34
35struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
36{
37 struct rds_ib_mr_pool *pool;
38 struct rds_ib_mr *ibmr = NULL;
39 struct rds_ib_fmr *fmr;
40 int err = 0;
41
42 if (npages <= RDS_MR_8K_MSG_SIZE)
43 pool = rds_ibdev->mr_8k_pool;
44 else
45 pool = rds_ibdev->mr_1m_pool;
46
47 ibmr = rds_ib_try_reuse_ibmr(pool);
48 if (ibmr)
49 return ibmr;
50
51 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
52 rdsibdev_to_node(rds_ibdev));
53 if (!ibmr) {
54 err = -ENOMEM;
55 goto out_no_cigar;
56 }
57
58 fmr = &ibmr->u.fmr;
59 fmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
60 (IB_ACCESS_LOCAL_WRITE |
61 IB_ACCESS_REMOTE_READ |
62 IB_ACCESS_REMOTE_WRITE |
63 IB_ACCESS_REMOTE_ATOMIC),
64 &pool->fmr_attr);
65 if (IS_ERR(fmr->fmr)) {
66 err = PTR_ERR(fmr->fmr);
67 fmr->fmr = NULL;
68 pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, err);
69 goto out_no_cigar;
70 }
71
72 ibmr->pool = pool;
73 if (pool->pool_type == RDS_IB_MR_8K_POOL)
74 rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
75 else
76 rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
77
78 return ibmr;
79
80out_no_cigar:
81 if (ibmr) {
82 if (fmr->fmr)
83 ib_dealloc_fmr(fmr->fmr);
84 kfree(ibmr);
85 }
86 atomic_dec(&pool->item_count);
87 return ERR_PTR(err);
88}
89
90int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
91 struct scatterlist *sg, unsigned int nents)
92{
93 struct ib_device *dev = rds_ibdev->dev;
94 struct rds_ib_fmr *fmr = &ibmr->u.fmr;
95 struct scatterlist *scat = sg;
96 u64 io_addr = 0;
97 u64 *dma_pages;
98 u32 len;
99 int page_cnt, sg_dma_len;
100 int i, j;
101 int ret;
102
103 sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
104 if (unlikely(!sg_dma_len)) {
105 pr_warn("RDS/IB: %s failed!\n", __func__);
106 return -EBUSY;
107 }
108
109 len = 0;
110 page_cnt = 0;
111
112 for (i = 0; i < sg_dma_len; ++i) {
113 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
114 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
115
116 if (dma_addr & ~PAGE_MASK) {
117 if (i > 0)
118 return -EINVAL;
119 else
120 ++page_cnt;
121 }
122 if ((dma_addr + dma_len) & ~PAGE_MASK) {
123 if (i < sg_dma_len - 1)
124 return -EINVAL;
125 else
126 ++page_cnt;
127 }
128
129 len += dma_len;
130 }
131
132 page_cnt += len >> PAGE_SHIFT;
133 if (page_cnt > ibmr->pool->fmr_attr.max_pages)
134 return -EINVAL;
135
136 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
137 rdsibdev_to_node(rds_ibdev));
138 if (!dma_pages)
139 return -ENOMEM;
140
141 page_cnt = 0;
142 for (i = 0; i < sg_dma_len; ++i) {
143 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
144 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
145
146 for (j = 0; j < dma_len; j += PAGE_SIZE)
147 dma_pages[page_cnt++] =
148 (dma_addr & PAGE_MASK) + j;
149 }
150
151 ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
152 if (ret)
153 goto out;
154
155 /* Success - we successfully remapped the MR, so we can
156 * safely tear down the old mapping.
157 */
158 rds_ib_teardown_mr(ibmr);
159
160 ibmr->sg = scat;
161 ibmr->sg_len = nents;
162 ibmr->sg_dma_len = sg_dma_len;
163 ibmr->remap_count++;
164
165 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
166 rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
167 else
168 rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
169 ret = 0;
170
171out:
172 kfree(dma_pages);
173
174 return ret;
175}
176
177struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev,
178 struct scatterlist *sg,
179 unsigned long nents,
180 u32 *key)
181{
182 struct rds_ib_mr *ibmr = NULL;
183 struct rds_ib_fmr *fmr;
184 int ret;
185
186 ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
187 if (IS_ERR(ibmr))
188 return ibmr;
189
190 ibmr->device = rds_ibdev;
191 fmr = &ibmr->u.fmr;
192 ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
193 if (ret == 0)
194 *key = fmr->fmr->rkey;
195 else
196 rds_ib_free_mr(ibmr, 0);
197
198 return ibmr;
199}
200
201void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed,
202 unsigned long *unpinned, unsigned int goal)
203{
204 struct rds_ib_mr *ibmr, *next;
205 struct rds_ib_fmr *fmr;
206 LIST_HEAD(fmr_list);
207 int ret = 0;
208 unsigned int freed = *nfreed;
209
210 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
211 list_for_each_entry(ibmr, list, unmap_list) {
212 fmr = &ibmr->u.fmr;
213 list_add(&fmr->fmr->list, &fmr_list);
214 }
215
216 ret = ib_unmap_fmr(&fmr_list);
217 if (ret)
218 pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret);
219
220 /* Now we can destroy the DMA mapping and unpin any pages */
221 list_for_each_entry_safe(ibmr, next, list, unmap_list) {
222 fmr = &ibmr->u.fmr;
223 *unpinned += ibmr->sg_len;
224 __rds_ib_teardown_mr(ibmr);
225 if (freed < goal ||
226 ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) {
227 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
228 rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
229 else
230 rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
231 list_del(&ibmr->unmap_list);
232 ib_dealloc_fmr(fmr->fmr);
233 kfree(ibmr);
234 freed++;
235 }
236 }
237 *nfreed = freed;
238}
239
240void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr)
241{
242 struct rds_ib_mr_pool *pool = ibmr->pool;
243
244 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
245 llist_add(&ibmr->llnode, &pool->drop_list);
246 else
247 llist_add(&ibmr->llnode, &pool->free_list);
248}
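
rds_ib_map_fmr() above builds the FMR page list under the usual alignment rule: only the first DMA segment may start off a page boundary and only the last may end off one, and everything in between is cut into PAGE_SIZE slots. The stand-alone model below reproduces that validation and page count with invented types and a fixed 4 KiB page; it is a sketch, not the kernel routine.

/* Model of the segment validation in rds_ib_map_fmr(): returns the number of
 * page slots the mapping needs, or a negative value for a layout the FMR
 * path would reject. */
#include <stdint.h>
#include <stdio.h>

#define MODEL_PAGE_SHIFT 12
#define MODEL_PAGE_SIZE  (1UL << MODEL_PAGE_SHIFT)
#define MODEL_PAGE_MASK  (~(MODEL_PAGE_SIZE - 1))

struct dma_seg { uint64_t addr; uint32_t len; };

static long fmr_page_count(const struct dma_seg *seg, int n)
{
	uint64_t total = 0;
	long pages = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (seg[i].addr & ~MODEL_PAGE_MASK) {	/* unaligned start */
			if (i > 0)
				return -1;
			pages++;
		}
		if ((seg[i].addr + seg[i].len) & ~MODEL_PAGE_MASK) { /* unaligned end */
			if (i < n - 1)
				return -1;
			pages++;
		}
		total += seg[i].len;
	}
	return pages + (total >> MODEL_PAGE_SHIFT);
}

int main(void)
{
	/* an aligned 8 KiB segment plus a trailing 1 KiB tail: accepted */
	struct dma_seg ok[] = {
		{ 0x10000, 8192 },
		{ 0x20000, 1024 },	/* unaligned end, but it is the last */
	};
	/* a hole in the middle (unaligned end on a non-final segment): rejected */
	struct dma_seg bad[] = {
		{ 0x10000, 1024 },
		{ 0x20000, 4096 },
	};

	printf("ok:  %ld pages\n", fmr_page_count(ok, 2));
	printf("bad: %ld\n", fmr_page_count(bad, 2));
	return 0;
}
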
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
new file mode 100644
index 000000000000..93ff038ea9d1
--- /dev/null
+++ b/net/rds/ib_frmr.c
@@ -0,0 +1,376 @@
1/*
2 * Copyright (c) 2016 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "ib_mr.h"
34
35static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
36 int npages)
37{
38 struct rds_ib_mr_pool *pool;
39 struct rds_ib_mr *ibmr = NULL;
40 struct rds_ib_frmr *frmr;
41 int err = 0;
42
43 if (npages <= RDS_MR_8K_MSG_SIZE)
44 pool = rds_ibdev->mr_8k_pool;
45 else
46 pool = rds_ibdev->mr_1m_pool;
47
48 ibmr = rds_ib_try_reuse_ibmr(pool);
49 if (ibmr)
50 return ibmr;
51
52 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
53 rdsibdev_to_node(rds_ibdev));
54 if (!ibmr) {
55 err = -ENOMEM;
56 goto out_no_cigar;
57 }
58
59 frmr = &ibmr->u.frmr;
60 frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
61 pool->fmr_attr.max_pages);
62 if (IS_ERR(frmr->mr)) {
63 pr_warn("RDS/IB: %s failed to allocate MR", __func__);
64 goto out_no_cigar;
65 }
66
67 ibmr->pool = pool;
68 if (pool->pool_type == RDS_IB_MR_8K_POOL)
69 rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
70 else
71 rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
72
73 if (atomic_read(&pool->item_count) > pool->max_items_soft)
74 pool->max_items_soft = pool->max_items;
75
76 frmr->fr_state = FRMR_IS_FREE;
77 return ibmr;
78
79out_no_cigar:
80 kfree(ibmr);
81 atomic_dec(&pool->item_count);
82 return ERR_PTR(err);
83}
84
85static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
86{
87 struct rds_ib_mr_pool *pool = ibmr->pool;
88
89 if (drop)
90 llist_add(&ibmr->llnode, &pool->drop_list);
91 else
92 llist_add(&ibmr->llnode, &pool->free_list);
93 atomic_add(ibmr->sg_len, &pool->free_pinned);
94 atomic_inc(&pool->dirty_count);
95
96 /* If we've pinned too many pages, request a flush */
97 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
98 atomic_read(&pool->dirty_count) >= pool->max_items / 5)
99 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
100}
101
102static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
103{
104 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
105 struct ib_send_wr *failed_wr;
106 struct ib_reg_wr reg_wr;
107 int ret;
108
109 while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
110 atomic_inc(&ibmr->ic->i_fastreg_wrs);
111 cpu_relax();
112 }
113
114 ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, PAGE_SIZE);
115 if (unlikely(ret != ibmr->sg_len))
116 return ret < 0 ? ret : -EINVAL;
117
118 /* Perform a WR for the fast_reg_mr. Each individual page
119 * in the sg list is added to the fast reg page list and placed
120 * inside the fast_reg_mr WR. The key used is a rolling 8bit
121 * counter, which should guarantee uniqueness.
122 */
123 ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
124 frmr->fr_state = FRMR_IS_INUSE;
125
126 memset(&reg_wr, 0, sizeof(reg_wr));
127 reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
128 reg_wr.wr.opcode = IB_WR_REG_MR;
129 reg_wr.wr.num_sge = 0;
130 reg_wr.mr = frmr->mr;
131 reg_wr.key = frmr->mr->rkey;
132 reg_wr.access = IB_ACCESS_LOCAL_WRITE |
133 IB_ACCESS_REMOTE_READ |
134 IB_ACCESS_REMOTE_WRITE;
135 reg_wr.wr.send_flags = IB_SEND_SIGNALED;
136
137 failed_wr = &reg_wr.wr;
138 ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
139 WARN_ON(failed_wr != &reg_wr.wr);
140 if (unlikely(ret)) {
141 /* Failure here can be because of -ENOMEM as well */
142 frmr->fr_state = FRMR_IS_STALE;
143 atomic_inc(&ibmr->ic->i_fastreg_wrs);
144 if (printk_ratelimit())
145 pr_warn("RDS/IB: %s returned error(%d)\n",
146 __func__, ret);
147 }
148 return ret;
149}
150
151static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
152 struct rds_ib_mr_pool *pool,
153 struct rds_ib_mr *ibmr,
154 struct scatterlist *sg, unsigned int sg_len)
155{
156 struct ib_device *dev = rds_ibdev->dev;
157 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
158 int i;
159 u32 len;
160 int ret = 0;
161
162 /* We want to teardown old ibmr values here and fill it up with
163 * new sg values
164 */
165 rds_ib_teardown_mr(ibmr);
166
167 ibmr->sg = sg;
168 ibmr->sg_len = sg_len;
169 ibmr->sg_dma_len = 0;
170 frmr->sg_byte_len = 0;
171 WARN_ON(ibmr->sg_dma_len);
172 ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
173 DMA_BIDIRECTIONAL);
174 if (unlikely(!ibmr->sg_dma_len)) {
175 pr_warn("RDS/IB: %s failed!\n", __func__);
176 return -EBUSY;
177 }
178
179 frmr->sg_byte_len = 0;
180 frmr->dma_npages = 0;
181 len = 0;
182
183 ret = -EINVAL;
184 for (i = 0; i < ibmr->sg_dma_len; ++i) {
185 unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
186 u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
187
188 frmr->sg_byte_len += dma_len;
189 if (dma_addr & ~PAGE_MASK) {
190 if (i > 0)
191 goto out_unmap;
192 else
193 ++frmr->dma_npages;
194 }
195
196 if ((dma_addr + dma_len) & ~PAGE_MASK) {
197 if (i < ibmr->sg_dma_len - 1)
198 goto out_unmap;
199 else
200 ++frmr->dma_npages;
201 }
202
203 len += dma_len;
204 }
205 frmr->dma_npages += len >> PAGE_SHIFT;
206
207 if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
208 ret = -EMSGSIZE;
209 goto out_unmap;
210 }
211
212 ret = rds_ib_post_reg_frmr(ibmr);
213 if (ret)
214 goto out_unmap;
215
216 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
217 rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
218 else
219 rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
220
221 return ret;
222
223out_unmap:
224 ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
225 DMA_BIDIRECTIONAL);
226 ibmr->sg_dma_len = 0;
227 return ret;
228}
229
230static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
231{
232 struct ib_send_wr *s_wr, *failed_wr;
233 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
234 struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
235 int ret = -EINVAL;
236
237 if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
238 goto out;
239
240 if (frmr->fr_state != FRMR_IS_INUSE)
241 goto out;
242
243 while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
244 atomic_inc(&ibmr->ic->i_fastreg_wrs);
245 cpu_relax();
246 }
247
248 frmr->fr_inv = true;
249 s_wr = &frmr->fr_wr;
250
251 memset(s_wr, 0, sizeof(*s_wr));
252 s_wr->wr_id = (unsigned long)(void *)ibmr;
253 s_wr->opcode = IB_WR_LOCAL_INV;
254 s_wr->ex.invalidate_rkey = frmr->mr->rkey;
255 s_wr->send_flags = IB_SEND_SIGNALED;
256
257 failed_wr = s_wr;
258 ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr);
259 WARN_ON(failed_wr != s_wr);
260 if (unlikely(ret)) {
261 frmr->fr_state = FRMR_IS_STALE;
262 frmr->fr_inv = false;
263 atomic_inc(&ibmr->ic->i_fastreg_wrs);
264 pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
265 goto out;
266 }
267out:
268 return ret;
269}
270
271void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
272{
273 struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
274 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
275
276 if (wc->status != IB_WC_SUCCESS) {
277 frmr->fr_state = FRMR_IS_STALE;
278 if (rds_conn_up(ic->conn))
279 rds_ib_conn_error(ic->conn,
280 "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
281 &ic->conn->c_laddr,
282 &ic->conn->c_faddr,
283 wc->status,
284 ib_wc_status_msg(wc->status),
285 wc->vendor_err);
286 }
287
288 if (frmr->fr_inv) {
289 frmr->fr_state = FRMR_IS_FREE;
290 frmr->fr_inv = false;
291 }
292
293 atomic_inc(&ic->i_fastreg_wrs);
294}
295
296void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
297 unsigned long *unpinned, unsigned int goal)
298{
299 struct rds_ib_mr *ibmr, *next;
300 struct rds_ib_frmr *frmr;
301 int ret = 0;
302 unsigned int freed = *nfreed;
303
304 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
305 list_for_each_entry(ibmr, list, unmap_list) {
306 if (ibmr->sg_dma_len)
307 ret |= rds_ib_post_inv(ibmr);
308 }
309 if (ret)
310 pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
311
312 /* Now we can destroy the DMA mapping and unpin any pages */
313 list_for_each_entry_safe(ibmr, next, list, unmap_list) {
314 *unpinned += ibmr->sg_len;
315 frmr = &ibmr->u.frmr;
316 __rds_ib_teardown_mr(ibmr);
317 if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
318 /* Don't de-allocate if the MR is not free yet */
319 if (frmr->fr_state == FRMR_IS_INUSE)
320 continue;
321
322 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
323 rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
324 else
325 rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
326 list_del(&ibmr->unmap_list);
327 if (frmr->mr)
328 ib_dereg_mr(frmr->mr);
329 kfree(ibmr);
330 freed++;
331 }
332 }
333 *nfreed = freed;
334}
335
336struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
337 struct rds_ib_connection *ic,
338 struct scatterlist *sg,
339 unsigned long nents, u32 *key)
340{
341 struct rds_ib_mr *ibmr = NULL;
342 struct rds_ib_frmr *frmr;
343 int ret;
344
345 do {
346 if (ibmr)
347 rds_ib_free_frmr(ibmr, true);
348 ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
349 if (IS_ERR(ibmr))
350 return ibmr;
351 frmr = &ibmr->u.frmr;
352 } while (frmr->fr_state != FRMR_IS_FREE);
353
354 ibmr->ic = ic;
355 ibmr->device = rds_ibdev;
356 ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
357 if (ret == 0) {
358 *key = frmr->mr->rkey;
359 } else {
360 rds_ib_free_frmr(ibmr, false);
361 ibmr = ERR_PTR(ret);
362 }
363
364 return ibmr;
365}
366
367void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
368{
369 struct rds_ib_mr_pool *pool = ibmr->pool;
370 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
371
372 if (frmr->fr_state == FRMR_IS_STALE)
373 llist_add(&ibmr->llnode, &pool->drop_list);
374 else
375 llist_add(&ibmr->llnode, &pool->free_list);
376}
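
The FRMR path above revolves around a small per-MR state machine: an MR is FREE once its LOCAL_INV has completed, INUSE after a REG_MR work request has been posted, and a failed completion marks it STALE so it lands on the drop list instead of being reused. The sketch below is a simplified stand-alone model of those transitions; the event names are invented, and the real completion handler has extra nuance around invalidation completions that is not modelled here.

/* Simplified model of the FRMR lifecycle driven by the code above. */
#include <stdio.h>

enum fr_state { FRMR_IS_FREE, FRMR_IS_INUSE, FRMR_IS_STALE };

enum fr_event { EV_REG_POSTED, EV_INV_COMPLETED, EV_WC_ERROR };

static enum fr_state fr_next(enum fr_state s, enum fr_event ev)
{
	if (ev == EV_WC_ERROR)
		return FRMR_IS_STALE;		/* dropped, never reused */
	if (s == FRMR_IS_FREE && ev == EV_REG_POSTED)
		return FRMR_IS_INUSE;
	if (s == FRMR_IS_INUSE && ev == EV_INV_COMPLETED)
		return FRMR_IS_FREE;
	return s;				/* ignore impossible events */
}

int main(void)
{
	enum fr_state s = FRMR_IS_FREE;

	s = fr_next(s, EV_REG_POSTED);		/* rds_ib_post_reg_frmr() */
	s = fr_next(s, EV_INV_COMPLETED);	/* completion of rds_ib_post_inv() */
	printf("after reg+inv: %d (0 == FREE)\n", s);

	s = fr_next(s, EV_REG_POSTED);
	s = fr_next(s, EV_WC_ERROR);		/* failed work completion */
	printf("after error:   %d (2 == STALE)\n", s);
	return 0;
}
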
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
new file mode 100644
index 000000000000..1c754f4acbe5
--- /dev/null
+++ b/net/rds/ib_mr.h
@@ -0,0 +1,148 @@
1/*
2 * Copyright (c) 2016 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#ifndef _RDS_IB_MR_H
33#define _RDS_IB_MR_H
34
35#include <linux/kernel.h>
36
37#include "rds.h"
38#include "ib.h"
39
40#define RDS_MR_1M_POOL_SIZE (8192 / 2)
41#define RDS_MR_1M_MSG_SIZE 256
42#define RDS_MR_8K_MSG_SIZE 2
43#define RDS_MR_8K_SCALE (256 / (RDS_MR_8K_MSG_SIZE + 1))
44#define RDS_MR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2))
45
46struct rds_ib_fmr {
47 struct ib_fmr *fmr;
48 u64 *dma;
49};
50
51enum rds_ib_fr_state {
52 FRMR_IS_FREE, /* mr invalidated & ready for use */
53 FRMR_IS_INUSE, /* mr is in use or used & can be invalidated */
54 FRMR_IS_STALE, /* Stale MR and needs to be dropped */
55};
56
57struct rds_ib_frmr {
58 struct ib_mr *mr;
59 enum rds_ib_fr_state fr_state;
60 bool fr_inv;
61 struct ib_send_wr fr_wr;
62 unsigned int dma_npages;
63 unsigned int sg_byte_len;
64};
65
66/* This is stored as mr->r_trans_private. */
67struct rds_ib_mr {
68 struct rds_ib_device *device;
69 struct rds_ib_mr_pool *pool;
70 struct rds_ib_connection *ic;
71
72 struct llist_node llnode;
73
74 /* unmap_list is for freeing */
75 struct list_head unmap_list;
76 unsigned int remap_count;
77
78 struct scatterlist *sg;
79 unsigned int sg_len;
80 int sg_dma_len;
81
82 union {
83 struct rds_ib_fmr fmr;
84 struct rds_ib_frmr frmr;
85 } u;
86};
87
88/* Our own little MR pool */
89struct rds_ib_mr_pool {
90 unsigned int pool_type;
91 struct mutex flush_lock; /* serialize fmr invalidate */
92 struct delayed_work flush_worker; /* flush worker */
93
94 atomic_t item_count; /* total # of MRs */
95 atomic_t dirty_count; /* # dirty of MRs */
96
97 struct llist_head drop_list; /* MRs not reached max_maps */
98 struct llist_head free_list; /* unused MRs */
99 struct llist_head clean_list; /* unused & unmapped MRs */
100 wait_queue_head_t flush_wait;
101
102 atomic_t free_pinned; /* memory pinned by free MRs */
103 unsigned long max_items;
104 unsigned long max_items_soft;
105 unsigned long max_free_pinned;
106 struct ib_fmr_attr fmr_attr;
107 bool use_fastreg;
108};
109
110extern struct workqueue_struct *rds_ib_mr_wq;
111extern unsigned int rds_ib_mr_1m_pool_size;
112extern unsigned int rds_ib_mr_8k_pool_size;
113extern bool prefer_frmr;
114
115struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
116 int npages);
117void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
118 struct rds_info_rdma_connection *iinfo);
119void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
120void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
121 struct rds_sock *rs, u32 *key_ret);
122void rds_ib_sync_mr(void *trans_private, int dir);
123void rds_ib_free_mr(void *trans_private, int invalidate);
124void rds_ib_flush_mrs(void);
125int rds_ib_mr_init(void);
126void rds_ib_mr_exit(void);
127
128void __rds_ib_teardown_mr(struct rds_ib_mr *);
129void rds_ib_teardown_mr(struct rds_ib_mr *);
130struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int);
131int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *,
132 struct scatterlist *, unsigned int);
133struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *);
134int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **);
135struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
136 unsigned long, u32 *);
137struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *);
138void rds_ib_unreg_fmr(struct list_head *, unsigned int *,
139 unsigned long *, unsigned int);
140void rds_ib_free_fmr_list(struct rds_ib_mr *);
141struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
142 struct rds_ib_connection *ic,
143 struct scatterlist *sg,
144 unsigned long nents, u32 *key);
145void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
146 unsigned long *unpinned, unsigned int goal);
147void rds_ib_free_frmr_list(struct rds_ib_mr *);
148#endif
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index a2340748ec86..f7164ac1ffc1 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -35,78 +35,13 @@
35#include <linux/rculist.h> 35#include <linux/rculist.h>
36#include <linux/llist.h> 36#include <linux/llist.h>
37 37
38#include "rds.h" 38#include "ib_mr.h"
39#include "ib.h" 39
40struct workqueue_struct *rds_ib_mr_wq;
40 41
41static DEFINE_PER_CPU(unsigned long, clean_list_grace); 42static DEFINE_PER_CPU(unsigned long, clean_list_grace);
42#define CLEAN_LIST_BUSY_BIT 0 43#define CLEAN_LIST_BUSY_BIT 0
43 44
44/*
45 * This is stored as mr->r_trans_private.
46 */
47struct rds_ib_mr {
48 struct rds_ib_device *device;
49 struct rds_ib_mr_pool *pool;
50 struct ib_fmr *fmr;
51
52 struct llist_node llnode;
53
54 /* unmap_list is for freeing */
55 struct list_head unmap_list;
56 unsigned int remap_count;
57
58 struct scatterlist *sg;
59 unsigned int sg_len;
60 u64 *dma;
61 int sg_dma_len;
62};
63
64/*
65 * Our own little FMR pool
66 */
67struct rds_ib_mr_pool {
68 unsigned int pool_type;
69 struct mutex flush_lock; /* serialize fmr invalidate */
70 struct delayed_work flush_worker; /* flush worker */
71
72 atomic_t item_count; /* total # of MRs */
73 atomic_t dirty_count; /* # dirty of MRs */
74
75 struct llist_head drop_list; /* MRs that have reached their max_maps limit */
76 struct llist_head free_list; /* unused MRs */
77 struct llist_head clean_list; /* global unused & unamapped MRs */
78 wait_queue_head_t flush_wait;
79
80 atomic_t free_pinned; /* memory pinned by free MRs */
81 unsigned long max_items;
82 unsigned long max_items_soft;
83 unsigned long max_free_pinned;
84 struct ib_fmr_attr fmr_attr;
85};
86
87static struct workqueue_struct *rds_ib_fmr_wq;
88
89int rds_ib_fmr_init(void)
90{
91 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
92 if (!rds_ib_fmr_wq)
93 return -ENOMEM;
94 return 0;
95}
96
97/* By the time this is called all the IB devices should have been torn down and
98 * had their pools freed. As each pool is freed its work struct is waited on,
99 * so the pool flushing work queue should be idle by the time we get here.
100 */
101void rds_ib_fmr_exit(void)
102{
103 destroy_workqueue(rds_ib_fmr_wq);
104}
105
106static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
107static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
108static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
109
110static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) 45static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
111{ 46{
112 struct rds_ib_device *rds_ibdev; 47 struct rds_ib_device *rds_ibdev;
@@ -235,41 +170,6 @@ void rds_ib_destroy_nodev_conns(void)
235 rds_conn_destroy(ic->conn); 170 rds_conn_destroy(ic->conn);
236} 171}
237 172
238struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
239 int pool_type)
240{
241 struct rds_ib_mr_pool *pool;
242
243 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
244 if (!pool)
245 return ERR_PTR(-ENOMEM);
246
247 pool->pool_type = pool_type;
248 init_llist_head(&pool->free_list);
249 init_llist_head(&pool->drop_list);
250 init_llist_head(&pool->clean_list);
251 mutex_init(&pool->flush_lock);
252 init_waitqueue_head(&pool->flush_wait);
253 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
254
255 if (pool_type == RDS_IB_MR_1M_POOL) {
256 /* +1 allows for unaligned MRs */
257 pool->fmr_attr.max_pages = RDS_FMR_1M_MSG_SIZE + 1;
258 pool->max_items = RDS_FMR_1M_POOL_SIZE;
259 } else {
260 /* pool_type == RDS_IB_MR_8K_POOL */
261 pool->fmr_attr.max_pages = RDS_FMR_8K_MSG_SIZE + 1;
262 pool->max_items = RDS_FMR_8K_POOL_SIZE;
263 }
264
265 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
266 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
267 pool->fmr_attr.page_shift = PAGE_SHIFT;
268 pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4;
269
270 return pool;
271}
272
273void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) 173void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
274{ 174{
275 struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; 175 struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
@@ -278,16 +178,7 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
278 iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; 178 iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
279} 179}
280 180
281void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 181struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
282{
283 cancel_delayed_work_sync(&pool->flush_worker);
284 rds_ib_flush_mr_pool(pool, 1, NULL);
285 WARN_ON(atomic_read(&pool->item_count));
286 WARN_ON(atomic_read(&pool->free_pinned));
287 kfree(pool);
288}
289
290static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
291{ 182{
292 struct rds_ib_mr *ibmr = NULL; 183 struct rds_ib_mr *ibmr = NULL;
293 struct llist_node *ret; 184 struct llist_node *ret;
@@ -297,8 +188,13 @@ static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
297 flag = this_cpu_ptr(&clean_list_grace); 188 flag = this_cpu_ptr(&clean_list_grace);
298 set_bit(CLEAN_LIST_BUSY_BIT, flag); 189 set_bit(CLEAN_LIST_BUSY_BIT, flag);
299 ret = llist_del_first(&pool->clean_list); 190 ret = llist_del_first(&pool->clean_list);
300 if (ret) 191 if (ret) {
301 ibmr = llist_entry(ret, struct rds_ib_mr, llnode); 192 ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
193 if (pool->pool_type == RDS_IB_MR_8K_POOL)
194 rds_ib_stats_inc(s_ib_rdma_mr_8k_reused);
195 else
196 rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
197 }
302 198
303 clear_bit(CLEAN_LIST_BUSY_BIT, flag); 199 clear_bit(CLEAN_LIST_BUSY_BIT, flag);
304 preempt_enable(); 200 preempt_enable();
@@ -317,190 +213,6 @@ static inline void wait_clean_list_grace(void)
317 } 213 }
318} 214}
319 215
320static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev,
321 int npages)
322{
323 struct rds_ib_mr_pool *pool;
324 struct rds_ib_mr *ibmr = NULL;
325 int err = 0, iter = 0;
326
327 if (npages <= RDS_FMR_8K_MSG_SIZE)
328 pool = rds_ibdev->mr_8k_pool;
329 else
330 pool = rds_ibdev->mr_1m_pool;
331
332 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
333 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
334
335 /* Switch pools if one of the pool is reaching upper limit */
336 if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
337 if (pool->pool_type == RDS_IB_MR_8K_POOL)
338 pool = rds_ibdev->mr_1m_pool;
339 else
340 pool = rds_ibdev->mr_8k_pool;
341 }
342
343 while (1) {
344 ibmr = rds_ib_reuse_fmr(pool);
345 if (ibmr)
346 return ibmr;
347
348 /* No clean MRs - now we have the choice of either
349 * allocating a fresh MR up to the limit imposed by the
350 * driver, or flushing any dirty unused MRs.
351 * We try to avoid stalling in the send path if possible,
352 * so we allocate as long as we're allowed to.
353 *
354 * We're fussy with enforcing the FMR limit, though. If the driver
355 * tells us we can't use more than N fmrs, we shouldn't start
356 * arguing with it */
357 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
358 break;
359
360 atomic_dec(&pool->item_count);
361
362 if (++iter > 2) {
363 if (pool->pool_type == RDS_IB_MR_8K_POOL)
364 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
365 else
366 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
367 return ERR_PTR(-EAGAIN);
368 }
369
370 /* We do have some empty MRs. Flush them out. */
371 if (pool->pool_type == RDS_IB_MR_8K_POOL)
372 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
373 else
374 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
375 rds_ib_flush_mr_pool(pool, 0, &ibmr);
376 if (ibmr)
377 return ibmr;
378 }
379
380 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
381 if (!ibmr) {
382 err = -ENOMEM;
383 goto out_no_cigar;
384 }
385
386 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
387 (IB_ACCESS_LOCAL_WRITE |
388 IB_ACCESS_REMOTE_READ |
389 IB_ACCESS_REMOTE_WRITE|
390 IB_ACCESS_REMOTE_ATOMIC),
391 &pool->fmr_attr);
392 if (IS_ERR(ibmr->fmr)) {
393 err = PTR_ERR(ibmr->fmr);
394 ibmr->fmr = NULL;
395 printk(KERN_WARNING "RDS/IB: ib_alloc_fmr failed (err=%d)\n", err);
396 goto out_no_cigar;
397 }
398
399 ibmr->pool = pool;
400 if (pool->pool_type == RDS_IB_MR_8K_POOL)
401 rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
402 else
403 rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
404
405 return ibmr;
406
407out_no_cigar:
408 if (ibmr) {
409 if (ibmr->fmr)
410 ib_dealloc_fmr(ibmr->fmr);
411 kfree(ibmr);
412 }
413 atomic_dec(&pool->item_count);
414 return ERR_PTR(err);
415}
416
417static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
418 struct scatterlist *sg, unsigned int nents)
419{
420 struct ib_device *dev = rds_ibdev->dev;
421 struct scatterlist *scat = sg;
422 u64 io_addr = 0;
423 u64 *dma_pages;
424 u32 len;
425 int page_cnt, sg_dma_len;
426 int i, j;
427 int ret;
428
429 sg_dma_len = ib_dma_map_sg(dev, sg, nents,
430 DMA_BIDIRECTIONAL);
431 if (unlikely(!sg_dma_len)) {
432 printk(KERN_WARNING "RDS/IB: dma_map_sg failed!\n");
433 return -EBUSY;
434 }
435
436 len = 0;
437 page_cnt = 0;
438
439 for (i = 0; i < sg_dma_len; ++i) {
440 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
441 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
442
443 if (dma_addr & ~PAGE_MASK) {
444 if (i > 0)
445 return -EINVAL;
446 else
447 ++page_cnt;
448 }
449 if ((dma_addr + dma_len) & ~PAGE_MASK) {
450 if (i < sg_dma_len - 1)
451 return -EINVAL;
452 else
453 ++page_cnt;
454 }
455
456 len += dma_len;
457 }
458
459 page_cnt += len >> PAGE_SHIFT;
460 if (page_cnt > ibmr->pool->fmr_attr.max_pages)
461 return -EINVAL;
462
463 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
464 rdsibdev_to_node(rds_ibdev));
465 if (!dma_pages)
466 return -ENOMEM;
467
468 page_cnt = 0;
469 for (i = 0; i < sg_dma_len; ++i) {
470 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
471 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
472
473 for (j = 0; j < dma_len; j += PAGE_SIZE)
474 dma_pages[page_cnt++] =
475 (dma_addr & PAGE_MASK) + j;
476 }
477
478 ret = ib_map_phys_fmr(ibmr->fmr,
479 dma_pages, page_cnt, io_addr);
480 if (ret)
481 goto out;
482
483 /* Success - we remapped the MR, so we can
484 * safely tear down the old mapping. */
485 rds_ib_teardown_mr(ibmr);
486
487 ibmr->sg = scat;
488 ibmr->sg_len = nents;
489 ibmr->sg_dma_len = sg_dma_len;
490 ibmr->remap_count++;
491
492 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
493 rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
494 else
495 rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
496 ret = 0;
497
498out:
499 kfree(dma_pages);
500
501 return ret;
502}
503
504void rds_ib_sync_mr(void *trans_private, int direction) 216void rds_ib_sync_mr(void *trans_private, int direction)
505{ 217{
506 struct rds_ib_mr *ibmr = trans_private; 218 struct rds_ib_mr *ibmr = trans_private;
@@ -518,7 +230,7 @@ void rds_ib_sync_mr(void *trans_private, int direction)
518 } 230 }
519} 231}
520 232
521static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) 233void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
522{ 234{
523 struct rds_ib_device *rds_ibdev = ibmr->device; 235 struct rds_ib_device *rds_ibdev = ibmr->device;
524 236
@@ -549,7 +261,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
549 } 261 }
550} 262}
551 263
552static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr) 264void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
553{ 265{
554 unsigned int pinned = ibmr->sg_len; 266 unsigned int pinned = ibmr->sg_len;
555 267
@@ -623,17 +335,15 @@ static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
623 * If the number of MRs allocated exceeds the limit, we also try 335 * If the number of MRs allocated exceeds the limit, we also try
624 * to free as many MRs as needed to get back to this limit. 336 * to free as many MRs as needed to get back to this limit.
625 */ 337 */
626static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, 338int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
627 int free_all, struct rds_ib_mr **ibmr_ret) 339 int free_all, struct rds_ib_mr **ibmr_ret)
628{ 340{
629 struct rds_ib_mr *ibmr, *next; 341 struct rds_ib_mr *ibmr;
630 struct llist_node *clean_nodes; 342 struct llist_node *clean_nodes;
631 struct llist_node *clean_tail; 343 struct llist_node *clean_tail;
632 LIST_HEAD(unmap_list); 344 LIST_HEAD(unmap_list);
633 LIST_HEAD(fmr_list);
634 unsigned long unpinned = 0; 345 unsigned long unpinned = 0;
635 unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; 346 unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
636 int ret = 0;
637 347
638 if (pool->pool_type == RDS_IB_MR_8K_POOL) 348 if (pool->pool_type == RDS_IB_MR_8K_POOL)
639 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush); 349 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
@@ -643,7 +353,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
643 if (ibmr_ret) { 353 if (ibmr_ret) {
644 DEFINE_WAIT(wait); 354 DEFINE_WAIT(wait);
645 while (!mutex_trylock(&pool->flush_lock)) { 355 while (!mutex_trylock(&pool->flush_lock)) {
646 ibmr = rds_ib_reuse_fmr(pool); 356 ibmr = rds_ib_reuse_mr(pool);
647 if (ibmr) { 357 if (ibmr) {
648 *ibmr_ret = ibmr; 358 *ibmr_ret = ibmr;
649 finish_wait(&pool->flush_wait, &wait); 359 finish_wait(&pool->flush_wait, &wait);
@@ -655,7 +365,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
655 if (llist_empty(&pool->clean_list)) 365 if (llist_empty(&pool->clean_list))
656 schedule(); 366 schedule();
657 367
658 ibmr = rds_ib_reuse_fmr(pool); 368 ibmr = rds_ib_reuse_mr(pool);
659 if (ibmr) { 369 if (ibmr) {
660 *ibmr_ret = ibmr; 370 *ibmr_ret = ibmr;
661 finish_wait(&pool->flush_wait, &wait); 371 finish_wait(&pool->flush_wait, &wait);
@@ -667,7 +377,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
667 mutex_lock(&pool->flush_lock); 377 mutex_lock(&pool->flush_lock);
668 378
669 if (ibmr_ret) { 379 if (ibmr_ret) {
670 ibmr = rds_ib_reuse_fmr(pool); 380 ibmr = rds_ib_reuse_mr(pool);
671 if (ibmr) { 381 if (ibmr) {
672 *ibmr_ret = ibmr; 382 *ibmr_ret = ibmr;
673 goto out; 383 goto out;
@@ -687,30 +397,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
687 if (list_empty(&unmap_list)) 397 if (list_empty(&unmap_list))
688 goto out; 398 goto out;
689 399
690 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 400 if (pool->use_fastreg)
691 list_for_each_entry(ibmr, &unmap_list, unmap_list) 401 rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal);
692 list_add(&ibmr->fmr->list, &fmr_list); 402 else
693 403 rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
694 ret = ib_unmap_fmr(&fmr_list);
695 if (ret)
696 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
697
698 /* Now we can destroy the DMA mapping and unpin any pages */
699 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
700 unpinned += ibmr->sg_len;
701 __rds_ib_teardown_mr(ibmr);
702 if (nfreed < free_goal ||
703 ibmr->remap_count >= pool->fmr_attr.max_maps) {
704 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
705 rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
706 else
707 rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
708 list_del(&ibmr->unmap_list);
709 ib_dealloc_fmr(ibmr->fmr);
710 kfree(ibmr);
711 nfreed++;
712 }
713 }
714 404
715 if (!list_empty(&unmap_list)) { 405 if (!list_empty(&unmap_list)) {
716 /* we have to make sure that none of the things we're about 406 /* we have to make sure that none of the things we're about
@@ -743,7 +433,47 @@ out:
743 if (waitqueue_active(&pool->flush_wait)) 433 if (waitqueue_active(&pool->flush_wait))
744 wake_up(&pool->flush_wait); 434 wake_up(&pool->flush_wait);
745out_nolock: 435out_nolock:
746 return ret; 436 return 0;
437}
438
439struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
440{
441 struct rds_ib_mr *ibmr = NULL;
442 int iter = 0;
443
444 if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
445 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
446
447 while (1) {
448 ibmr = rds_ib_reuse_mr(pool);
449 if (ibmr)
450 return ibmr;
451
452 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
453 break;
454
455 atomic_dec(&pool->item_count);
456
457 if (++iter > 2) {
458 if (pool->pool_type == RDS_IB_MR_8K_POOL)
459 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
460 else
461 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
462 return ERR_PTR(-EAGAIN);
463 }
464
465 /* We do have some empty MRs. Flush them out. */
466 if (pool->pool_type == RDS_IB_MR_8K_POOL)
467 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
468 else
469 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
470
471 rds_ib_flush_mr_pool(pool, 0, &ibmr);
472 if (ibmr)
473 return ibmr;
474 }
475
476 return ibmr;
747} 477}
748 478
749static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 479static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
@@ -762,10 +492,10 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
762 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 492 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
763 493
764 /* Return it to the pool's free list */ 494 /* Return it to the pool's free list */
765 if (ibmr->remap_count >= pool->fmr_attr.max_maps) 495 if (rds_ibdev->use_fastreg)
766 llist_add(&ibmr->llnode, &pool->drop_list); 496 rds_ib_free_frmr_list(ibmr);
767 else 497 else
768 llist_add(&ibmr->llnode, &pool->free_list); 498 rds_ib_free_fmr_list(ibmr);
769 499
770 atomic_add(ibmr->sg_len, &pool->free_pinned); 500 atomic_add(ibmr->sg_len, &pool->free_pinned);
771 atomic_inc(&pool->dirty_count); 501 atomic_inc(&pool->dirty_count);
@@ -773,7 +503,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
773 /* If we've pinned too many pages, request a flush */ 503 /* If we've pinned too many pages, request a flush */
774 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 504 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
775 atomic_read(&pool->dirty_count) >= pool->max_items / 5) 505 atomic_read(&pool->dirty_count) >= pool->max_items / 5)
776 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); 506 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
777 507
778 if (invalidate) { 508 if (invalidate) {
779 if (likely(!in_interrupt())) { 509 if (likely(!in_interrupt())) {
@@ -782,7 +512,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
782 /* We get here if the user created a MR marked 512 /* We get here if the user created a MR marked
783 * as use_once and invalidate at the same time. 513 * as use_once and invalidate at the same time.
784 */ 514 */
785 queue_delayed_work(rds_ib_fmr_wq, 515 queue_delayed_work(rds_ib_mr_wq,
786 &pool->flush_worker, 10); 516 &pool->flush_worker, 10);
787 } 517 }
788 } 518 }
@@ -810,6 +540,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
810{ 540{
811 struct rds_ib_device *rds_ibdev; 541 struct rds_ib_device *rds_ibdev;
812 struct rds_ib_mr *ibmr = NULL; 542 struct rds_ib_mr *ibmr = NULL;
543 struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
813 int ret; 544 int ret;
814 545
815 rds_ibdev = rds_ib_get_device(rs->rs_bound_addr); 546 rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
@@ -823,29 +554,81 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
823 goto out; 554 goto out;
824 } 555 }
825 556
826 ibmr = rds_ib_alloc_fmr(rds_ibdev, nents); 557 if (rds_ibdev->use_fastreg)
827 if (IS_ERR(ibmr)) { 558 ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
828 rds_ib_dev_put(rds_ibdev);
829 return ibmr;
830 }
831
832 ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
833 if (ret == 0)
834 *key_ret = ibmr->fmr->rkey;
835 else 559 else
836 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); 560 ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
837 561 if (ibmr)
838 ibmr->device = rds_ibdev; 562 rds_ibdev = NULL;
839 rds_ibdev = NULL;
840 563
841 out: 564 out:
842 if (ret) { 565 if (!ibmr)
843 if (ibmr) 566 pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
844 rds_ib_free_mr(ibmr, 0); 567
845 ibmr = ERR_PTR(ret);
846 }
847 if (rds_ibdev) 568 if (rds_ibdev)
848 rds_ib_dev_put(rds_ibdev); 569 rds_ib_dev_put(rds_ibdev);
570
849 return ibmr; 571 return ibmr;
850} 572}
851 573
574void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
575{
576 cancel_delayed_work_sync(&pool->flush_worker);
577 rds_ib_flush_mr_pool(pool, 1, NULL);
578 WARN_ON(atomic_read(&pool->item_count));
579 WARN_ON(atomic_read(&pool->free_pinned));
580 kfree(pool);
581}
582
583struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
584 int pool_type)
585{
586 struct rds_ib_mr_pool *pool;
587
588 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
589 if (!pool)
590 return ERR_PTR(-ENOMEM);
591
592 pool->pool_type = pool_type;
593 init_llist_head(&pool->free_list);
594 init_llist_head(&pool->drop_list);
595 init_llist_head(&pool->clean_list);
596 mutex_init(&pool->flush_lock);
597 init_waitqueue_head(&pool->flush_wait);
598 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
599
600 if (pool_type == RDS_IB_MR_1M_POOL) {
601 /* +1 allows for unaligned MRs */
602 pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
603 pool->max_items = RDS_MR_1M_POOL_SIZE;
604 } else {
605 /* pool_type == RDS_IB_MR_8K_POOL */
606 pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
607 pool->max_items = RDS_MR_8K_POOL_SIZE;
608 }
609
610 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
611 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
612 pool->fmr_attr.page_shift = PAGE_SHIFT;
613 pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4;
614 pool->use_fastreg = rds_ibdev->use_fastreg;
615
616 return pool;
617}
618
619int rds_ib_mr_init(void)
620{
621 rds_ib_mr_wq = create_workqueue("rds_mr_flushd");
622 if (!rds_ib_mr_wq)
623 return -ENOMEM;
624 return 0;
625}
626
627/* By the time this is called all the IB devices should have been torn down and
628 * had their pools freed. As each pool is freed its work struct is waited on,
629 * so the pool flushing work queue should be idle by the time we get here.
630 */
631void rds_ib_mr_exit(void)
632{
633 destroy_workqueue(rds_ib_mr_wq);
634}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 977fb86065b7..abc8cc805e8d 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -796,7 +796,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
796 796
797 addr = kmap_atomic(sg_page(&frag->f_sg)); 797 addr = kmap_atomic(sg_page(&frag->f_sg));
798 798
799 src = addr + frag_off; 799 src = addr + frag->f_sg.offset + frag_off;
800 dst = (void *)map->m_page_addrs[map_page] + map_off; 800 dst = (void *)map->m_page_addrs[map_page] + map_off;
801 for (k = 0; k < to_copy; k += 8) { 801 for (k = 0; k < to_copy; k += 8) {
802 /* Record ports that became uncongested, ie 802 /* Record ports that became uncongested, ie
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index eac30bf486d7..f27d2c82b036 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -195,7 +195,7 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
195 195
196 send->s_op = NULL; 196 send->s_op = NULL;
197 197
198 send->s_wr.wr_id = i | RDS_IB_SEND_OP; 198 send->s_wr.wr_id = i;
199 send->s_wr.sg_list = send->s_sge; 199 send->s_wr.sg_list = send->s_sge;
200 send->s_wr.ex.imm_data = 0; 200 send->s_wr.ex.imm_data = 0;
201 201
@@ -263,9 +263,7 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
263 263
264 oldest = rds_ib_ring_oldest(&ic->i_send_ring); 264 oldest = rds_ib_ring_oldest(&ic->i_send_ring);
265 265
266 completed = rds_ib_ring_completed(&ic->i_send_ring, 266 completed = rds_ib_ring_completed(&ic->i_send_ring, wc->wr_id, oldest);
267 (wc->wr_id & ~RDS_IB_SEND_OP),
268 oldest);
269 267
270 for (i = 0; i < completed; i++) { 268 for (i = 0; i < completed; i++) {
271 send = &ic->i_sends[oldest]; 269 send = &ic->i_sends[oldest];
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index d77e04473056..7e78dca1f252 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -73,6 +73,8 @@ static const char *const rds_ib_stat_names[] = {
73 "ib_rdma_mr_1m_pool_flush", 73 "ib_rdma_mr_1m_pool_flush",
74 "ib_rdma_mr_1m_pool_wait", 74 "ib_rdma_mr_1m_pool_wait",
75 "ib_rdma_mr_1m_pool_depleted", 75 "ib_rdma_mr_1m_pool_depleted",
76 "ib_rdma_mr_8k_reused",
77 "ib_rdma_mr_1m_reused",
76 "ib_atomic_cswp", 78 "ib_atomic_cswp",
77 "ib_atomic_fadd", 79 "ib_atomic_fadd",
78}; 80};
diff --git a/net/rds/iw.c b/net/rds/iw.c
deleted file mode 100644
index f4a9fff829e0..000000000000
--- a/net/rds/iw.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/if.h>
36#include <linux/netdevice.h>
37#include <linux/inetdevice.h>
38#include <linux/if_arp.h>
39#include <linux/delay.h>
40#include <linux/slab.h>
41#include <linux/module.h>
42
43#include "rds.h"
44#include "iw.h"
45
46unsigned int fastreg_pool_size = RDS_FASTREG_POOL_SIZE;
47unsigned int fastreg_message_size = RDS_FASTREG_SIZE + 1; /* +1 allows for unaligned MRs */
48
49module_param(fastreg_pool_size, int, 0444);
50MODULE_PARM_DESC(fastreg_pool_size, " Max number of fastreg MRs per device");
51module_param(fastreg_message_size, int, 0444);
52MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MRs)");
53
54struct list_head rds_iw_devices;
55
56/* NOTE: if also grabbing iwdev lock, grab this first */
57DEFINE_SPINLOCK(iw_nodev_conns_lock);
58LIST_HEAD(iw_nodev_conns);
59
60static void rds_iw_add_one(struct ib_device *device)
61{
62 struct rds_iw_device *rds_iwdev;
63
64 /* Only handle iwarp devices */
65 if (device->node_type != RDMA_NODE_RNIC)
66 return;
67
68 rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
69 if (!rds_iwdev)
70 return;
71
72 spin_lock_init(&rds_iwdev->spinlock);
73
74 rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
75 rds_iwdev->max_wrs = device->attrs.max_qp_wr;
76 rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
77
78 rds_iwdev->dev = device;
79 rds_iwdev->pd = ib_alloc_pd(device);
80 if (IS_ERR(rds_iwdev->pd))
81 goto free_dev;
82
83 if (!rds_iwdev->dma_local_lkey) {
84 rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd,
85 IB_ACCESS_REMOTE_READ |
86 IB_ACCESS_REMOTE_WRITE |
87 IB_ACCESS_LOCAL_WRITE);
88 if (IS_ERR(rds_iwdev->mr))
89 goto err_pd;
90 } else
91 rds_iwdev->mr = NULL;
92
93 rds_iwdev->mr_pool = rds_iw_create_mr_pool(rds_iwdev);
94 if (IS_ERR(rds_iwdev->mr_pool)) {
95 rds_iwdev->mr_pool = NULL;
96 goto err_mr;
97 }
98
99 INIT_LIST_HEAD(&rds_iwdev->cm_id_list);
100 INIT_LIST_HEAD(&rds_iwdev->conn_list);
101 list_add_tail(&rds_iwdev->list, &rds_iw_devices);
102
103 ib_set_client_data(device, &rds_iw_client, rds_iwdev);
104 return;
105
106err_mr:
107 if (rds_iwdev->mr)
108 ib_dereg_mr(rds_iwdev->mr);
109err_pd:
110 ib_dealloc_pd(rds_iwdev->pd);
111free_dev:
112 kfree(rds_iwdev);
113}
114
115static void rds_iw_remove_one(struct ib_device *device, void *client_data)
116{
117 struct rds_iw_device *rds_iwdev = client_data;
118 struct rds_iw_cm_id *i_cm_id, *next;
119
120 if (!rds_iwdev)
121 return;
122
123 spin_lock_irq(&rds_iwdev->spinlock);
124 list_for_each_entry_safe(i_cm_id, next, &rds_iwdev->cm_id_list, list) {
125 list_del(&i_cm_id->list);
126 kfree(i_cm_id);
127 }
128 spin_unlock_irq(&rds_iwdev->spinlock);
129
130 rds_iw_destroy_conns(rds_iwdev);
131
132 if (rds_iwdev->mr_pool)
133 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
134
135 if (rds_iwdev->mr)
136 ib_dereg_mr(rds_iwdev->mr);
137
138 ib_dealloc_pd(rds_iwdev->pd);
139
140 list_del(&rds_iwdev->list);
141 kfree(rds_iwdev);
142}
143
144struct ib_client rds_iw_client = {
145 .name = "rds_iw",
146 .add = rds_iw_add_one,
147 .remove = rds_iw_remove_one
148};
149
150static int rds_iw_conn_info_visitor(struct rds_connection *conn,
151 void *buffer)
152{
153 struct rds_info_rdma_connection *iinfo = buffer;
154 struct rds_iw_connection *ic;
155
156 /* We will only ever look at IB transports */
157 if (conn->c_trans != &rds_iw_transport)
158 return 0;
159
160 iinfo->src_addr = conn->c_laddr;
161 iinfo->dst_addr = conn->c_faddr;
162
163 memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
164 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
165 if (rds_conn_state(conn) == RDS_CONN_UP) {
166 struct rds_iw_device *rds_iwdev;
167 struct rdma_dev_addr *dev_addr;
168
169 ic = conn->c_transport_data;
170 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
171
172 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
173 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
174
175 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
176 iinfo->max_send_wr = ic->i_send_ring.w_nr;
177 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
178 iinfo->max_send_sge = rds_iwdev->max_sge;
179 rds_iw_get_mr_info(rds_iwdev, iinfo);
180 }
181 return 1;
182}
183
184static void rds_iw_ic_info(struct socket *sock, unsigned int len,
185 struct rds_info_iterator *iter,
186 struct rds_info_lengths *lens)
187{
188 rds_for_each_conn_info(sock, len, iter, lens,
189 rds_iw_conn_info_visitor,
190 sizeof(struct rds_info_rdma_connection));
191}
192
193
194/*
195 * Early RDS/IB was built to only bind to an address if there is an IPoIB
196 * device with that address set.
197 *
198 * If it were me, I'd advocate for something more flexible. Sending and
199 * receiving should be device-agnostic. Transports would try and maintain
200 * connections between peers who have messages queued. Userspace would be
201 * allowed to influence which paths have priority. We could call userspace
202 * asserting this policy "routing".
203 */
204static int rds_iw_laddr_check(struct net *net, __be32 addr)
205{
206 int ret;
207 struct rdma_cm_id *cm_id;
208 struct sockaddr_in sin;
209
210 /* Create a CMA ID and try to bind it. This catches both
211 * IB and iWARP capable NICs.
212 */
213 cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
214 if (IS_ERR(cm_id))
215 return PTR_ERR(cm_id);
216
217 memset(&sin, 0, sizeof(sin));
218 sin.sin_family = AF_INET;
219 sin.sin_addr.s_addr = addr;
220
221 /* rdma_bind_addr will only succeed for IB & iWARP devices */
222 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
223 /* due to this, we will claim to support IB devices unless we
224 check node_type. */
225 if (ret || !cm_id->device ||
226 cm_id->device->node_type != RDMA_NODE_RNIC)
227 ret = -EADDRNOTAVAIL;
228
229 rdsdebug("addr %pI4 ret %d node type %d\n",
230 &addr, ret,
231 cm_id->device ? cm_id->device->node_type : -1);
232
233 rdma_destroy_id(cm_id);
234
235 return ret;
236}
237
238void rds_iw_exit(void)
239{
240 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
241 rds_iw_destroy_nodev_conns();
242 ib_unregister_client(&rds_iw_client);
243 rds_iw_sysctl_exit();
244 rds_iw_recv_exit();
245 rds_trans_unregister(&rds_iw_transport);
246}
247
248struct rds_transport rds_iw_transport = {
249 .laddr_check = rds_iw_laddr_check,
250 .xmit_complete = rds_iw_xmit_complete,
251 .xmit = rds_iw_xmit,
252 .xmit_rdma = rds_iw_xmit_rdma,
253 .recv = rds_iw_recv,
254 .conn_alloc = rds_iw_conn_alloc,
255 .conn_free = rds_iw_conn_free,
256 .conn_connect = rds_iw_conn_connect,
257 .conn_shutdown = rds_iw_conn_shutdown,
258 .inc_copy_to_user = rds_iw_inc_copy_to_user,
259 .inc_free = rds_iw_inc_free,
260 .cm_initiate_connect = rds_iw_cm_initiate_connect,
261 .cm_handle_connect = rds_iw_cm_handle_connect,
262 .cm_connect_complete = rds_iw_cm_connect_complete,
263 .stats_info_copy = rds_iw_stats_info_copy,
264 .exit = rds_iw_exit,
265 .get_mr = rds_iw_get_mr,
266 .sync_mr = rds_iw_sync_mr,
267 .free_mr = rds_iw_free_mr,
268 .flush_mrs = rds_iw_flush_mrs,
269 .t_owner = THIS_MODULE,
270 .t_name = "iwarp",
271 .t_type = RDS_TRANS_IWARP,
272 .t_prefer_loopback = 1,
273};
274
275int rds_iw_init(void)
276{
277 int ret;
278
279 INIT_LIST_HEAD(&rds_iw_devices);
280
281 ret = ib_register_client(&rds_iw_client);
282 if (ret)
283 goto out;
284
285 ret = rds_iw_sysctl_init();
286 if (ret)
287 goto out_ibreg;
288
289 ret = rds_iw_recv_init();
290 if (ret)
291 goto out_sysctl;
292
293 ret = rds_trans_register(&rds_iw_transport);
294 if (ret)
295 goto out_recv;
296
297 rds_info_register_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
298
299 goto out;
300
301out_recv:
302 rds_iw_recv_exit();
303out_sysctl:
304 rds_iw_sysctl_exit();
305out_ibreg:
306 ib_unregister_client(&rds_iw_client);
307out:
308 return ret;
309}
310
311MODULE_LICENSE("GPL");
312
diff --git a/net/rds/iw.h b/net/rds/iw.h
deleted file mode 100644
index 5af01d1758b3..000000000000
--- a/net/rds/iw.h
+++ /dev/null
@@ -1,398 +0,0 @@
1#ifndef _RDS_IW_H
2#define _RDS_IW_H
3
4#include <linux/interrupt.h>
5#include <rdma/ib_verbs.h>
6#include <rdma/rdma_cm.h>
7#include "rds.h"
8#include "rdma_transport.h"
9
10#define RDS_FASTREG_SIZE 20
11#define RDS_FASTREG_POOL_SIZE 2048
12
13#define RDS_IW_MAX_SGE 8
14#define RDS_IW_RECV_SGE 2
15
16#define RDS_IW_DEFAULT_RECV_WR 1024
17#define RDS_IW_DEFAULT_SEND_WR 256
18
19#define RDS_IW_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
20
21extern struct list_head rds_iw_devices;
22
23/*
24 * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
25 * try to minimize the amount of memory tied up in both the device and
26 * socket receive queues.
27 */
28/* page offset of the final full frag that fits in the page */
29#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
30struct rds_page_frag {
31 struct list_head f_item;
32 struct page *f_page;
33 unsigned long f_offset;
34 dma_addr_t f_mapped;
35};
36
37struct rds_iw_incoming {
38 struct list_head ii_frags;
39 struct rds_incoming ii_inc;
40};
41
42struct rds_iw_connect_private {
43 /* Add new fields at the end, and don't permute existing fields. */
44 __be32 dp_saddr;
45 __be32 dp_daddr;
46 u8 dp_protocol_major;
47 u8 dp_protocol_minor;
48 __be16 dp_protocol_minor_mask; /* bitmask */
49 __be32 dp_reserved1;
50 __be64 dp_ack_seq;
51 __be32 dp_credit; /* non-zero enables flow ctl */
52};
53
54struct rds_iw_scatterlist {
55 struct scatterlist *list;
56 unsigned int len;
57 int dma_len;
58 unsigned int dma_npages;
59 unsigned int bytes;
60};
61
62struct rds_iw_mapping {
63 spinlock_t m_lock; /* protect the mapping struct */
64 struct list_head m_list;
65 struct rds_iw_mr *m_mr;
66 uint32_t m_rkey;
67 struct rds_iw_scatterlist m_sg;
68};
69
70struct rds_iw_send_work {
71 struct rds_message *s_rm;
72
73 /* We should really put these into a union: */
74 struct rm_rdma_op *s_op;
75 struct rds_iw_mapping *s_mapping;
76 struct ib_mr *s_mr;
77 unsigned char s_remap_count;
78
79 union {
80 struct ib_send_wr s_send_wr;
81 struct ib_rdma_wr s_rdma_wr;
82 struct ib_reg_wr s_reg_wr;
83 };
84 struct ib_sge s_sge[RDS_IW_MAX_SGE];
85 unsigned long s_queued;
86};
87
88struct rds_iw_recv_work {
89 struct rds_iw_incoming *r_iwinc;
90 struct rds_page_frag *r_frag;
91 struct ib_recv_wr r_wr;
92 struct ib_sge r_sge[2];
93};
94
95struct rds_iw_work_ring {
96 u32 w_nr;
97 u32 w_alloc_ptr;
98 u32 w_alloc_ctr;
99 u32 w_free_ptr;
100 atomic_t w_free_ctr;
101};
102
103struct rds_iw_device;
104
105struct rds_iw_connection {
106
107 struct list_head iw_node;
108 struct rds_iw_device *rds_iwdev;
109 struct rds_connection *conn;
110
111 /* alphabet soup, IBTA style */
112 struct rdma_cm_id *i_cm_id;
113 struct ib_pd *i_pd;
114 struct ib_mr *i_mr;
115 struct ib_cq *i_send_cq;
116 struct ib_cq *i_recv_cq;
117
118 /* tx */
119 struct rds_iw_work_ring i_send_ring;
120 struct rds_message *i_rm;
121 struct rds_header *i_send_hdrs;
122 u64 i_send_hdrs_dma;
123 struct rds_iw_send_work *i_sends;
124
125 /* rx */
126 struct tasklet_struct i_recv_tasklet;
127 struct mutex i_recv_mutex;
128 struct rds_iw_work_ring i_recv_ring;
129 struct rds_iw_incoming *i_iwinc;
130 u32 i_recv_data_rem;
131 struct rds_header *i_recv_hdrs;
132 u64 i_recv_hdrs_dma;
133 struct rds_iw_recv_work *i_recvs;
134 struct rds_page_frag i_frag;
135 u64 i_ack_recv; /* last ACK received */
136
137 /* sending acks */
138 unsigned long i_ack_flags;
139#ifdef KERNEL_HAS_ATOMIC64
140 atomic64_t i_ack_next; /* next ACK to send */
141#else
142 spinlock_t i_ack_lock; /* protect i_ack_next */
143 u64 i_ack_next; /* next ACK to send */
144#endif
145 struct rds_header *i_ack;
146 struct ib_send_wr i_ack_wr;
147 struct ib_sge i_ack_sge;
148 u64 i_ack_dma;
149 unsigned long i_ack_queued;
150
151 /* Flow control related information
152 *
153 * Our algorithm uses a pair of variables that we need to access
154 * atomically - one for the send credits, and one for the posted
155 * recv credits we need to transfer to the remote.
156 * Rather than protect them using a slow spinlock, we put both into
157 * a single atomic_t and update it using cmpxchg
158 */
159 atomic_t i_credits;
160
161 /* Protocol version specific information */
162 unsigned int i_flowctl:1; /* enable/disable flow ctl */
163 unsigned int i_dma_local_lkey:1;
164 unsigned int i_fastreg_posted:1; /* fastreg posted on this connection */
165 /* Batched completions */
166 unsigned int i_unsignaled_wrs;
167 long i_unsignaled_bytes;
168};
169
170/* This assumes that atomic_t is at least 32 bits */
171#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff)
172#define IB_GET_POST_CREDITS(v) ((v) >> 16)
173#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff)
174#define IB_SET_POST_CREDITS(v) ((v) << 16)
175
176struct rds_iw_cm_id {
177 struct list_head list;
178 struct rdma_cm_id *cm_id;
179};
180
181struct rds_iw_device {
182 struct list_head list;
183 struct list_head cm_id_list;
184 struct list_head conn_list;
185 struct ib_device *dev;
186 struct ib_pd *pd;
187 struct ib_mr *mr;
188 struct rds_iw_mr_pool *mr_pool;
189 int max_sge;
190 unsigned int max_wrs;
191 unsigned int dma_local_lkey:1;
192 spinlock_t spinlock; /* protect the above */
193};
194
195/* bits for i_ack_flags */
196#define IB_ACK_IN_FLIGHT 0
197#define IB_ACK_REQUESTED 1
198
199/* Magic WR_ID for ACKs */
200#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
201#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL)
202#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
203
204struct rds_iw_statistics {
205 uint64_t s_iw_connect_raced;
206 uint64_t s_iw_listen_closed_stale;
207 uint64_t s_iw_tx_cq_call;
208 uint64_t s_iw_tx_cq_event;
209 uint64_t s_iw_tx_ring_full;
210 uint64_t s_iw_tx_throttle;
211 uint64_t s_iw_tx_sg_mapping_failure;
212 uint64_t s_iw_tx_stalled;
213 uint64_t s_iw_tx_credit_updates;
214 uint64_t s_iw_rx_cq_call;
215 uint64_t s_iw_rx_cq_event;
216 uint64_t s_iw_rx_ring_empty;
217 uint64_t s_iw_rx_refill_from_cq;
218 uint64_t s_iw_rx_refill_from_thread;
219 uint64_t s_iw_rx_alloc_limit;
220 uint64_t s_iw_rx_credit_updates;
221 uint64_t s_iw_ack_sent;
222 uint64_t s_iw_ack_send_failure;
223 uint64_t s_iw_ack_send_delayed;
224 uint64_t s_iw_ack_send_piggybacked;
225 uint64_t s_iw_ack_received;
226 uint64_t s_iw_rdma_mr_alloc;
227 uint64_t s_iw_rdma_mr_free;
228 uint64_t s_iw_rdma_mr_used;
229 uint64_t s_iw_rdma_mr_pool_flush;
230 uint64_t s_iw_rdma_mr_pool_wait;
231 uint64_t s_iw_rdma_mr_pool_depleted;
232};
233
234extern struct workqueue_struct *rds_iw_wq;
235
236/*
237 * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h
238 * doesn't define it.
239 */
240static inline void rds_iw_dma_sync_sg_for_cpu(struct ib_device *dev,
241 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
242{
243 unsigned int i;
244
245 for (i = 0; i < sg_dma_len; ++i) {
246 ib_dma_sync_single_for_cpu(dev,
247 ib_sg_dma_address(dev, &sg[i]),
248 ib_sg_dma_len(dev, &sg[i]),
249 direction);
250 }
251}
252#define ib_dma_sync_sg_for_cpu rds_iw_dma_sync_sg_for_cpu
253
254static inline void rds_iw_dma_sync_sg_for_device(struct ib_device *dev,
255 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
256{
257 unsigned int i;
258
259 for (i = 0; i < sg_dma_len; ++i) {
260 ib_dma_sync_single_for_device(dev,
261 ib_sg_dma_address(dev, &sg[i]),
262 ib_sg_dma_len(dev, &sg[i]),
263 direction);
264 }
265}
266#define ib_dma_sync_sg_for_device rds_iw_dma_sync_sg_for_device
267
268static inline u32 rds_iw_local_dma_lkey(struct rds_iw_connection *ic)
269{
270 return ic->i_dma_local_lkey ? ic->i_cm_id->device->local_dma_lkey : ic->i_mr->lkey;
271}
272
273/* ib.c */
274extern struct rds_transport rds_iw_transport;
275extern struct ib_client rds_iw_client;
276
277extern unsigned int fastreg_pool_size;
278extern unsigned int fastreg_message_size;
279
280extern spinlock_t iw_nodev_conns_lock;
281extern struct list_head iw_nodev_conns;
282
283/* ib_cm.c */
284int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp);
285void rds_iw_conn_free(void *arg);
286int rds_iw_conn_connect(struct rds_connection *conn);
287void rds_iw_conn_shutdown(struct rds_connection *conn);
288void rds_iw_state_change(struct sock *sk);
289int rds_iw_listen_init(void);
290void rds_iw_listen_stop(void);
291void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
292int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
293 struct rdma_cm_event *event);
294int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id);
295void rds_iw_cm_connect_complete(struct rds_connection *conn,
296 struct rdma_cm_event *event);
297
298
299#define rds_iw_conn_error(conn, fmt...) \
300 __rds_iw_conn_error(conn, KERN_WARNING "RDS/IW: " fmt)
301
302/* ib_rdma.c */
303int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
304void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
305void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
306void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock);
307static inline void rds_iw_destroy_nodev_conns(void)
308{
309 __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock);
310}
311static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev)
312{
313 __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock);
314}
315struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
316void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
317void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
318void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
319 struct rds_sock *rs, u32 *key_ret);
320void rds_iw_sync_mr(void *trans_private, int dir);
321void rds_iw_free_mr(void *trans_private, int invalidate);
322void rds_iw_flush_mrs(void);
323
324/* ib_recv.c */
325int rds_iw_recv_init(void);
326void rds_iw_recv_exit(void);
327int rds_iw_recv(struct rds_connection *conn);
328int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
329 gfp_t page_gfp, int prefill);
330void rds_iw_inc_free(struct rds_incoming *inc);
331int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
332void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context);
333void rds_iw_recv_tasklet_fn(unsigned long data);
334void rds_iw_recv_init_ring(struct rds_iw_connection *ic);
335void rds_iw_recv_clear_ring(struct rds_iw_connection *ic);
336void rds_iw_recv_init_ack(struct rds_iw_connection *ic);
337void rds_iw_attempt_ack(struct rds_iw_connection *ic);
338void rds_iw_ack_send_complete(struct rds_iw_connection *ic);
339u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic);
340
341/* ib_ring.c */
342void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr);
343void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr);
344u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos);
345void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val);
346void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val);
347int rds_iw_ring_empty(struct rds_iw_work_ring *ring);
348int rds_iw_ring_low(struct rds_iw_work_ring *ring);
349u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring);
350u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest);
351extern wait_queue_head_t rds_iw_ring_empty_wait;
352
353/* ib_send.c */
354void rds_iw_xmit_complete(struct rds_connection *conn);
355int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
356 unsigned int hdr_off, unsigned int sg, unsigned int off);
357void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
358void rds_iw_send_init_ring(struct rds_iw_connection *ic);
359void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
360int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
361void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
362void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
363int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
364 u32 *adv_credits, int need_posted, int max_posted);
365
366/* ib_stats.c */
367DECLARE_PER_CPU(struct rds_iw_statistics, rds_iw_stats);
368#define rds_iw_stats_inc(member) rds_stats_inc_which(rds_iw_stats, member)
369unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
370 unsigned int avail);
371
372/* ib_sysctl.c */
373int rds_iw_sysctl_init(void);
374void rds_iw_sysctl_exit(void);
375extern unsigned long rds_iw_sysctl_max_send_wr;
376extern unsigned long rds_iw_sysctl_max_recv_wr;
377extern unsigned long rds_iw_sysctl_max_unsig_wrs;
378extern unsigned long rds_iw_sysctl_max_unsig_bytes;
379extern unsigned long rds_iw_sysctl_max_recv_allocation;
380extern unsigned int rds_iw_sysctl_flow_control;
381
382/*
383 * Helper functions for getting/setting the header and data SGEs in
384 * RDS packets (not RDMA)
385 */
386static inline struct ib_sge *
387rds_iw_header_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
388{
389 return &sge[0];
390}
391
392static inline struct ib_sge *
393rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
394{
395 return &sge[1];
396}
397
398#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
deleted file mode 100644
index aea4c911bc76..000000000000
--- a/net/rds/iw_cm.c
+++ /dev/null
@@ -1,769 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/ratelimit.h>
38
39#include "rds.h"
40#include "iw.h"
41
42/*
43 * Set the selected protocol version
44 */
45static void rds_iw_set_protocol(struct rds_connection *conn, unsigned int version)
46{
47 conn->c_version = version;
48}
49
50/*
51 * Set up flow control
52 */
53static void rds_iw_set_flow_control(struct rds_connection *conn, u32 credits)
54{
55 struct rds_iw_connection *ic = conn->c_transport_data;
56
57 if (rds_iw_sysctl_flow_control && credits != 0) {
58 /* We're doing flow control */
59 ic->i_flowctl = 1;
60 rds_iw_send_add_credits(conn, credits);
61 } else {
62 ic->i_flowctl = 0;
63 }
64}
65
66/*
67 * Connection established.
68 * We get here for both outgoing and incoming connection.
69 */
70void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
71{
72 const struct rds_iw_connect_private *dp = NULL;
73 struct rds_iw_connection *ic = conn->c_transport_data;
74 struct rds_iw_device *rds_iwdev;
75 int err;
76
77 if (event->param.conn.private_data_len) {
78 dp = event->param.conn.private_data;
79
80 rds_iw_set_protocol(conn,
81 RDS_PROTOCOL(dp->dp_protocol_major,
82 dp->dp_protocol_minor));
83 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
84 }
85
86 /* update ib_device with this local ipaddr & conn */
87 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
88 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
89 if (err)
90 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
91 rds_iw_add_conn(rds_iwdev, conn);
92
93 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */
95 if (dp && dp->dp_ack_seq)
96 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
97
98 printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n",
99 &conn->c_laddr, &conn->c_faddr,
100 RDS_PROTOCOL_MAJOR(conn->c_version),
101 RDS_PROTOCOL_MINOR(conn->c_version),
102 ic->i_flowctl ? ", flow control" : "");
103
104 rds_connect_complete(conn);
105}
106
107static void rds_iw_cm_fill_conn_param(struct rds_connection *conn,
108 struct rdma_conn_param *conn_param,
109 struct rds_iw_connect_private *dp,
110 u32 protocol_version)
111{
112 struct rds_iw_connection *ic = conn->c_transport_data;
113
114 memset(conn_param, 0, sizeof(struct rdma_conn_param));
115 /* XXX tune these? */
116 conn_param->responder_resources = 1;
117 conn_param->initiator_depth = 1;
118
119 if (dp) {
120 memset(dp, 0, sizeof(*dp));
121 dp->dp_saddr = conn->c_laddr;
122 dp->dp_daddr = conn->c_faddr;
123 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
124 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
125 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IW_SUPPORTED_PROTOCOLS);
126 dp->dp_ack_seq = rds_iw_piggyb_ack(ic);
127
128 /* Advertise flow control */
129 if (ic->i_flowctl) {
130 unsigned int credits;
131
132 credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
133 dp->dp_credit = cpu_to_be32(credits);
134 atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
135 }
136
137 conn_param->private_data = dp;
138 conn_param->private_data_len = sizeof(*dp);
139 }
140}
141
142static void rds_iw_cq_event_handler(struct ib_event *event, void *data)
143{
144 rdsdebug("event %u data %p\n", event->event, data);
145}
146
147static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
148{
149 struct rds_connection *conn = data;
150 struct rds_iw_connection *ic = conn->c_transport_data;
151
152 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);
153
154 switch (event->event) {
155 case IB_EVENT_COMM_EST:
156 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
157 break;
158 case IB_EVENT_QP_REQ_ERR:
159 case IB_EVENT_QP_FATAL:
160 default:
161 rdsdebug("Fatal QP Event %u "
162 "- connection %pI4->%pI4, reconnecting\n",
163 event->event, &conn->c_laddr,
164 &conn->c_faddr);
165 rds_conn_drop(conn);
166 break;
167 }
168}
169
170/*
171 * Create a QP
172 */
173static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
174 struct rds_iw_device *rds_iwdev,
175 struct rds_iw_work_ring *send_ring,
176 void (*send_cq_handler)(struct ib_cq *, void *),
177 struct rds_iw_work_ring *recv_ring,
178 void (*recv_cq_handler)(struct ib_cq *, void *),
179 void *context)
180{
181 struct ib_device *dev = rds_iwdev->dev;
182 struct ib_cq_init_attr cq_attr = {};
183 unsigned int send_size, recv_size;
184 int ret;
185
186 /* The offset of 1 is to accommodate the additional ACK WR. */
187 send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1);
188 recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1);
189 rds_iw_ring_resize(send_ring, send_size - 1);
190 rds_iw_ring_resize(recv_ring, recv_size - 1);
191
192 memset(attr, 0, sizeof(*attr));
193 attr->event_handler = rds_iw_qp_event_handler;
194 attr->qp_context = context;
195 attr->cap.max_send_wr = send_size;
196 attr->cap.max_recv_wr = recv_size;
197 attr->cap.max_send_sge = rds_iwdev->max_sge;
198 attr->cap.max_recv_sge = RDS_IW_RECV_SGE;
199 attr->sq_sig_type = IB_SIGNAL_REQ_WR;
200 attr->qp_type = IB_QPT_RC;
201
202 cq_attr.cqe = send_size;
203 attr->send_cq = ib_create_cq(dev, send_cq_handler,
204 rds_iw_cq_event_handler,
205 context, &cq_attr);
206 if (IS_ERR(attr->send_cq)) {
207 ret = PTR_ERR(attr->send_cq);
208 attr->send_cq = NULL;
209 rdsdebug("ib_create_cq send failed: %d\n", ret);
210 goto out;
211 }
212
213 cq_attr.cqe = recv_size;
214 attr->recv_cq = ib_create_cq(dev, recv_cq_handler,
215 rds_iw_cq_event_handler,
216 context, &cq_attr);
217 if (IS_ERR(attr->recv_cq)) {
218 ret = PTR_ERR(attr->recv_cq);
219 attr->recv_cq = NULL;
220 rdsdebug("ib_create_cq send failed: %d\n", ret);
221 goto out;
222 }
223
224 ret = ib_req_notify_cq(attr->send_cq, IB_CQ_NEXT_COMP);
225 if (ret) {
226 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
227 goto out;
228 }
229
230 ret = ib_req_notify_cq(attr->recv_cq, IB_CQ_SOLICITED);
231 if (ret) {
232 rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
233 goto out;
234 }
235
236out:
237 if (ret) {
238 if (attr->send_cq)
239 ib_destroy_cq(attr->send_cq);
240 if (attr->recv_cq)
241 ib_destroy_cq(attr->recv_cq);
242 }
243 return ret;
244}
245
246/*
247 * This needs to be very careful to not leave IS_ERR pointers around for
248 * cleanup to trip over.
249 */
250static int rds_iw_setup_qp(struct rds_connection *conn)
251{
252 struct rds_iw_connection *ic = conn->c_transport_data;
253 struct ib_device *dev = ic->i_cm_id->device;
254 struct ib_qp_init_attr attr;
255 struct rds_iw_device *rds_iwdev;
256 int ret;
257
258 /* rds_iw_add_one creates a rds_iw_device object per IB device,
259 * and allocates a protection domain, memory range and MR pool
260 * for each. If that fails for any reason, it will not register
261 * the rds_iwdev at all.
262 */
263 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
264 if (!rds_iwdev) {
265 printk_ratelimited(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
266 dev->name);
267 return -EOPNOTSUPP;
268 }
269
270 /* Protection domain and memory range */
271 ic->i_pd = rds_iwdev->pd;
272 ic->i_mr = rds_iwdev->mr;
273
274 ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
275 &ic->i_send_ring, rds_iw_send_cq_comp_handler,
276 &ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
277 conn);
278 if (ret < 0)
279 goto out;
280
281 ic->i_send_cq = attr.send_cq;
282 ic->i_recv_cq = attr.recv_cq;
283
284 /*
285 * XXX this can fail if max_*_wr is too large? Are we supposed
286 * to back off until we get a value that the hardware can support?
287 */
288 ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
289 if (ret) {
290 rdsdebug("rdma_create_qp failed: %d\n", ret);
291 goto out;
292 }
293
294 ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
295 ic->i_send_ring.w_nr *
296 sizeof(struct rds_header),
297 &ic->i_send_hdrs_dma, GFP_KERNEL);
298 if (!ic->i_send_hdrs) {
299 ret = -ENOMEM;
300 rdsdebug("ib_dma_alloc_coherent send failed\n");
301 goto out;
302 }
303
304 ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
305 ic->i_recv_ring.w_nr *
306 sizeof(struct rds_header),
307 &ic->i_recv_hdrs_dma, GFP_KERNEL);
308 if (!ic->i_recv_hdrs) {
309 ret = -ENOMEM;
310 rdsdebug("ib_dma_alloc_coherent recv failed\n");
311 goto out;
312 }
313
314 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
315 &ic->i_ack_dma, GFP_KERNEL);
316 if (!ic->i_ack) {
317 ret = -ENOMEM;
318 rdsdebug("ib_dma_alloc_coherent ack failed\n");
319 goto out;
320 }
321
322 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
323 if (!ic->i_sends) {
324 ret = -ENOMEM;
325 rdsdebug("send allocation failed\n");
326 goto out;
327 }
328 rds_iw_send_init_ring(ic);
329
330 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
331 if (!ic->i_recvs) {
332 ret = -ENOMEM;
333 rdsdebug("recv allocation failed\n");
334 goto out;
335 }
336
337 rds_iw_recv_init_ring(ic);
338 rds_iw_recv_init_ack(ic);
339
340 /* Post receive buffers - as a side effect, this will update
341 * the posted credit count. */
342 rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);
343
344 rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
345 ic->i_send_cq, ic->i_recv_cq);
346
347out:
348 return ret;
349}
350
351static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp)
352{
353 u16 common;
354 u32 version = 0;
355
356 /* rdma_cm private data is odd - when there is any private data in the
357 * request, we will be given a pretty large buffer without being told the
358 * original size. The only way to tell the difference is by looking at
359 * the contents, which are initialized to zero.
360 * If the protocol version fields aren't set, this is a connection attempt
361 * from an older version. This could be 3.0 or 2.0 - we can't tell.
362 * We really should have changed this for OFED 1.3 :-( */
363 if (dp->dp_protocol_major == 0)
364 return RDS_PROTOCOL_3_0;
365
366 common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IW_SUPPORTED_PROTOCOLS;
367 if (dp->dp_protocol_major == 3 && common) {
368 version = RDS_PROTOCOL_3_0;
369 while ((common >>= 1) != 0)
370 version++;
371 }
372 printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using "
373 "incompatible protocol version %u.%u\n",
374 &dp->dp_saddr,
375 dp->dp_protocol_major,
376 dp->dp_protocol_minor);
377 return version;
378}
379
380int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
381 struct rdma_cm_event *event)
382{
383 const struct rds_iw_connect_private *dp = event->param.conn.private_data;
384 struct rds_iw_connect_private dp_rep;
385 struct rds_connection *conn = NULL;
386 struct rds_iw_connection *ic = NULL;
387 struct rdma_conn_param conn_param;
388 struct rds_iw_device *rds_iwdev;
389 u32 version;
390 int err, destroy = 1;
391
392 /* Check whether the remote protocol version matches ours. */
393 version = rds_iw_protocol_compatible(dp);
394 if (!version)
395 goto out;
396
397 rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u\n",
398 &dp->dp_saddr, &dp->dp_daddr,
399 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
400
401 /* RDS/IW is not currently netns aware, thus init_net */
402 conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
403 &rds_iw_transport, GFP_KERNEL);
404 if (IS_ERR(conn)) {
405 rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
406 conn = NULL;
407 goto out;
408 }
409
410 /*
411 * The connection request may occur while the
412 * previous connection exists, e.g. in case of failover.
413 * But as connections may be initiated simultaneously
414 * by both hosts, we have a random backoff mechanism -
415 * see the comment above rds_queue_reconnect()
416 */
417 mutex_lock(&conn->c_cm_lock);
418 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
419 if (rds_conn_state(conn) == RDS_CONN_UP) {
420 rdsdebug("incoming connect while connecting\n");
421 rds_conn_drop(conn);
422 rds_iw_stats_inc(s_iw_listen_closed_stale);
423 } else
424 if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
425 /* Wait and see - our connect may still be succeeding */
426 rds_iw_stats_inc(s_iw_connect_raced);
427 }
428 mutex_unlock(&conn->c_cm_lock);
429 goto out;
430 }
431
432 ic = conn->c_transport_data;
433
434 rds_iw_set_protocol(conn, version);
435 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
436
437 /* If the peer gave us the last packet it saw, process this as if
438 * we had received a regular ACK. */
439 if (dp->dp_ack_seq)
440 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
441
442 BUG_ON(cm_id->context);
443 BUG_ON(ic->i_cm_id);
444
445 ic->i_cm_id = cm_id;
446 cm_id->context = conn;
447
448 rds_iwdev = ib_get_client_data(cm_id->device, &rds_iw_client);
449 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
450
451 /* We got halfway through setting up the ib_connection; if we
452 * fail now, we have to take the long route out of this mess. */
453 destroy = 0;
454
455 err = rds_iw_setup_qp(conn);
456 if (err) {
457 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
458 mutex_unlock(&conn->c_cm_lock);
459 goto out;
460 }
461
462 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp_rep, version);
463
464 /* rdma_accept() calls rdma_reject() internally if it fails */
465 err = rdma_accept(cm_id, &conn_param);
466 mutex_unlock(&conn->c_cm_lock);
467 if (err) {
468 rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err);
469 goto out;
470 }
471
472 return 0;
473
474out:
475 rdma_reject(cm_id, NULL, 0);
476 return destroy;
477}
478
479
480int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id)
481{
482 struct rds_connection *conn = cm_id->context;
483 struct rds_iw_connection *ic = conn->c_transport_data;
484 struct rdma_conn_param conn_param;
485 struct rds_iw_connect_private dp;
486 int ret;
487
488 /* If the peer doesn't do protocol negotiation, we must
489 * default to RDSv3.0 */
490 rds_iw_set_protocol(conn, RDS_PROTOCOL_3_0);
491 ic->i_flowctl = rds_iw_sysctl_flow_control; /* advertise flow control */
492
493 ret = rds_iw_setup_qp(conn);
494 if (ret) {
495 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret);
496 goto out;
497 }
498
499 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
500
501 ret = rdma_connect(cm_id, &conn_param);
502 if (ret)
503 rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret);
504
505out:
506 /* Beware - returning non-zero tells the rdma_cm to destroy
507 * the cm_id. We should certainly not do it as long as we still
508 * "own" the cm_id. */
509 if (ret) {
510 struct rds_iw_connection *ic = conn->c_transport_data;
511
512 if (ic->i_cm_id == cm_id)
513 ret = 0;
514 }
515 return ret;
516}
517
518int rds_iw_conn_connect(struct rds_connection *conn)
519{
520 struct rds_iw_connection *ic = conn->c_transport_data;
521 struct rds_iw_device *rds_iwdev;
522 struct sockaddr_in src, dest;
523 int ret;
524
525 /* XXX I wonder what effect the port space has */
526 /* delegate cm event handler to rdma_transport */
527 ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
528 RDMA_PS_TCP, IB_QPT_RC);
529 if (IS_ERR(ic->i_cm_id)) {
530 ret = PTR_ERR(ic->i_cm_id);
531 ic->i_cm_id = NULL;
532 rdsdebug("rdma_create_id() failed: %d\n", ret);
533 goto out;
534 }
535
536 rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
537
538 src.sin_family = AF_INET;
539 src.sin_addr.s_addr = (__force u32)conn->c_laddr;
540 src.sin_port = (__force u16)htons(0);
541
542 /* First, bind to the local address and device. */
543 ret = rdma_bind_addr(ic->i_cm_id, (struct sockaddr *) &src);
544 if (ret) {
545 rdsdebug("rdma_bind_addr(%pI4) failed: %d\n",
546 &conn->c_laddr, ret);
547 rdma_destroy_id(ic->i_cm_id);
548 ic->i_cm_id = NULL;
549 goto out;
550 }
551
552 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
553 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
554
555 dest.sin_family = AF_INET;
556 dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
557 dest.sin_port = (__force u16)htons(RDS_PORT);
558
559 ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
560 (struct sockaddr *)&dest,
561 RDS_RDMA_RESOLVE_TIMEOUT_MS);
562 if (ret) {
563 rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
564 ret);
565 rdma_destroy_id(ic->i_cm_id);
566 ic->i_cm_id = NULL;
567 }
568
569out:
570 return ret;
571}
572
573/*
574 * This is careful to clean up only the resources that were actually
575 * built up, so that it can be called at any point during startup. In fact it
576 * can be called multiple times for a given connection.
577 */
578void rds_iw_conn_shutdown(struct rds_connection *conn)
579{
580 struct rds_iw_connection *ic = conn->c_transport_data;
581 int err = 0;
582 struct ib_qp_attr qp_attr;
583
584 rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
585 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
586 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
587
588 if (ic->i_cm_id) {
589 struct ib_device *dev = ic->i_cm_id->device;
590
591 rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
592 err = rdma_disconnect(ic->i_cm_id);
593 if (err) {
594 /* Actually this may happen quite frequently, when
595 * an outgoing connect raced with an incoming connect.
596 */
597 rdsdebug("failed to disconnect, cm: %p err %d\n",
598 ic->i_cm_id, err);
599 }
600
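/* Move the QP into the error state so that any outstanding work
 * requests complete with a flush error and the rings below can drain. */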
601 if (ic->i_cm_id->qp) {
602 qp_attr.qp_state = IB_QPS_ERR;
603 ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
604 }
605
606 wait_event(rds_iw_ring_empty_wait,
607 rds_iw_ring_empty(&ic->i_send_ring) &&
608 rds_iw_ring_empty(&ic->i_recv_ring));
609
610 if (ic->i_send_hdrs)
611 ib_dma_free_coherent(dev,
612 ic->i_send_ring.w_nr *
613 sizeof(struct rds_header),
614 ic->i_send_hdrs,
615 ic->i_send_hdrs_dma);
616
617 if (ic->i_recv_hdrs)
618 ib_dma_free_coherent(dev,
619 ic->i_recv_ring.w_nr *
620 sizeof(struct rds_header),
621 ic->i_recv_hdrs,
622 ic->i_recv_hdrs_dma);
623
624 if (ic->i_ack)
625 ib_dma_free_coherent(dev, sizeof(struct rds_header),
626 ic->i_ack, ic->i_ack_dma);
627
628 if (ic->i_sends)
629 rds_iw_send_clear_ring(ic);
630 if (ic->i_recvs)
631 rds_iw_recv_clear_ring(ic);
632
633 if (ic->i_cm_id->qp)
634 rdma_destroy_qp(ic->i_cm_id);
635 if (ic->i_send_cq)
636 ib_destroy_cq(ic->i_send_cq);
637 if (ic->i_recv_cq)
638 ib_destroy_cq(ic->i_recv_cq);
639
640 /*
641 * If associated with an rds_iw_device:
642 * Move connection back to the nodev list.
643 * Remove cm_id from the device cm_id list.
644 */
645 if (ic->rds_iwdev)
646 rds_iw_remove_conn(ic->rds_iwdev, conn);
647
648 rdma_destroy_id(ic->i_cm_id);
649
650 ic->i_cm_id = NULL;
651 ic->i_pd = NULL;
652 ic->i_mr = NULL;
653 ic->i_send_cq = NULL;
654 ic->i_recv_cq = NULL;
655 ic->i_send_hdrs = NULL;
656 ic->i_recv_hdrs = NULL;
657 ic->i_ack = NULL;
658 }
659 BUG_ON(ic->rds_iwdev);
660
661 /* Clear pending transmit */
662 if (ic->i_rm) {
663 rds_message_put(ic->i_rm);
664 ic->i_rm = NULL;
665 }
666
667 /* Clear the ACK state */
668 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
669#ifdef KERNEL_HAS_ATOMIC64
670 atomic64_set(&ic->i_ack_next, 0);
671#else
672 ic->i_ack_next = 0;
673#endif
674 ic->i_ack_recv = 0;
675
676 /* Clear flow control state */
677 ic->i_flowctl = 0;
678 atomic_set(&ic->i_credits, 0);
679
680 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
681 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
682
683 if (ic->i_iwinc) {
684 rds_inc_put(&ic->i_iwinc->ii_inc);
685 ic->i_iwinc = NULL;
686 }
687
688 vfree(ic->i_sends);
689 ic->i_sends = NULL;
690 vfree(ic->i_recvs);
691 ic->i_recvs = NULL;
692 rdsdebug("shutdown complete\n");
693}
694
695int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
696{
697 struct rds_iw_connection *ic;
698 unsigned long flags;
699
700 /* XXX too lazy? */
701 ic = kzalloc(sizeof(struct rds_iw_connection), gfp);
702 if (!ic)
703 return -ENOMEM;
704
705 INIT_LIST_HEAD(&ic->iw_node);
706 tasklet_init(&ic->i_recv_tasklet, rds_iw_recv_tasklet_fn,
707 (unsigned long) ic);
708 mutex_init(&ic->i_recv_mutex);
709#ifndef KERNEL_HAS_ATOMIC64
710 spin_lock_init(&ic->i_ack_lock);
711#endif
712
713 /*
714 * rds_iw_conn_shutdown() waits for these to be emptied so they
715 * must be initialized before it can be called.
716 */
717 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
718 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
719
720 ic->conn = conn;
721 conn->c_transport_data = ic;
722
723 spin_lock_irqsave(&iw_nodev_conns_lock, flags);
724 list_add_tail(&ic->iw_node, &iw_nodev_conns);
725 spin_unlock_irqrestore(&iw_nodev_conns_lock, flags);
726
727
728 rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
729 return 0;
730}
731
732/*
733 * Free a connection. Connection must be shut down and not set for reconnect.
734 */
735void rds_iw_conn_free(void *arg)
736{
737 struct rds_iw_connection *ic = arg;
738 spinlock_t *lock_ptr;
739
740 rdsdebug("ic %p\n", ic);
741
742 /*
743 * Conn is either on a dev's list or on the nodev list.
744 * A race with shutdown() or connect() would cause problems
745 * (since rds_iwdev would change) but that should never happen.
746 */
747 lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock;
748
749 spin_lock_irq(lock_ptr);
750 list_del(&ic->iw_node);
751 spin_unlock_irq(lock_ptr);
752
753 kfree(ic);
754}
755
756/*
757 * An error occurred on the connection
758 */
759void
760__rds_iw_conn_error(struct rds_connection *conn, const char *fmt, ...)
761{
762 va_list ap;
763
764 rds_conn_drop(conn);
765
766 va_start(ap, fmt);
767 vprintk(fmt, ap);
768 va_end(ap);
769}
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
deleted file mode 100644
index b09a40c1adce..000000000000
--- a/net/rds/iw_rdma.c
+++ /dev/null
@@ -1,837 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/slab.h>
35#include <linux/ratelimit.h>
36
37#include "rds.h"
38#include "iw.h"
39
40
41/*
42 * This is stored as mr->r_trans_private.
43 */
44struct rds_iw_mr {
45 struct rds_iw_device *device;
46 struct rds_iw_mr_pool *pool;
47 struct rdma_cm_id *cm_id;
48
49 struct ib_mr *mr;
50
51 struct rds_iw_mapping mapping;
52 unsigned char remap_count;
53};
54
55/*
56 * Our own little MR pool
57 */
58struct rds_iw_mr_pool {
59 struct rds_iw_device *device; /* back ptr to the device that owns us */
60
61 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */
63
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */
66 atomic_t dirty_count; /* # of dirty MRs */
67 struct list_head dirty_list; /* dirty mappings */
68 struct list_head clean_list; /* unused & unmapped MRs */
69 atomic_t free_pinned; /* memory pinned by free MRs */
70 unsigned long max_message_size; /* in pages */
71 unsigned long max_items;
72 unsigned long max_items_soft;
73 unsigned long max_free_pinned;
74 int max_pages;
75};
76
77static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
78static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
79static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
80static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
81 struct rds_iw_mr *ibmr,
82 struct scatterlist *sg, unsigned int nents);
83static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
84static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
85 struct list_head *unmap_list,
86 struct list_head *kill_list,
87 int *unpinned);
88static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
89
90static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst,
91 struct rds_iw_device **rds_iwdev,
92 struct rdma_cm_id **cm_id)
93{
94 struct rds_iw_device *iwdev;
95 struct rds_iw_cm_id *i_cm_id;
96
97 *rds_iwdev = NULL;
98 *cm_id = NULL;
99
100 list_for_each_entry(iwdev, &rds_iw_devices, list) {
101 spin_lock_irq(&iwdev->spinlock);
102 list_for_each_entry(i_cm_id, &iwdev->cm_id_list, list) {
103 struct sockaddr_in *src_addr, *dst_addr;
104
105 src_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.src_addr;
106 dst_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.dst_addr;
107
108 rdsdebug("local ipaddr = %x port %d, "
109 "remote ipaddr = %x port %d"
110 "..looking for %x port %d, "
111 "remote ipaddr = %x port %d\n",
112 src_addr->sin_addr.s_addr,
113 src_addr->sin_port,
114 dst_addr->sin_addr.s_addr,
115 dst_addr->sin_port,
116 src->sin_addr.s_addr,
117 src->sin_port,
118 dst->sin_addr.s_addr,
119 dst->sin_port);
120#ifdef WORKING_TUPLE_DETECTION
121 if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr &&
122 src_addr->sin_port == src->sin_port &&
123 dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr &&
124 dst_addr->sin_port == dst->sin_port) {
125#else
126 /* FIXME - needs to compare the local and remote
127 * ipaddr/port tuple, but the ipaddr is the only
128 * available information in the rds_sock (as the rest are
129 * zeroed). It doesn't appear to be properly populated
130 * during connection setup...
131 */
132 if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) {
133#endif
134 spin_unlock_irq(&iwdev->spinlock);
135 *rds_iwdev = iwdev;
136 *cm_id = i_cm_id->cm_id;
137 return 0;
138 }
139 }
140 spin_unlock_irq(&iwdev->spinlock);
141 }
142
143 return 1;
144}
145
146static int rds_iw_add_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
147{
148 struct rds_iw_cm_id *i_cm_id;
149
150 i_cm_id = kmalloc(sizeof *i_cm_id, GFP_KERNEL);
151 if (!i_cm_id)
152 return -ENOMEM;
153
154 i_cm_id->cm_id = cm_id;
155
156 spin_lock_irq(&rds_iwdev->spinlock);
157 list_add_tail(&i_cm_id->list, &rds_iwdev->cm_id_list);
158 spin_unlock_irq(&rds_iwdev->spinlock);
159
160 return 0;
161}
162
163static void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev,
164 struct rdma_cm_id *cm_id)
165{
166 struct rds_iw_cm_id *i_cm_id;
167
168 spin_lock_irq(&rds_iwdev->spinlock);
169 list_for_each_entry(i_cm_id, &rds_iwdev->cm_id_list, list) {
170 if (i_cm_id->cm_id == cm_id) {
171 list_del(&i_cm_id->list);
172 kfree(i_cm_id);
173 break;
174 }
175 }
176 spin_unlock_irq(&rds_iwdev->spinlock);
177}
178
179
180int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
181{
182 struct sockaddr_in *src_addr, *dst_addr;
183 struct rds_iw_device *rds_iwdev_old;
184 struct rdma_cm_id *pcm_id;
185 int rc;
186
187 src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
188 dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
189
190 rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id);
191 if (rc)
192 rds_iw_remove_cm_id(rds_iwdev, cm_id);
193
194 return rds_iw_add_cm_id(rds_iwdev, cm_id);
195}
196
197void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
198{
199 struct rds_iw_connection *ic = conn->c_transport_data;
200
201 /* conn was previously on the nodev_conns_list */
202 spin_lock_irq(&iw_nodev_conns_lock);
203 BUG_ON(list_empty(&iw_nodev_conns));
204 BUG_ON(list_empty(&ic->iw_node));
205 list_del(&ic->iw_node);
206
207 spin_lock(&rds_iwdev->spinlock);
208 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
209 spin_unlock(&rds_iwdev->spinlock);
210 spin_unlock_irq(&iw_nodev_conns_lock);
211
212 ic->rds_iwdev = rds_iwdev;
213}
214
215void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
216{
217 struct rds_iw_connection *ic = conn->c_transport_data;
218
219 /* place conn on nodev_conns_list */
220 spin_lock(&iw_nodev_conns_lock);
221
222 spin_lock_irq(&rds_iwdev->spinlock);
223 BUG_ON(list_empty(&ic->iw_node));
224 list_del(&ic->iw_node);
225 spin_unlock_irq(&rds_iwdev->spinlock);
226
227 list_add_tail(&ic->iw_node, &iw_nodev_conns);
228
229 spin_unlock(&iw_nodev_conns_lock);
230
231 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
232 ic->rds_iwdev = NULL;
233}
234
235void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock)
236{
237 struct rds_iw_connection *ic, *_ic;
238 LIST_HEAD(tmp_list);
239
240 /* avoid calling conn_destroy with irqs off */
241 spin_lock_irq(list_lock);
242 list_splice(list, &tmp_list);
243 INIT_LIST_HEAD(list);
244 spin_unlock_irq(list_lock);
245
246 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node)
247 rds_conn_destroy(ic->conn);
248}
249
250static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
251 struct scatterlist *list, unsigned int sg_len)
252{
253 sg->list = list;
254 sg->len = sg_len;
255 sg->dma_len = 0;
256 sg->dma_npages = 0;
257 sg->bytes = 0;
258}
259
260static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
261 struct rds_iw_scatterlist *sg)
262{
263 struct ib_device *dev = rds_iwdev->dev;
264 int i, ret;
265
266 WARN_ON(sg->dma_len);
267
268 sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
269 if (unlikely(!sg->dma_len)) {
270 printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
271 return -EBUSY;
272 }
273
274 sg->bytes = 0;
275 sg->dma_npages = 0;
276
277 ret = -EINVAL;
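/* Count the pages spanned by the mapped region. Only the first entry
 * may start, and only the last may end, off a page boundary; anything
 * else cannot be covered by a single fastreg mapping. */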
278 for (i = 0; i < sg->dma_len; ++i) {
279 unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
280 u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
281 u64 end_addr;
282
283 sg->bytes += dma_len;
284
285 end_addr = dma_addr + dma_len;
286 if (dma_addr & PAGE_MASK) {
287 if (i > 0)
288 goto out_unmap;
289 dma_addr &= ~PAGE_MASK;
290 }
291 if (end_addr & PAGE_MASK) {
292 if (i < sg->dma_len - 1)
293 goto out_unmap;
294 end_addr = (end_addr + PAGE_MASK) & ~PAGE_MASK;
295 }
296
297 sg->dma_npages += (end_addr - dma_addr) >> PAGE_SHIFT;
298 }
299
300 /* Now gather the dma addrs into one list */
301 if (sg->dma_npages > fastreg_message_size)
302 goto out_unmap;
303
304
305
306 return 0;
307
308out_unmap:
309 ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
310 sg->dma_len = 0;
311 return ret;
312}
313
314
315struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *rds_iwdev)
316{
317 struct rds_iw_mr_pool *pool;
318
319 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
320 if (!pool) {
321 printk(KERN_WARNING "RDS/IW: rds_iw_create_mr_pool alloc error\n");
322 return ERR_PTR(-ENOMEM);
323 }
324
325 pool->device = rds_iwdev;
326 INIT_LIST_HEAD(&pool->dirty_list);
327 INIT_LIST_HEAD(&pool->clean_list);
328 mutex_init(&pool->flush_lock);
329 spin_lock_init(&pool->list_lock);
330 INIT_WORK(&pool->flush_worker, rds_iw_mr_pool_flush_worker);
331
332 pool->max_message_size = fastreg_message_size;
333 pool->max_items = fastreg_pool_size;
334 pool->max_free_pinned = pool->max_items * pool->max_message_size / 4;
335 pool->max_pages = fastreg_message_size;
336
337 /* We never allow more than max_items MRs to be allocated.
338 * When we exceed max_items_soft, we start freeing
339 * items more aggressively.
340 * Make sure that max_items > max_items_soft > max_items / 2
341 */
342 pool->max_items_soft = pool->max_items * 3 / 4;
343
344 return pool;
345}
346
347void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo)
348{
349 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
350
351 iinfo->rdma_mr_max = pool->max_items;
352 iinfo->rdma_mr_size = pool->max_pages;
353}
354
355void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *pool)
356{
357 flush_workqueue(rds_wq);
358 rds_iw_flush_mr_pool(pool, 1);
359 BUG_ON(atomic_read(&pool->item_count));
360 BUG_ON(atomic_read(&pool->free_pinned));
361 kfree(pool);
362}
363
364static inline struct rds_iw_mr *rds_iw_reuse_fmr(struct rds_iw_mr_pool *pool)
365{
366 struct rds_iw_mr *ibmr = NULL;
367 unsigned long flags;
368
369 spin_lock_irqsave(&pool->list_lock, flags);
370 if (!list_empty(&pool->clean_list)) {
371 ibmr = list_entry(pool->clean_list.next, struct rds_iw_mr, mapping.m_list);
372 list_del_init(&ibmr->mapping.m_list);
373 }
374 spin_unlock_irqrestore(&pool->list_lock, flags);
375
376 return ibmr;
377}
378
379static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
380{
381 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
382 struct rds_iw_mr *ibmr = NULL;
383 int err = 0, iter = 0;
384
385 while (1) {
386 ibmr = rds_iw_reuse_fmr(pool);
387 if (ibmr)
388 return ibmr;
389
390 /* No clean MRs - now we have the choice of either
391 * allocating a fresh MR up to the limit imposed by the
392 * driver, or flushing any dirty unused MRs.
393 * We try to avoid stalling in the send path if possible,
394 * so we allocate as long as we're allowed to.
395 *
396 * We're fussy with enforcing the FMR limit, though. If the driver
397 * tells us we can't use more than N fmrs, we shouldn't start
398 * arguing with it */
399 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
400 break;
401
402 atomic_dec(&pool->item_count);
403
404 if (++iter > 2) {
405 rds_iw_stats_inc(s_iw_rdma_mr_pool_depleted);
406 return ERR_PTR(-EAGAIN);
407 }
408
409 /* We do have some empty MRs. Flush them out. */
410 rds_iw_stats_inc(s_iw_rdma_mr_pool_wait);
411 rds_iw_flush_mr_pool(pool, 0);
412 }
413
414 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL);
415 if (!ibmr) {
416 err = -ENOMEM;
417 goto out_no_cigar;
418 }
419
420 spin_lock_init(&ibmr->mapping.m_lock);
421 INIT_LIST_HEAD(&ibmr->mapping.m_list);
422 ibmr->mapping.m_mr = ibmr;
423
424 err = rds_iw_init_reg(pool, ibmr);
425 if (err)
426 goto out_no_cigar;
427
428 rds_iw_stats_inc(s_iw_rdma_mr_alloc);
429 return ibmr;
430
431out_no_cigar:
432 if (ibmr) {
433 rds_iw_destroy_fastreg(pool, ibmr);
434 kfree(ibmr);
435 }
436 atomic_dec(&pool->item_count);
437 return ERR_PTR(err);
438}
439
440void rds_iw_sync_mr(void *trans_private, int direction)
441{
442 struct rds_iw_mr *ibmr = trans_private;
443 struct rds_iw_device *rds_iwdev = ibmr->device;
444
445 switch (direction) {
446 case DMA_FROM_DEVICE:
447 ib_dma_sync_sg_for_cpu(rds_iwdev->dev, ibmr->mapping.m_sg.list,
448 ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL);
449 break;
450 case DMA_TO_DEVICE:
451 ib_dma_sync_sg_for_device(rds_iwdev->dev, ibmr->mapping.m_sg.list,
452 ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL);
453 break;
454 }
455}
456
457/*
458 * Flush our pool of MRs.
459 * At a minimum, all currently unused MRs are unmapped.
460 * If the number of MRs allocated exceeds the limit, we also try
461 * to free as many MRs as needed to get back to this limit.
462 */
463static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
464{
465 struct rds_iw_mr *ibmr, *next;
466 LIST_HEAD(unmap_list);
467 LIST_HEAD(kill_list);
468 unsigned long flags;
469 unsigned int nfreed = 0, ncleaned = 0, unpinned = 0;
470
471 rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
472
473 mutex_lock(&pool->flush_lock);
474
475 spin_lock_irqsave(&pool->list_lock, flags);
476 /* Get the list of all mappings to be destroyed */
477 list_splice_init(&pool->dirty_list, &unmap_list);
478 if (free_all)
479 list_splice_init(&pool->clean_list, &kill_list);
480 spin_unlock_irqrestore(&pool->list_lock, flags);
481
482 /* Batched invalidate of dirty MRs.
483 * For FMR based MRs, the mappings on the unmap list are
484 * actually members of an ibmr (ibmr->mapping). They either
485 * migrate to the kill_list, or have been cleaned and should be
486 * moved to the clean_list.
487 * For fastregs, they will be dynamically allocated, and
488 * will be destroyed by the unmap function.
489 */
490 if (!list_empty(&unmap_list)) {
491 ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list,
492 &kill_list, &unpinned);
493 /* If we've been asked to destroy all MRs, move those
494 * that were simply cleaned to the kill list */
495 if (free_all)
496 list_splice_init(&unmap_list, &kill_list);
497 }
498
499 /* Destroy any MRs that are past their best before date */
500 list_for_each_entry_safe(ibmr, next, &kill_list, mapping.m_list) {
501 rds_iw_stats_inc(s_iw_rdma_mr_free);
502 list_del(&ibmr->mapping.m_list);
503 rds_iw_destroy_fastreg(pool, ibmr);
504 kfree(ibmr);
505 nfreed++;
506 }
507
508 /* Anything that remains are laundered ibmrs, which we can add
509 * back to the clean list. */
510 if (!list_empty(&unmap_list)) {
511 spin_lock_irqsave(&pool->list_lock, flags);
512 list_splice(&unmap_list, &pool->clean_list);
513 spin_unlock_irqrestore(&pool->list_lock, flags);
514 }
515
516 atomic_sub(unpinned, &pool->free_pinned);
517 atomic_sub(ncleaned, &pool->dirty_count);
518 atomic_sub(nfreed, &pool->item_count);
519
520 mutex_unlock(&pool->flush_lock);
521}
522
523static void rds_iw_mr_pool_flush_worker(struct work_struct *work)
524{
525 struct rds_iw_mr_pool *pool = container_of(work, struct rds_iw_mr_pool, flush_worker);
526
527 rds_iw_flush_mr_pool(pool, 0);
528}
529
530void rds_iw_free_mr(void *trans_private, int invalidate)
531{
532 struct rds_iw_mr *ibmr = trans_private;
533 struct rds_iw_mr_pool *pool = ibmr->device->mr_pool;
534
535 rdsdebug("RDS/IW: free_mr nents %u\n", ibmr->mapping.m_sg.len);
536 if (!pool)
537 return;
538
539 /* Return it to the pool's free list */
540 rds_iw_free_fastreg(pool, ibmr);
541
542 /* If we've pinned too many pages, request a flush */
543 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
544 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
545 queue_work(rds_wq, &pool->flush_worker);
546
547 if (invalidate) {
548 if (likely(!in_interrupt())) {
549 rds_iw_flush_mr_pool(pool, 0);
550 } else {
551 /* We get here if the user created a MR marked
552 * as use_once and invalidate at the same time. */
553 queue_work(rds_wq, &pool->flush_worker);
554 }
555 }
556}
557
558void rds_iw_flush_mrs(void)
559{
560 struct rds_iw_device *rds_iwdev;
561
562 list_for_each_entry(rds_iwdev, &rds_iw_devices, list) {
563 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
564
565 if (pool)
566 rds_iw_flush_mr_pool(pool, 0);
567 }
568}
569
570void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
571 struct rds_sock *rs, u32 *key_ret)
572{
573 struct rds_iw_device *rds_iwdev;
574 struct rds_iw_mr *ibmr = NULL;
575 struct rdma_cm_id *cm_id;
576 struct sockaddr_in src = {
577 .sin_addr.s_addr = rs->rs_bound_addr,
578 .sin_port = rs->rs_bound_port,
579 };
580 struct sockaddr_in dst = {
581 .sin_addr.s_addr = rs->rs_conn_addr,
582 .sin_port = rs->rs_conn_port,
583 };
584 int ret;
585
586 ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id);
587 if (ret || !cm_id) {
588 ret = -ENODEV;
589 goto out;
590 }
591
592 if (!rds_iwdev->mr_pool) {
593 ret = -ENODEV;
594 goto out;
595 }
596
597 ibmr = rds_iw_alloc_mr(rds_iwdev);
598 if (IS_ERR(ibmr))
599 return ibmr;
600
601 ibmr->cm_id = cm_id;
602 ibmr->device = rds_iwdev;
603
604 ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
605 if (ret == 0)
606 *key_ret = ibmr->mr->rkey;
607 else
608 printk(KERN_WARNING "RDS/IW: failed to map mr (errno=%d)\n", ret);
609
610out:
611 if (ret) {
612 if (ibmr)
613 rds_iw_free_mr(ibmr, 0);
614 ibmr = ERR_PTR(ret);
615 }
616 return ibmr;
617}
618
619/*
620 * iWARP reg handling
621 *
622 * The life cycle of a fastreg registration is a bit different from
623 * FMRs.
624 * The idea behind fastreg is to have one MR, to which we bind different
625 * mappings over time. To avoid stalling on the expensive map and invalidate
626 * operations, these operations are pipelined on the same send queue on
627 * which we want to send the message containing the r_key.
628 *
629 * This creates a bit of a problem for us, as we do not have the destination
630 * IP in GET_MR, so the connection must be set up prior to the GET_MR call for
631 * RDMA to be set up correctly. If a fastreg request is present, rds_iw_xmit
632 * will try to queue a LOCAL_INV (if needed) and a REG_MR work request
633 * before queuing the SEND. When the completions for these arrive, a bit is
634 * set in the MR showing that RDMA can be performed.
635 *
636 * There is another interesting aspect that's related to invalidation.
637 * The application can request that a mapping is invalidated in FREE_MR.
638 * The expectation there is that this invalidation step includes ALL
639 * PREVIOUSLY FREED MRs.
640 */
641static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
642 struct rds_iw_mr *ibmr)
643{
644 struct rds_iw_device *rds_iwdev = pool->device;
645 struct ib_mr *mr;
646 int err;
647
648 mr = ib_alloc_mr(rds_iwdev->pd, IB_MR_TYPE_MEM_REG,
649 pool->max_message_size);
650 if (IS_ERR(mr)) {
651 err = PTR_ERR(mr);
652
653 printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed (err=%d)\n", err);
654 return err;
655 }
656
657 ibmr->mr = mr;
658 return 0;
659}
660
661static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
662{
663 struct rds_iw_mr *ibmr = mapping->m_mr;
664 struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
665 struct ib_reg_wr reg_wr;
666 struct ib_send_wr *failed_wr;
667 int ret, n;
668
669 n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
670 if (unlikely(n != m_sg->len))
671 return n < 0 ? n : -EINVAL;
672
673 reg_wr.wr.next = NULL;
674 reg_wr.wr.opcode = IB_WR_REG_MR;
675 reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
676 reg_wr.wr.num_sge = 0;
677 reg_wr.mr = ibmr->mr;
678 reg_wr.key = mapping->m_rkey;
679 reg_wr.access = IB_ACCESS_LOCAL_WRITE |
680 IB_ACCESS_REMOTE_READ |
681 IB_ACCESS_REMOTE_WRITE;
682
683 /*
684 * Perform a WR for the reg_mr. Each individual page
685 * in the sg list is added to the fast reg page list and placed
686 * inside the reg_mr WR. The key used is a rolling 8bit
687 * counter, which should guarantee uniqueness.
688 */
689 ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
690 mapping->m_rkey = ibmr->mr->rkey;
691
692 failed_wr = &reg_wr.wr;
693 ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
694 BUG_ON(failed_wr != &reg_wr.wr);
695 if (ret)
696 printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
697 __func__, __LINE__, ret);
698 return ret;
699}
700
701static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr)
702{
703 struct ib_send_wr s_wr, *failed_wr;
704 int ret = 0;
705
706 if (!ibmr->cm_id->qp || !ibmr->mr)
707 goto out;
708
709 memset(&s_wr, 0, sizeof(s_wr));
710 s_wr.wr_id = RDS_IW_LOCAL_INV_WR_ID;
711 s_wr.opcode = IB_WR_LOCAL_INV;
712 s_wr.ex.invalidate_rkey = ibmr->mr->rkey;
713 s_wr.send_flags = IB_SEND_SIGNALED;
714
715 failed_wr = &s_wr;
716 ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr);
717 if (ret) {
718 printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
719 __func__, __LINE__, ret);
720 goto out;
721 }
722out:
723 return ret;
724}
725
726static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
727 struct rds_iw_mr *ibmr,
728 struct scatterlist *sg,
729 unsigned int sg_len)
730{
731 struct rds_iw_device *rds_iwdev = pool->device;
732 struct rds_iw_mapping *mapping = &ibmr->mapping;
733 u64 *dma_pages = NULL;
734 int ret = 0;
735
736 rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
737
738 ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
739 if (ret) {
740 dma_pages = NULL;
741 goto out;
742 }
743
744 if (mapping->m_sg.dma_len > pool->max_message_size) {
745 ret = -EMSGSIZE;
746 goto out;
747 }
748
749 ret = rds_iw_rdma_reg_mr(mapping);
750 if (ret)
751 goto out;
752
753 rds_iw_stats_inc(s_iw_rdma_mr_used);
754
755out:
756 kfree(dma_pages);
757
758 return ret;
759}
760
761/*
762 * "Free" a fastreg MR.
763 */
764static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool,
765 struct rds_iw_mr *ibmr)
766{
767 unsigned long flags;
768 int ret;
769
770 if (!ibmr->mapping.m_sg.dma_len)
771 return;
772
773 ret = rds_iw_rdma_fastreg_inv(ibmr);
774 if (ret)
775 return;
776
777 /* Queue the mapping on the dirty list so the flush worker can reclaim it. */
778 spin_lock_irqsave(&pool->list_lock, flags);
779
780 list_add_tail(&ibmr->mapping.m_list, &pool->dirty_list);
781 atomic_add(ibmr->mapping.m_sg.len, &pool->free_pinned);
782 atomic_inc(&pool->dirty_count);
783
784 spin_unlock_irqrestore(&pool->list_lock, flags);
785}
786
787static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
788 struct list_head *unmap_list,
789 struct list_head *kill_list,
790 int *unpinned)
791{
792 struct rds_iw_mapping *mapping, *next;
793 unsigned int ncleaned = 0;
794 LIST_HEAD(laundered);
795
796 /* Batched invalidation of fastreg MRs.
797 * Why do we do it this way, even though we could pipeline unmap
798 * and remap? The reason is the application semantics - when the
799 * application requests an invalidation of MRs, it expects all
800 * previously released R_Keys to become invalid.
801 *
802 * If we implement MR reuse naively, we risk memory corruption
803 * (this has actually been observed). So the default behavior
804 * requires that a MR goes through an explicit unmap operation before
805 * we can reuse it again.
806 *
807 * We could probably improve on this a little, by allowing immediate
808 * reuse of an MR on the same socket (e.g. you could add a small
809 * cache of unused MRs to struct rds_socket - GET_MR could grab one
810 * of these without requiring an explicit invalidate).
811 */
812 while (!list_empty(unmap_list)) {
813 unsigned long flags;
814
815 spin_lock_irqsave(&pool->list_lock, flags);
816 list_for_each_entry_safe(mapping, next, unmap_list, m_list) {
817 *unpinned += mapping->m_sg.len;
818 list_move(&mapping->m_list, &laundered);
819 ncleaned++;
820 }
821 spin_unlock_irqrestore(&pool->list_lock, flags);
822 }
823
824 /* Move all laundered mappings back to the unmap list.
825 * We do not kill any WRs right now - it doesn't seem the
826 * fastreg API has a max_remap limit. */
827 list_splice_init(&laundered, unmap_list);
828
829 return ncleaned;
830}
831
832static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
833 struct rds_iw_mr *ibmr)
834{
835 if (ibmr->mr)
836 ib_dereg_mr(ibmr->mr);
837}
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
deleted file mode 100644
index a66d1794b2d0..000000000000
--- a/net/rds/iw_recv.c
+++ /dev/null
@@ -1,904 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/slab.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <rdma/rdma_cm.h>
38
39#include "rds.h"
40#include "iw.h"
41
42static struct kmem_cache *rds_iw_incoming_slab;
43static struct kmem_cache *rds_iw_frag_slab;
44static atomic_t rds_iw_allocation = ATOMIC_INIT(0);
45
46static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
47{
48 rdsdebug("frag %p page %p\n", frag, frag->f_page);
49 __free_page(frag->f_page);
50 frag->f_page = NULL;
51}
52
53static void rds_iw_frag_free(struct rds_page_frag *frag)
54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page);
57 kmem_cache_free(rds_iw_frag_slab, frag);
58}
59
60/*
61 * We map a page at a time. Its fragments are posted in order. This
62 * is called in fragment order as the fragments get completion events.
63 * Only the last frag in the page performs the unmapping.
64 *
65 * It's OK for ring cleanup to call this in whatever order it likes because
66 * DMA is not in flight and so we can unmap while other ring entries still
67 * hold page references in their frags.
68 */
69static void rds_iw_recv_unmap_page(struct rds_iw_connection *ic,
70 struct rds_iw_recv_work *recv)
71{
72 struct rds_page_frag *frag = recv->r_frag;
73
74 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
75 if (frag->f_mapped)
76 ib_dma_unmap_page(ic->i_cm_id->device,
77 frag->f_mapped,
78 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
79 frag->f_mapped = 0;
80}
81
82void rds_iw_recv_init_ring(struct rds_iw_connection *ic)
83{
84 struct rds_iw_recv_work *recv;
85 u32 i;
86
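/* For each ring entry, the data SGE is filled in when the ring is
 * refilled; the header SGE points at this entry's slot in the
 * DMA-coherent receive header array. */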
87 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
88 struct ib_sge *sge;
89
90 recv->r_iwinc = NULL;
91 recv->r_frag = NULL;
92
93 recv->r_wr.next = NULL;
94 recv->r_wr.wr_id = i;
95 recv->r_wr.sg_list = recv->r_sge;
96 recv->r_wr.num_sge = RDS_IW_RECV_SGE;
97
98 sge = rds_iw_data_sge(ic, recv->r_sge);
99 sge->addr = 0;
100 sge->length = RDS_FRAG_SIZE;
101 sge->lkey = 0;
102
103 sge = rds_iw_header_sge(ic, recv->r_sge);
104 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
105 sge->length = sizeof(struct rds_header);
106 sge->lkey = 0;
107 }
108}
109
110static void rds_iw_recv_clear_one(struct rds_iw_connection *ic,
111 struct rds_iw_recv_work *recv)
112{
113 if (recv->r_iwinc) {
114 rds_inc_put(&recv->r_iwinc->ii_inc);
115 recv->r_iwinc = NULL;
116 }
117 if (recv->r_frag) {
118 rds_iw_recv_unmap_page(ic, recv);
119 if (recv->r_frag->f_page)
120 rds_iw_frag_drop_page(recv->r_frag);
121 rds_iw_frag_free(recv->r_frag);
122 recv->r_frag = NULL;
123 }
124}
125
126void rds_iw_recv_clear_ring(struct rds_iw_connection *ic)
127{
128 u32 i;
129
130 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
131 rds_iw_recv_clear_one(ic, &ic->i_recvs[i]);
132
133 if (ic->i_frag.f_page)
134 rds_iw_frag_drop_page(&ic->i_frag);
135}
136
137static int rds_iw_recv_refill_one(struct rds_connection *conn,
138 struct rds_iw_recv_work *recv,
139 gfp_t kptr_gfp, gfp_t page_gfp)
140{
141 struct rds_iw_connection *ic = conn->c_transport_data;
142 dma_addr_t dma_addr;
143 struct ib_sge *sge;
144 int ret = -ENOMEM;
145
146 if (!recv->r_iwinc) {
147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) {
148 rds_iw_stats_inc(s_iw_rx_alloc_limit);
149 goto out;
150 }
151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
152 kptr_gfp);
153 if (!recv->r_iwinc) {
154 atomic_dec(&rds_iw_allocation);
155 goto out;
156 }
157 INIT_LIST_HEAD(&recv->r_iwinc->ii_frags);
158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
159 }
160
161 if (!recv->r_frag) {
162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
163 if (!recv->r_frag)
164 goto out;
165 INIT_LIST_HEAD(&recv->r_frag->f_item);
166 recv->r_frag->f_page = NULL;
167 }
168
169 if (!ic->i_frag.f_page) {
170 ic->i_frag.f_page = alloc_page(page_gfp);
171 if (!ic->i_frag.f_page)
172 goto out;
173 ic->i_frag.f_offset = 0;
174 }
175
176 dma_addr = ib_dma_map_page(ic->i_cm_id->device,
177 ic->i_frag.f_page,
178 ic->i_frag.f_offset,
179 RDS_FRAG_SIZE,
180 DMA_FROM_DEVICE);
181 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
182 goto out;
183
184 /*
185 * Once we get the RDS_PAGE_LAST_OFF frag then rds_iw_frag_unmap()
186 * must be called on this recv. This happens as completions hit
187 * in order or on connection shutdown.
188 */
189 recv->r_frag->f_page = ic->i_frag.f_page;
190 recv->r_frag->f_offset = ic->i_frag.f_offset;
191 recv->r_frag->f_mapped = dma_addr;
192
193 sge = rds_iw_data_sge(ic, recv->r_sge);
194 sge->addr = dma_addr;
195 sge->length = RDS_FRAG_SIZE;
196
197 sge = rds_iw_header_sge(ic, recv->r_sge);
198 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
199 sge->length = sizeof(struct rds_header);
200
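/* The frag takes its own reference on the page. If the page is now
 * full, drop the allocation-time reference; otherwise advance the
 * offset so the next frag shares the same page. */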
201 get_page(recv->r_frag->f_page);
202
203 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) {
204 ic->i_frag.f_offset += RDS_FRAG_SIZE;
205 } else {
206 put_page(ic->i_frag.f_page);
207 ic->i_frag.f_page = NULL;
208 ic->i_frag.f_offset = 0;
209 }
210
211 ret = 0;
212out:
213 return ret;
214}
215
216/*
217 * This tries to allocate and post unused work requests after making sure that
218 * they have all the allocations they need to queue received fragments into
219 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
220 * pairs don't go unmatched.
221 *
222 * -1 is returned if posting fails due to temporary resource exhaustion.
223 */
224int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
225 gfp_t page_gfp, int prefill)
226{
227 struct rds_iw_connection *ic = conn->c_transport_data;
228 struct rds_iw_recv_work *recv;
229 struct ib_recv_wr *failed_wr;
230 unsigned int posted = 0;
231 int ret = 0;
232 u32 pos;
233
234 while ((prefill || rds_conn_up(conn)) &&
235 rds_iw_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
236 if (pos >= ic->i_recv_ring.w_nr) {
237 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
238 pos);
239 ret = -EINVAL;
240 break;
241 }
242
243 recv = &ic->i_recvs[pos];
244 ret = rds_iw_recv_refill_one(conn, recv, kptr_gfp, page_gfp);
245 if (ret) {
246 ret = -1;
247 break;
248 }
249
250 /* XXX when can this fail? */
251 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
252 rdsdebug("recv %p iwinc %p page %p addr %lu ret %d\n", recv,
253 recv->r_iwinc, recv->r_frag->f_page,
254 (long) recv->r_frag->f_mapped, ret);
255 if (ret) {
256 rds_iw_conn_error(conn, "recv post on "
257 "%pI4 returned %d, disconnecting and "
258 "reconnecting\n", &conn->c_faddr,
259 ret);
260 ret = -1;
261 break;
262 }
263
264 posted++;
265 }
266
267 /* We're doing flow control - update the window. */
268 if (ic->i_flowctl && posted)
269 rds_iw_advertise_credits(conn, posted);
270
271 if (ret)
272 rds_iw_ring_unalloc(&ic->i_recv_ring, 1);
273 return ret;
274}
275
276static void rds_iw_inc_purge(struct rds_incoming *inc)
277{
278 struct rds_iw_incoming *iwinc;
279 struct rds_page_frag *frag;
280 struct rds_page_frag *pos;
281
282 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
283 rdsdebug("purging iwinc %p inc %p\n", iwinc, inc);
284
285 list_for_each_entry_safe(frag, pos, &iwinc->ii_frags, f_item) {
286 list_del_init(&frag->f_item);
287 rds_iw_frag_drop_page(frag);
288 rds_iw_frag_free(frag);
289 }
290}
291
292void rds_iw_inc_free(struct rds_incoming *inc)
293{
294 struct rds_iw_incoming *iwinc;
295
296 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
297
298 rds_iw_inc_purge(inc);
299 rdsdebug("freeing iwinc %p inc %p\n", iwinc, inc);
300 BUG_ON(!list_empty(&iwinc->ii_frags));
301 kmem_cache_free(rds_iw_incoming_slab, iwinc);
302 atomic_dec(&rds_iw_allocation);
303 BUG_ON(atomic_read(&rds_iw_allocation) < 0);
304}
305
306int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
307{
308 struct rds_iw_incoming *iwinc;
309 struct rds_page_frag *frag;
310 unsigned long to_copy;
311 unsigned long frag_off = 0;
312 int copied = 0;
313 int ret;
314 u32 len;
315
316 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
317 frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item);
318 len = be32_to_cpu(inc->i_hdr.h_len);
319
320 while (iov_iter_count(to) && copied < len) {
321 if (frag_off == RDS_FRAG_SIZE) {
322 frag = list_entry(frag->f_item.next,
323 struct rds_page_frag, f_item);
324 frag_off = 0;
325 }
326 to_copy = min_t(unsigned long, iov_iter_count(to),
327 RDS_FRAG_SIZE - frag_off);
328 to_copy = min_t(unsigned long, to_copy, len - copied);
329
330 /* XXX needs + offset for multiple recvs per page */
331 rds_stats_add(s_copy_to_user, to_copy);
332 ret = copy_page_to_iter(frag->f_page,
333 frag->f_offset + frag_off,
334 to_copy,
335 to);
336 if (ret != to_copy)
337 return -EFAULT;
338
339 frag_off += to_copy;
340 copied += to_copy;
341 }
342
343 return copied;
344}
345
346/* ic starts out kzalloc()ed */
347void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
348{
349 struct ib_send_wr *wr = &ic->i_ack_wr;
350 struct ib_sge *sge = &ic->i_ack_sge;
351
352 sge->addr = ic->i_ack_dma;
353 sge->length = sizeof(struct rds_header);
354 sge->lkey = rds_iw_local_dma_lkey(ic);
355
356 wr->sg_list = sge;
357 wr->num_sge = 1;
358 wr->opcode = IB_WR_SEND;
359 wr->wr_id = RDS_IW_ACK_WR_ID;
360 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
361}
362
363/*
364 * You'd think that with reliable IB connections you wouldn't need to ack
365 * messages that have been received. The problem is that IB hardware generates
366 * an ack message before it has DMAed the message into memory. This creates a
367 * potential message loss if the HCA is disabled for any reason between when it
368 * sends the ack and when the message is DMAed and processed. This is only a
369 * potential issue if another HCA is available for fail-over.
370 *
371 * When the remote host receives our ack they'll free the sent message from
372 * their send queue. To decrease the latency of this we always send an ack
373 * immediately after we've received messages.
374 *
375 * For simplicity, we only have one ack in flight at a time. This puts
376 * pressure on senders to have deep enough send queues to absorb the latency of
377 * a single ack frame being in flight. This might not be good enough.
378 *
379 * This is implemented by having a long-lived send_wr and sge which point to a
380 * statically allocated ack frame. This ack wr does not fall under the ring
381 * accounting that the tx and rx wrs do. The QP attribute specifically makes
382 * room for it beyond the ring size. Send completion notices its special
383 * wr_id and avoids working with the ring in that case.
384 */
385#ifndef KERNEL_HAS_ATOMIC64
386static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
387 int ack_required)
388{
389 unsigned long flags;
390
391 spin_lock_irqsave(&ic->i_ack_lock, flags);
392 ic->i_ack_next = seq;
393 if (ack_required)
394 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
395 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
396}
397
398static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
399{
400 unsigned long flags;
401 u64 seq;
402
403 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
404
405 spin_lock_irqsave(&ic->i_ack_lock, flags);
406 seq = ic->i_ack_next;
407 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
408
409 return seq;
410}
411#else
412static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
413 int ack_required)
414{
415 atomic64_set(&ic->i_ack_next, seq);
416 if (ack_required) {
417 smp_mb__before_atomic();
418 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
419 }
420}
421
422static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
423{
424 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
425 smp_mb__after_atomic();
426
427 return atomic64_read(&ic->i_ack_next);
428}
429#endif
430
431
432static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
433{
434 struct rds_header *hdr = ic->i_ack;
435 struct ib_send_wr *failed_wr;
436 u64 seq;
437 int ret;
438
439 seq = rds_iw_get_ack(ic);
440
441 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
442 rds_message_populate_header(hdr, 0, 0, 0);
443 hdr->h_ack = cpu_to_be64(seq);
444 hdr->h_credit = adv_credits;
445 rds_message_make_checksum(hdr);
446 ic->i_ack_queued = jiffies;
447
448 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
449 if (unlikely(ret)) {
450 /* Failed to send. Release the WR, and
451 * force another ACK.
452 */
453 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
454 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
455
456 rds_iw_stats_inc(s_iw_ack_send_failure);
457
458 rds_iw_conn_error(ic->conn, "sending ack failed\n");
459 } else
460 rds_iw_stats_inc(s_iw_ack_sent);
461}
462
463/*
464 * There are 3 ways of getting acknowledgements to the peer:
465 * 1. We call rds_iw_attempt_ack from the recv completion handler
466 * to send an ACK-only frame.
467 * However, there can be only one such frame in the send queue
468 * at any time, so we may have to postpone it.
469 * 2. When another (data) packet is transmitted while there's
470 * an ACK in the queue, we piggyback the ACK sequence number
471 * on the data packet.
472 * 3. If the ACK WR is done sending, we get called from the
473 * send queue completion handler, and check whether there's
474 * another ACK pending (postponed because the WR was on the
475 * queue). If so, we transmit it.
476 *
477 * We maintain 2 variables:
478 * - i_ack_flags, which keeps track of whether the ACK WR
479 * is currently in the send queue or not (IB_ACK_IN_FLIGHT)
480 * - i_ack_next, which is the last sequence number we received
481 *
482 * Potentially, send queue and receive queue handlers can run concurrently.
483 * It would be nice to not have to use a spinlock to synchronize things,
484 * but the one problem that rules this out is that 64bit updates are
485 * not atomic on all platforms. Things would be a lot simpler if
486 * we had atomic64 or maybe cmpxchg64 everywhere.
487 *
488 * Reconnecting complicates this picture just slightly. When we
489 * reconnect, we may be seeing duplicate packets. The peer
490 * is retransmitting them, because it hasn't seen an ACK for
491 * them. It is important that we ACK these.
492 *
493 * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with
494 * this flag set *MUST* be acknowledged immediately.
495 */
496
497/*
498 * When we get here, we're called from the recv queue handler.
499 * Check whether we ought to transmit an ACK.
500 */
501void rds_iw_attempt_ack(struct rds_iw_connection *ic)
502{
503 unsigned int adv_credits;
504
505 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
506 return;
507
508 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
509 rds_iw_stats_inc(s_iw_ack_send_delayed);
510 return;
511 }
512
513 /* Can we get a send credit? */
514 if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
515 rds_iw_stats_inc(s_iw_tx_throttle);
516 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
517 return;
518 }
519
520 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
521 rds_iw_send_ack(ic, adv_credits);
522}
523
524/*
525 * We get here from the send completion handler, when the
526 * adapter tells us the ACK frame was sent.
527 */
528void rds_iw_ack_send_complete(struct rds_iw_connection *ic)
529{
530 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
531 rds_iw_attempt_ack(ic);
532}
533
534/*
535 * This is called by the regular xmit code when it wants to piggyback
536 * an ACK on an outgoing frame.
537 */
538u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic)
539{
540 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
541 rds_iw_stats_inc(s_iw_ack_send_piggybacked);
542 return rds_iw_get_ack(ic);
543}
544
545/*
546 * It's kind of lame that we're copying from the posted receive pages into
547 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
548 * them. But receiving new congestion bitmaps should be a *rare* event, so
549 * hopefully we won't need to invest that complexity in making it more
550 * efficient. By copying we can share a simpler core with TCP which has to
551 * copy.
552 */
553static void rds_iw_cong_recv(struct rds_connection *conn,
554 struct rds_iw_incoming *iwinc)
555{
556 struct rds_cong_map *map;
557 unsigned int map_off;
558 unsigned int map_page;
559 struct rds_page_frag *frag;
560 unsigned long frag_off;
561 unsigned long to_copy;
562 unsigned long copied;
563 uint64_t uncongested = 0;
564 void *addr;
565
566 /* catch completely corrupt packets */
567 if (be32_to_cpu(iwinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
568 return;
569
570 map = conn->c_fcong;
571 map_page = 0;
572 map_off = 0;
573
574 frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item);
575 frag_off = 0;
576
577 copied = 0;
578
579 while (copied < RDS_CONG_MAP_BYTES) {
580 uint64_t *src, *dst;
581 unsigned int k;
582
583 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
584 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
585
586 addr = kmap_atomic(frag->f_page);
587
588 src = addr + frag_off;
589 dst = (void *)map->m_page_addrs[map_page] + map_off;
590 for (k = 0; k < to_copy; k += 8) {
591 /* Record ports that became uncongested, ie
592 * bits that changed from 0 to 1. */
593 uncongested |= ~(*src) & *dst;
594 *dst++ = *src++;
595 }
596 kunmap_atomic(addr);
597
598 copied += to_copy;
599
600 map_off += to_copy;
601 if (map_off == PAGE_SIZE) {
602 map_off = 0;
603 map_page++;
604 }
605
606 frag_off += to_copy;
607 if (frag_off == RDS_FRAG_SIZE) {
608 frag = list_entry(frag->f_item.next,
609 struct rds_page_frag, f_item);
610 frag_off = 0;
611 }
612 }
613
614 /* the congestion map is in little endian order */
615 uncongested = le64_to_cpu(uncongested);
616
617 rds_cong_map_updated(map, uncongested);
618}
619
620/*
621 * Rings are posted with all the allocations they'll need to queue the
622 * incoming message to the receiving socket so this can't fail.
623 * All fragments start with a header, so we can make sure we're not receiving
624 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
625 */
626struct rds_iw_ack_state {
627 u64 ack_next;
628 u64 ack_recv;
629 unsigned int ack_required:1;
630 unsigned int ack_next_valid:1;
631 unsigned int ack_recv_valid:1;
632};
633
634static void rds_iw_process_recv(struct rds_connection *conn,
635 struct rds_iw_recv_work *recv, u32 byte_len,
636 struct rds_iw_ack_state *state)
637{
638 struct rds_iw_connection *ic = conn->c_transport_data;
639 struct rds_iw_incoming *iwinc = ic->i_iwinc;
640 struct rds_header *ihdr, *hdr;
641
642 /* XXX shut down the connection if port 0,0 are seen? */
643
644 rdsdebug("ic %p iwinc %p recv %p byte len %u\n", ic, iwinc, recv,
645 byte_len);
646
647 if (byte_len < sizeof(struct rds_header)) {
648 rds_iw_conn_error(conn, "incoming message "
649 "from %pI4 didn't include a "
650 "header, disconnecting and "
651 "reconnecting\n",
652 &conn->c_faddr);
653 return;
654 }
655 byte_len -= sizeof(struct rds_header);
656
657 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
658
659 /* Validate the checksum. */
660 if (!rds_message_verify_checksum(ihdr)) {
661 rds_iw_conn_error(conn, "incoming message "
662 "from %pI4 has corrupted header - "
663 "forcing a reconnect\n",
664 &conn->c_faddr);
665 rds_stats_inc(s_recv_drop_bad_checksum);
666 return;
667 }
668
669 /* Process the ACK sequence which comes with every packet */
670 state->ack_recv = be64_to_cpu(ihdr->h_ack);
671 state->ack_recv_valid = 1;
672
673 /* Process the credits update if there was one */
674 if (ihdr->h_credit)
675 rds_iw_send_add_credits(conn, ihdr->h_credit);
676
677 if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) {
678 /* This is an ACK-only packet. It gets special
679 * treatment here because, historically, ACKs
680 * were rather special beasts.
681 */
682 rds_iw_stats_inc(s_iw_ack_received);
683
684 /*
685 * Usually the frags make their way on to incs and are then freed as
686 * the inc is freed. We don't go that route, so we have to drop the
687 * page ref ourselves. We can't just leave the page on the recv
688 * because that confuses the dma mapping of pages and each recv's use
689 * of a partial page. We can leave the frag, though, it will be
690 * reused.
691 *
692 * FIXME: Fold this into the code path below.
693 */
694 rds_iw_frag_drop_page(recv->r_frag);
695 return;
696 }
697
698 /*
699 * If we don't already have an inc on the connection then this
700 * fragment has a header and starts a message; copy its header
701 * into the inc and save the inc so we can hang upcoming fragments
702 * off its list.
703 */
704 if (!iwinc) {
705 iwinc = recv->r_iwinc;
706 recv->r_iwinc = NULL;
707 ic->i_iwinc = iwinc;
708
709 hdr = &iwinc->ii_inc.i_hdr;
710 memcpy(hdr, ihdr, sizeof(*hdr));
711 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
712
713 rdsdebug("ic %p iwinc %p rem %u flag 0x%x\n", ic, iwinc,
714 ic->i_recv_data_rem, hdr->h_flags);
715 } else {
716 hdr = &iwinc->ii_inc.i_hdr;
717 /* We can't just use memcmp here; fragments of a
718 * single message may carry different ACKs */
719 if (hdr->h_sequence != ihdr->h_sequence ||
720 hdr->h_len != ihdr->h_len ||
721 hdr->h_sport != ihdr->h_sport ||
722 hdr->h_dport != ihdr->h_dport) {
723 rds_iw_conn_error(conn,
724 "fragment header mismatch; forcing reconnect\n");
725 return;
726 }
727 }
728
729 list_add_tail(&recv->r_frag->f_item, &iwinc->ii_frags);
730 recv->r_frag = NULL;
731
732 if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
733 ic->i_recv_data_rem -= RDS_FRAG_SIZE;
734 else {
735 ic->i_recv_data_rem = 0;
736 ic->i_iwinc = NULL;
737
738 if (iwinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
739 rds_iw_cong_recv(conn, iwinc);
740 else {
741 rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
742 &iwinc->ii_inc, GFP_ATOMIC);
743 state->ack_next = be64_to_cpu(hdr->h_sequence);
744 state->ack_next_valid = 1;
745 }
746
747 /* Evaluate the ACK_REQUIRED flag *after* we received
748 * the complete frame, and after bumping the next_rx
749 * sequence. */
750 if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
751 rds_stats_inc(s_recv_ack_required);
752 state->ack_required = 1;
753 }
754
755 rds_inc_put(&iwinc->ii_inc);
756 }
757}
758
759/*
760 * Plucking the oldest entry from the ring can be done concurrently with
761 * the thread refilling the ring. Each ring operation is protected by
762 * spinlocks and the transient state of refilling doesn't change the
763 * recording of which entry is oldest.
764 *
765 * This relies on IB only calling one cq comp_handler for each cq so that
766 * there will only be one caller of rds_recv_incoming() per RDS connection.
767 */
768void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context)
769{
770 struct rds_connection *conn = context;
771 struct rds_iw_connection *ic = conn->c_transport_data;
772
773 rdsdebug("conn %p cq %p\n", conn, cq);
774
775 rds_iw_stats_inc(s_iw_rx_cq_call);
776
777 tasklet_schedule(&ic->i_recv_tasklet);
778}
779
780static inline void rds_poll_cq(struct rds_iw_connection *ic,
781 struct rds_iw_ack_state *state)
782{
783 struct rds_connection *conn = ic->conn;
784 struct ib_wc wc;
785 struct rds_iw_recv_work *recv;
786
787 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
788 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
789 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
790 be32_to_cpu(wc.ex.imm_data));
791 rds_iw_stats_inc(s_iw_rx_cq_event);
792
793 recv = &ic->i_recvs[rds_iw_ring_oldest(&ic->i_recv_ring)];
794
795 rds_iw_recv_unmap_page(ic, recv);
796
797 /*
798 * Also process recvs in connecting state because it is possible
799 * to get a recv completion _before_ the rdmacm ESTABLISHED
800 * event is processed.
801 */
802 if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
803 /* We expect errors as the qp is drained during shutdown */
804 if (wc.status == IB_WC_SUCCESS) {
805 rds_iw_process_recv(conn, recv, wc.byte_len, state);
806 } else {
807 rds_iw_conn_error(conn, "recv completion on "
808 "%pI4 had status %u, disconnecting and "
809 "reconnecting\n", &conn->c_faddr,
810 wc.status);
811 }
812 }
813
814 rds_iw_ring_free(&ic->i_recv_ring, 1);
815 }
816}
817
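/* The tasklet below uses the common poll / re-arm / poll-again CQ pattern:
 * drain the CQ, request the next completion interrupt with
 * ib_req_notify_cq(), then drain once more to catch any completion that
 * raced with the re-arm and would otherwise sit in the CQ until the next
 * unrelated event.
 */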
818void rds_iw_recv_tasklet_fn(unsigned long data)
819{
820 struct rds_iw_connection *ic = (struct rds_iw_connection *) data;
821 struct rds_connection *conn = ic->conn;
822 struct rds_iw_ack_state state = { 0, };
823
824 rds_poll_cq(ic, &state);
825 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
826 rds_poll_cq(ic, &state);
827
828 if (state.ack_next_valid)
829 rds_iw_set_ack(ic, state.ack_next, state.ack_required);
830 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
831 rds_send_drop_acked(conn, state.ack_recv, NULL);
832 ic->i_ack_recv = state.ack_recv;
833 }
834 if (rds_conn_up(conn))
835 rds_iw_attempt_ack(ic);
836
837 /* If we ever end up with a really empty receive ring, we're
838 * in deep trouble, as the sender will definitely see RNR
839 * timeouts. */
840 if (rds_iw_ring_empty(&ic->i_recv_ring))
841 rds_iw_stats_inc(s_iw_rx_ring_empty);
842
843 /*
844 * If the ring is running low, then schedule the thread to refill.
845 */
846 if (rds_iw_ring_low(&ic->i_recv_ring))
847 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
848}
849
850int rds_iw_recv(struct rds_connection *conn)
851{
852 struct rds_iw_connection *ic = conn->c_transport_data;
853 int ret = 0;
854
855 rdsdebug("conn %p\n", conn);
856
857 /*
858 * If we get a temporary posting failure in this context then
859 * we're really low and we want the caller to back off for a bit.
860 */
861 mutex_lock(&ic->i_recv_mutex);
862 if (rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
863 ret = -ENOMEM;
864 else
865 rds_iw_stats_inc(s_iw_rx_refill_from_thread);
866 mutex_unlock(&ic->i_recv_mutex);
867
868 if (rds_conn_up(conn))
869 rds_iw_attempt_ack(ic);
870
871 return ret;
872}
873
874int rds_iw_recv_init(void)
875{
876 struct sysinfo si;
877 int ret = -ENOMEM;
878
879 /* Default to roughly a third of all available RAM for recv memory */
880 si_meminfo(&si);
881 rds_iw_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
882
883 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
884 sizeof(struct rds_iw_incoming),
885 0, 0, NULL);
886 if (!rds_iw_incoming_slab)
887 goto out;
888
889 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
890 sizeof(struct rds_page_frag),
891 0, 0, NULL);
892 if (!rds_iw_frag_slab)
893 kmem_cache_destroy(rds_iw_incoming_slab);
894 else
895 ret = 0;
896out:
897 return ret;
898}
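/* Rough worked example of the limit computed above (illustrative numbers,
 * assuming 4 KiB pages and an RDS_FRAG_SIZE of 4 KiB): with 4 GiB of RAM,
 * si.totalram is ~1048576 pages, so the cap works out to roughly
 * 1048576 / 3 * 4096 / 4096 = 349525 fragments, i.e. about a third of RAM.
 */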
899
900void rds_iw_recv_exit(void)
901{
902 kmem_cache_destroy(rds_iw_incoming_slab);
903 kmem_cache_destroy(rds_iw_frag_slab);
904}
diff --git a/net/rds/iw_ring.c b/net/rds/iw_ring.c
deleted file mode 100644
index da8e3b63f663..000000000000
--- a/net/rds/iw_ring.c
+++ /dev/null
@@ -1,169 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "iw.h"
37
38/*
39 * Locking for IB rings.
40 * We assume that allocation is always protected by a mutex
41 * in the caller (this is a valid assumption for the current
42 * implementation).
43 *
44 * Freeing always happens in an interrupt, and hence only
45 * races with allocations, but not with other free()s.
46 *
47 * The interaction between allocation and freeing is that
48 * the alloc code has to determine the number of free entries.
49 * To this end, we maintain two counters; an allocation counter
50 * and a free counter. Both are allowed to run freely, and wrap
51 * around.
52 * The number of used entries is always (alloc_ctr - free_ctr), computed in wrapping u32 arithmetic.
53 *
54 * The current implementation makes free_ctr atomic. When the
55 * caller finds an allocation fails, it should set an "alloc fail"
56 * bit and retry the allocation. The "alloc fail" bit essentially tells
57 * the CQ completion handlers to wake it up after freeing some
58 * more entries.
59 */
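/* Illustrative sketch (values invented for the example): the free-running
 * counters stay correct across wraparound because the subtraction is done
 * in u32 arithmetic and no more than w_nr entries are ever outstanding:
 *
 *	u32 alloc_ctr = 0xfffffffe, free_ctr = 0xfffffffa;
 *	u32 used = alloc_ctr - free_ctr;	// 4
 *	alloc_ctr += 3;				// wraps to 0x00000001
 *	used = alloc_ctr - free_ctr;		// still correct: 7
 */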
60
61/*
62 * This only happens on shutdown.
63 */
64DECLARE_WAIT_QUEUE_HEAD(rds_iw_ring_empty_wait);
65
66void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr)
67{
68 memset(ring, 0, sizeof(*ring));
69 ring->w_nr = nr;
70 rdsdebug("ring %p nr %u\n", ring, ring->w_nr);
71}
72
73static inline u32 __rds_iw_ring_used(struct rds_iw_work_ring *ring)
74{
75 u32 diff;
76
77 /* This assumes that atomic_t has at least as many bits as u32 */
78 diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr);
79 BUG_ON(diff > ring->w_nr);
80
81 return diff;
82}
83
84void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr)
85{
86 /* We only ever get called from the connection setup code,
87 * prior to creating the QP. */
88 BUG_ON(__rds_iw_ring_used(ring));
89 ring->w_nr = nr;
90}
91
92static int __rds_iw_ring_empty(struct rds_iw_work_ring *ring)
93{
94 return __rds_iw_ring_used(ring) == 0;
95}
96
97u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos)
98{
99 u32 ret = 0, avail;
100
101 avail = ring->w_nr - __rds_iw_ring_used(ring);
102
103 rdsdebug("ring %p val %u next %u free %u\n", ring, val,
104 ring->w_alloc_ptr, avail);
105
106 if (val && avail) {
107 ret = min(val, avail);
108 *pos = ring->w_alloc_ptr;
109
110 ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr;
111 ring->w_alloc_ctr += ret;
112 }
113
114 return ret;
115}
116
117void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val)
118{
119 ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr;
120 atomic_add(val, &ring->w_free_ctr);
121
122 if (__rds_iw_ring_empty(ring) &&
123 waitqueue_active(&rds_iw_ring_empty_wait))
124 wake_up(&rds_iw_ring_empty_wait);
125}
126
127void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val)
128{
129 ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr;
130 ring->w_alloc_ctr -= val;
131}
132
133int rds_iw_ring_empty(struct rds_iw_work_ring *ring)
134{
135 return __rds_iw_ring_empty(ring);
136}
137
138int rds_iw_ring_low(struct rds_iw_work_ring *ring)
139{
140 return __rds_iw_ring_used(ring) <= (ring->w_nr >> 1);
141}
142
143
144/*
145 * returns the oldest alloced ring entry. This will be the next one
146 * freed. This can't be called if there are none allocated.
147 */
148u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring)
149{
150 return ring->w_free_ptr;
151}
152
153/*
154 * returns the number of completed work requests.
155 */
156
157u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest)
158{
159 u32 ret;
160
161 if (oldest <= (unsigned long long)wr_id)
162 ret = (unsigned long long)wr_id - oldest + 1;
163 else
164 ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1;
165
166 rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret,
167 wr_id, oldest);
168 return ret;
169}
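/* Worked example (numbers invented for the sketch): with w_nr = 8,
 * oldest = 6 and a completion for wr_id = 1, the wrap branch above gives
 * 8 - 6 + 1 + 1 = 4 completed entries, i.e. ring slots 6, 7, 0 and 1.
 */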
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
deleted file mode 100644
index e20bd503f4bd..000000000000
--- a/net/rds/iw_send.c
+++ /dev/null
@@ -1,981 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/device.h>
36#include <linux/dmapool.h>
37#include <linux/ratelimit.h>
38
39#include "rds.h"
40#include "iw.h"
41
42static void rds_iw_send_rdma_complete(struct rds_message *rm,
43 int wc_status)
44{
45 int notify_status;
46
47 switch (wc_status) {
48 case IB_WC_WR_FLUSH_ERR:
49 return;
50
51 case IB_WC_SUCCESS:
52 notify_status = RDS_RDMA_SUCCESS;
53 break;
54
55 case IB_WC_REM_ACCESS_ERR:
56 notify_status = RDS_RDMA_REMOTE_ERROR;
57 break;
58
59 default:
60 notify_status = RDS_RDMA_OTHER_ERROR;
61 break;
62 }
63 rds_rdma_send_complete(rm, notify_status);
64}
65
66static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
67 struct rm_rdma_op *op)
68{
69 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->op_sg, op->op_nents,
72 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->op_mapped = 0;
74 }
75}
76
77static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
78 struct rds_iw_send_work *send,
79 int wc_status)
80{
81 struct rds_message *rm = send->s_rm;
82
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84
85 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->data.op_sg, rm->data.op_nents,
87 DMA_TO_DEVICE);
88
89 if (rm->rdma.op_active) {
90 rds_iw_send_unmap_rdma(ic, &rm->rdma);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we receive the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_iw_send_rdma_complete(rm, wc_status);
113
114 if (rm->rdma.op_write)
115 rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
116 else
117 rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
118 }
119
120 /* If anyone waited for this message to get flushed out, wake
121 * them up now */
122 rds_message_unmapped(rm);
123
124 rds_message_put(rm);
125 send->s_rm = NULL;
126}
127
128void rds_iw_send_init_ring(struct rds_iw_connection *ic)
129{
130 struct rds_iw_send_work *send;
131 u32 i;
132
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge;
135
136 send->s_rm = NULL;
137 send->s_op = NULL;
138 send->s_mapping = NULL;
139
140 send->s_send_wr.next = NULL;
141 send->s_send_wr.wr_id = i;
142 send->s_send_wr.sg_list = send->s_sge;
143 send->s_send_wr.num_sge = 1;
144 send->s_send_wr.opcode = IB_WR_SEND;
145 send->s_send_wr.send_flags = 0;
146 send->s_send_wr.ex.imm_data = 0;
147
148 sge = rds_iw_data_sge(ic, send->s_sge);
149 sge->lkey = 0;
150
151 sge = rds_iw_header_sge(ic, send->s_sge);
152 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
153 sge->length = sizeof(struct rds_header);
154 sge->lkey = 0;
155
156 send->s_mr = ib_alloc_mr(ic->i_pd, IB_MR_TYPE_MEM_REG,
157 fastreg_message_size);
158 if (IS_ERR(send->s_mr)) {
159 printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
160 break;
161 }
162 }
163}
164
165void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
166{
167 struct rds_iw_send_work *send;
168 u32 i;
169
170 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
171 BUG_ON(!send->s_mr);
172 ib_dereg_mr(send->s_mr);
173 if (send->s_send_wr.opcode == 0xdead)
174 continue;
175 if (send->s_rm)
176 rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
177 if (send->s_op)
178 rds_iw_send_unmap_rdma(ic, send->s_op);
179 }
180}
181
182/*
183 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
184 * operations performed in the send path. As the sender allocs and potentially
185 * unallocs the next free entry in the ring, it doesn't alter which
186 * entry is the next to be freed, which is all this path is concerned with.
187 */
188void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
189{
190 struct rds_connection *conn = context;
191 struct rds_iw_connection *ic = conn->c_transport_data;
192 struct ib_wc wc;
193 struct rds_iw_send_work *send;
194 u32 completed;
195 u32 oldest;
196 u32 i;
197 int ret;
198
199 rdsdebug("cq %p conn %p\n", cq, conn);
200 rds_iw_stats_inc(s_iw_tx_cq_call);
201 ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
202 if (ret)
203 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
204
205 while (ib_poll_cq(cq, 1, &wc) > 0) {
206 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
207 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
208 be32_to_cpu(wc.ex.imm_data));
209 rds_iw_stats_inc(s_iw_tx_cq_event);
210
211 if (wc.status != IB_WC_SUCCESS) {
212 printk(KERN_ERR "WC Error: status = %d opcode = %d\n", wc.status, wc.opcode);
213 break;
214 }
215
216 if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) {
217 ic->i_fastreg_posted = 0;
218 continue;
219 }
220
221 if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
222 ic->i_fastreg_posted = 1;
223 continue;
224 }
225
226 if (wc.wr_id == RDS_IW_ACK_WR_ID) {
227 if (time_after(jiffies, ic->i_ack_queued + HZ/2))
228 rds_iw_stats_inc(s_iw_tx_stalled);
229 rds_iw_ack_send_complete(ic);
230 continue;
231 }
232
233 oldest = rds_iw_ring_oldest(&ic->i_send_ring);
234
235 completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);
236
237 for (i = 0; i < completed; i++) {
238 send = &ic->i_sends[oldest];
239
240 /* In the error case, wc.opcode sometimes contains garbage */
241 switch (send->s_send_wr.opcode) {
242 case IB_WR_SEND:
243 if (send->s_rm)
244 rds_iw_send_unmap_rm(ic, send, wc.status);
245 break;
246 case IB_WR_REG_MR:
247 case IB_WR_RDMA_WRITE:
248 case IB_WR_RDMA_READ:
249 case IB_WR_RDMA_READ_WITH_INV:
250 /* Nothing to be done - the SG list will be unmapped
251 * when the SEND completes. */
252 break;
253 default:
254 printk_ratelimited(KERN_NOTICE
255 "RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
256 __func__, send->s_send_wr.opcode);
257 break;
258 }
259
260 send->s_send_wr.opcode = 0xdead;
261 send->s_send_wr.num_sge = 1;
262 if (time_after(jiffies, send->s_queued + HZ/2))
263 rds_iw_stats_inc(s_iw_tx_stalled);
264
265 /* If an RDMA operation produced an error, signal this right
266 * away. If we don't, the subsequent SEND that goes with this
267 * RDMA will be canceled with ERR_WFLUSH, and the application
268 * will never learn that the RDMA failed. */
269 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
270 struct rds_message *rm;
271
272 rm = rds_send_get_message(conn, send->s_op);
273 if (rm)
274 rds_iw_send_rdma_complete(rm, wc.status);
275 }
276
277 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
278 }
279
280 rds_iw_ring_free(&ic->i_send_ring, completed);
281
282 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
283 test_bit(0, &conn->c_map_queued))
284 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
285
286 /* We expect errors as the qp is drained during shutdown */
287 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
288 rds_iw_conn_error(conn,
289 "send completion on %pI4 "
290 "had status %u, disconnecting and reconnecting\n",
291 &conn->c_faddr, wc.status);
292 }
293 }
294}
295
296/*
297 * This is the main function for allocating credits when sending
298 * messages.
299 *
300 * Conceptually, we have two counters:
301 * - send credits: this tells us how many WRs we're allowed
302 * to submit without overrunning the receiver's queue. For
303 * each SEND WR we post, we decrement this by one.
304 *
305 * - posted credits: this tells us how many WRs we recently
306 * posted to the receive queue. This value is transferred
307 * to the peer as a "credit update" in a RDS header field.
308 * Every time we transmit credits to the peer, we subtract
309 * the amount of transferred credits from this counter.
310 *
311 * It is essential that we avoid situations where both sides have
312 * exhausted their send credits, and are unable to send new credits
313 * to the peer. We achieve this by requiring that we send at least
314 * one credit update to the peer before exhausting our credits.
315 * When new credits arrive, we subtract one credit that is withheld
316 * until we've posted new buffers and are ready to transmit these
317 * credits (see rds_iw_send_add_credits below).
318 *
319 * The RDS send code is essentially single-threaded; rds_send_xmit
320 * grabs c_send_lock to ensure exclusive access to the send ring.
321 * However, the ACK sending code is independent and can race with
322 * message SENDs.
323 *
324 * In the send path, we need to update the counters for send credits
325 * and the counter of posted buffers atomically - when we use the
326 * last available credit, we cannot allow another thread to race us
327 * and grab the posted credits counter. Hence, we have to use a
328 * spinlock to protect the credit counter, or use atomics.
329 *
330 * Spinlocks shared between the send and the receive path are bad,
331 * because they create unnecessary delays. An early implementation
332 * using a spinlock showed a 5% degradation in throughput at some
333 * loads.
334 *
335 * This implementation avoids spinlocks completely, putting both
336 * counters into a single atomic, and updating that atomic using
337 * atomic_add (in the receive path, when receiving fresh credits),
338 * and using atomic_cmpxchg when updating the two counters.
339 */
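/* Sketch of the packing this relies on (the real macros live in iw.h; the
 * 16/16 split shown here is an assumption for illustration): both counters
 * share one atomic_t, roughly
 *
 *	send credits   = credits & 0xffff;	// IB_GET_SEND_CREDITS()
 *	posted credits = credits >> 16;		// IB_GET_POST_CREDITS()
 *
 * so one atomic_cmpxchg() can consume send credits and claim posted credits
 * in a single step, and an atomic_add() of IB_SET_SEND_CREDITS(n) from the
 * receive path never exposes a half-updated pair.
 */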
340int rds_iw_send_grab_credits(struct rds_iw_connection *ic,
341 u32 wanted, u32 *adv_credits, int need_posted, int max_posted)
342{
343 unsigned int avail, posted, got = 0, advertise;
344 long oldval, newval;
345
346 *adv_credits = 0;
347 if (!ic->i_flowctl)
348 return wanted;
349
350try_again:
351 advertise = 0;
352 oldval = newval = atomic_read(&ic->i_credits);
353 posted = IB_GET_POST_CREDITS(oldval);
354 avail = IB_GET_SEND_CREDITS(oldval);
355
356 rdsdebug("wanted=%u credits=%u posted=%u\n",
357 wanted, avail, posted);
358
359 /* The last credit must be used to send a credit update. */
360 if (avail && !posted)
361 avail--;
362
363 if (avail < wanted) {
364 struct rds_connection *conn = ic->i_cm_id->context;
365
366 /* Oops, there aren't that many credits left! */
367 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
368 got = avail;
369 } else {
370 /* Sometimes you get what you want, lalala. */
371 got = wanted;
372 }
373 newval -= IB_SET_SEND_CREDITS(got);
374
375 /*
376 * If need_posted is non-zero, then the caller wants the
377 * posted credits advertised regardless of whether any send
378 * credits are available.
379 */
380 if (posted && (got || need_posted)) {
381 advertise = min_t(unsigned int, posted, max_posted);
382 newval -= IB_SET_POST_CREDITS(advertise);
383 }
384
385 /* Finally bill everything */
386 if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
387 goto try_again;
388
389 *adv_credits = advertise;
390 return got;
391}
392
393void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits)
394{
395 struct rds_iw_connection *ic = conn->c_transport_data;
396
397 if (credits == 0)
398 return;
399
400 rdsdebug("credits=%u current=%u%s\n",
401 credits,
402 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
403 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");
404
405 atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
406 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
407 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
408
409 WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);
410
411 rds_iw_stats_inc(s_iw_rx_credit_updates);
412}
413
414void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted)
415{
416 struct rds_iw_connection *ic = conn->c_transport_data;
417
418 if (posted == 0)
419 return;
420
421 atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
422
423 /* Decide whether to send an update to the peer now.
424 * If we would send a credit update for every single buffer we
425 * post, we would end up with an ACK storm (ACK arrives,
426 * consumes buffer, we refill the ring, send ACK to remote
427 * advertising the newly posted buffer... ad inf)
428 *
429 * Performance pretty much depends on how often we send
430 * credit updates - too frequent updates mean lots of ACKs.
431 * Too infrequent updates, and the peer will run out of
432 * credits and have to throttle.
433 * For the time being, 16 seems to be a good compromise.
434 */
435 if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
436 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
437}
438
439static inline void
440rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
441 struct rds_iw_send_work *send, unsigned int pos,
442 unsigned long buffer, unsigned int length,
443 int send_flags)
444{
445 struct ib_sge *sge;
446
447 WARN_ON(pos != send - ic->i_sends);
448
449 send->s_send_wr.send_flags = send_flags;
450 send->s_send_wr.opcode = IB_WR_SEND;
451 send->s_send_wr.num_sge = 2;
452 send->s_send_wr.next = NULL;
453 send->s_queued = jiffies;
454 send->s_op = NULL;
455
456 if (length != 0) {
457 sge = rds_iw_data_sge(ic, send->s_sge);
458 sge->addr = buffer;
459 sge->length = length;
460 sge->lkey = rds_iw_local_dma_lkey(ic);
461
462 sge = rds_iw_header_sge(ic, send->s_sge);
463 } else {
464 /* We're sending a packet with no payload. There is only
465 * one SGE */
466 send->s_send_wr.num_sge = 1;
467 sge = &send->s_sge[0];
468 }
469
470 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
471 sge->length = sizeof(struct rds_header);
472 sge->lkey = rds_iw_local_dma_lkey(ic);
473}
474
475/*
476 * This can be called multiple times for a given message. The first time
477 * we see a message we map its scatterlist into the IB device so that
478 * we can provide that mapped address to the IB scatter gather entries
479 * in the IB work requests. We translate the scatterlist into a series
480 * of work requests that fragment the message. These work requests complete
481 * in order so we pass ownership of the message to the completion handler
482 * once we send the final fragment.
483 *
484 * The RDS core uses the c_send_lock to only enter this function once
485 * per connection. This makes sure that the tx ring alloc/unalloc pairs
486 * don't get out of sync and confuse the ring.
487 */
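/* The fragment count below comes from RDS's ceil() helper, which is just a
 * round-up division (effectively DIV_ROUND_UP(len, RDS_FRAG_SIZE)). For
 * example, assuming RDS_FRAG_SIZE is 4096, an 8200 byte payload needs
 * ceil(8200, 4096) = 3 SEND work requests.
 */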
488int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
489 unsigned int hdr_off, unsigned int sg, unsigned int off)
490{
491 struct rds_iw_connection *ic = conn->c_transport_data;
492 struct ib_device *dev = ic->i_cm_id->device;
493 struct rds_iw_send_work *send = NULL;
494 struct rds_iw_send_work *first;
495 struct rds_iw_send_work *prev;
496 struct ib_send_wr *failed_wr;
497 struct scatterlist *scat;
498 u32 pos;
499 u32 i;
500 u32 work_alloc;
501 u32 credit_alloc;
502 u32 posted;
503 u32 adv_credits = 0;
504 int send_flags = 0;
505 int sent;
506 int ret;
507 int flow_controlled = 0;
508
509 BUG_ON(off % RDS_FRAG_SIZE);
510 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
511
512 /* Fastreg support */
513 if (rds_rdma_cookie_key(rm->m_rdma_cookie) && !ic->i_fastreg_posted) {
514 ret = -EAGAIN;
515 goto out;
516 }
517
518 /* FIXME we may overallocate here */
519 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
520 i = 1;
521 else
522 i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
523
524 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
525 if (work_alloc == 0) {
526 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
527 rds_iw_stats_inc(s_iw_tx_ring_full);
528 ret = -ENOMEM;
529 goto out;
530 }
531
532 credit_alloc = work_alloc;
533 if (ic->i_flowctl) {
534 credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
535 adv_credits += posted;
536 if (credit_alloc < work_alloc) {
537 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
538 work_alloc = credit_alloc;
539 flow_controlled++;
540 }
541 if (work_alloc == 0) {
542 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
543 rds_iw_stats_inc(s_iw_tx_throttle);
544 ret = -ENOMEM;
545 goto out;
546 }
547 }
548
549 /* map the message the first time we see it */
550 if (!ic->i_rm) {
551 /*
552 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
553 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
554 rm->m_inc.i_hdr.h_flags,
555 be32_to_cpu(rm->m_inc.i_hdr.h_len));
556 */
557 if (rm->data.op_nents) {
558 rm->data.op_count = ib_dma_map_sg(dev,
559 rm->data.op_sg,
560 rm->data.op_nents,
561 DMA_TO_DEVICE);
562 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
563 if (rm->data.op_count == 0) {
564 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
565 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
566 ret = -ENOMEM; /* XXX ? */
567 goto out;
568 }
569 } else {
570 rm->data.op_count = 0;
571 }
572
573 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
574 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
575 rds_message_addref(rm);
576 rm->data.op_dmasg = 0;
577 rm->data.op_dmaoff = 0;
578 ic->i_rm = rm;
579
580 /* Finalize the header */
581 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
582 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
583 if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
584 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
585
586 /* If it has a RDMA op, tell the peer we did it. This is
587 * used by the peer to release use-once RDMA MRs. */
588 if (rm->rdma.op_active) {
589 struct rds_ext_header_rdma ext_hdr;
590
591 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
592 rds_message_add_extension(&rm->m_inc.i_hdr,
593 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
594 }
595 if (rm->m_rdma_cookie) {
596 rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
597 rds_rdma_cookie_key(rm->m_rdma_cookie),
598 rds_rdma_cookie_offset(rm->m_rdma_cookie));
599 }
600
601 /* Note - rds_iw_piggyb_ack clears the ACK_REQUIRED bit, so
602 * we should not do this unless we have a chance of at least
603 * sticking the header into the send ring, which is why
604 * rds_iw_ring_alloc must be called first. */
605 rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_iw_piggyb_ack(ic));
606 rds_message_make_checksum(&rm->m_inc.i_hdr);
607
608 /*
609 * Update adv_credits since we reset the ACK_REQUIRED bit.
610 */
611 rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
612 adv_credits += posted;
613 BUG_ON(adv_credits > 255);
614 }
615
616 send = &ic->i_sends[pos];
617 first = send;
618 prev = NULL;
619 scat = &rm->data.op_sg[rm->data.op_dmasg];
620 sent = 0;
621 i = 0;
622
623 /* Sometimes you want to put a fence between an RDMA
624 * READ and the following SEND.
625 * We could either do this all the time
626 * or when requested by the user. Right now, we let
627 * the application choose.
628 */
629 if (rm->rdma.op_active && rm->rdma.op_fence)
630 send_flags = IB_SEND_FENCE;
631
632 /*
633 * We could be copying the header into the unused tail of the page.
634 * That would need to be changed in the future when those pages might
635 * be mapped userspace pages or page cache pages. So instead we always
636 * use a second sge and our long-lived ring of mapped headers. We send
637 * the header after the data so that the data payload can be aligned on
638 * the receiver.
639 */
640
641 /* handle a 0-len message */
642 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) {
643 rds_iw_xmit_populate_wr(ic, send, pos, 0, 0, send_flags);
644 goto add_header;
645 }
646
647 /* if there's data reference it with a chain of work reqs */
648 for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
649 unsigned int len;
650
651 send = &ic->i_sends[pos];
652
653 len = min(RDS_FRAG_SIZE,
654 ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
655 rds_iw_xmit_populate_wr(ic, send, pos,
656 ib_sg_dma_address(dev, scat) + rm->data.op_dmaoff, len,
657 send_flags);
658
659 /*
660 * We want to delay signaling completions just enough to get
661 * the batching benefits but not so much that we create dead time
662 * on the wire.
663 */
664 if (ic->i_unsignaled_wrs-- == 0) {
665 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
666 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
667 }
668
669 ic->i_unsignaled_bytes -= len;
670 if (ic->i_unsignaled_bytes <= 0) {
671 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
672 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
673 }
674
675 /*
676 * Always signal the last one if we're stopping due to flow control.
677 */
678 if (flow_controlled && i == (work_alloc-1))
679 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
680
681 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
682 &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
683
684 sent += len;
685 rm->data.op_dmaoff += len;
686 if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
687 scat++;
688 rm->data.op_dmaoff = 0;
689 rm->data.op_dmasg++;
690 }
691
692add_header:
693 /* Tack on the header after the data. The header SGE should already
694 * have been set up to point to the right header buffer. */
695 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
696
697 if (0) {
698 struct rds_header *hdr = &ic->i_send_hdrs[pos];
699
700 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
701 be16_to_cpu(hdr->h_dport),
702 hdr->h_flags,
703 be32_to_cpu(hdr->h_len));
704 }
705 if (adv_credits) {
706 struct rds_header *hdr = &ic->i_send_hdrs[pos];
707
708 /* add credit and redo the header checksum */
709 hdr->h_credit = adv_credits;
710 rds_message_make_checksum(hdr);
711 adv_credits = 0;
712 rds_iw_stats_inc(s_iw_tx_credit_updates);
713 }
714
715 if (prev)
716 prev->s_send_wr.next = &send->s_send_wr;
717 prev = send;
718
719 pos = (pos + 1) % ic->i_send_ring.w_nr;
720 }
721
722 /* Account the RDS header in the number of bytes we sent, but just once.
723 * The caller has no concept of fragmentation. */
724 if (hdr_off == 0)
725 sent += sizeof(struct rds_header);
726
727 /* if we finished the message then send completion owns it */
728 if (scat == &rm->data.op_sg[rm->data.op_count]) {
729 prev->s_rm = ic->i_rm;
730 prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
731 ic->i_rm = NULL;
732 }
733
734 if (i < work_alloc) {
735 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
736 work_alloc = i;
737 }
738 if (ic->i_flowctl && i < credit_alloc)
739 rds_iw_send_add_credits(conn, credit_alloc - i);
740
741 /* XXX need to worry about failed_wr and partial sends. */
742 failed_wr = &first->s_send_wr;
743 ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
744 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
745 first, &first->s_send_wr, ret, failed_wr);
746 BUG_ON(failed_wr != &first->s_send_wr);
747 if (ret) {
748 printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
749 "returned %d\n", &conn->c_faddr, ret);
750 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
751 if (prev->s_rm) {
752 ic->i_rm = prev->s_rm;
753 prev->s_rm = NULL;
754 }
755 goto out;
756 }
757
758 ret = sent;
759out:
760 BUG_ON(adv_credits);
761 return ret;
762}
763
764static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
765 struct scatterlist *sg,
766 int sg_nents)
767{
768 int n;
769
770 n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
771 if (unlikely(n != sg_nents))
772 return n < 0 ? n : -EINVAL;
773
774 send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
775 send->s_reg_wr.wr.wr_id = 0;
776 send->s_reg_wr.wr.num_sge = 0;
777 send->s_reg_wr.mr = send->s_mr;
778 send->s_reg_wr.key = send->s_mr->rkey;
779 send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
780
781 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
782
783 return 0;
784}
785
786int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
787{
788 struct rds_iw_connection *ic = conn->c_transport_data;
789 struct rds_iw_send_work *send = NULL;
790 struct rds_iw_send_work *first;
791 struct rds_iw_send_work *prev;
792 struct ib_send_wr *failed_wr;
793 struct rds_iw_device *rds_iwdev;
794 struct scatterlist *scat;
795 unsigned long len;
796 u64 remote_addr = op->op_remote_addr;
797 u32 pos, fr_pos;
798 u32 work_alloc;
799 u32 i;
800 u32 j;
801 int sent;
802 int ret;
803 int num_sge;
804 int sg_nents;
805
806 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
807
808 /* map the message the first time we see it */
809 if (!op->op_mapped) {
810 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
811 op->op_sg, op->op_nents, (op->op_write) ?
812 DMA_TO_DEVICE : DMA_FROM_DEVICE);
813 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
814 if (op->op_count == 0) {
815 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
816 ret = -ENOMEM; /* XXX ? */
817 goto out;
818 }
819
820 op->op_mapped = 1;
821 }
822
823 if (!op->op_write) {
824 /* Alloc space on the send queue for the fastreg */
825 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
826 if (work_alloc != 1) {
827 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
828 rds_iw_stats_inc(s_iw_tx_ring_full);
829 ret = -ENOMEM;
830 goto out;
831 }
832 }
833
834 /*
835 * Instead of knowing how to return a partial rdma read/write we insist that there
836 * be enough work requests to send the entire message.
837 */
838 i = ceil(op->op_count, rds_iwdev->max_sge);
839
840 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
841 if (work_alloc != i) {
842 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
843 rds_iw_stats_inc(s_iw_tx_ring_full);
844 ret = -ENOMEM;
845 goto out;
846 }
847
848 send = &ic->i_sends[pos];
849 if (!op->op_write) {
850 first = prev = &ic->i_sends[fr_pos];
851 } else {
852 first = send;
853 prev = NULL;
854 }
855 scat = &op->op_sg[0];
856 sent = 0;
857 num_sge = op->op_count;
858 sg_nents = 0;
859
860 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
861 send->s_rdma_wr.wr.send_flags = 0;
862 send->s_queued = jiffies;
863
864 /*
865 * We want to delay signaling completions just enough to get
866 * the batching benefits but not so much that we create dead time on the wire.
867 */
868 if (ic->i_unsignaled_wrs-- == 0) {
869 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
870 send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
871 }
872
873 /* To avoid the need to have the plumbing to invalidate the fastreg_mr used
874 * for local access after RDS is finished with it, using
875 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
876 */
877 if (op->op_write)
878 send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
879 else
880 send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
881
882 send->s_rdma_wr.remote_addr = remote_addr;
883 send->s_rdma_wr.rkey = op->op_rkey;
884 send->s_op = op;
885
886 if (num_sge > rds_iwdev->max_sge) {
887 send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
888 num_sge -= rds_iwdev->max_sge;
889 } else
890 send->s_rdma_wr.wr.num_sge = num_sge;
891
892 send->s_rdma_wr.wr.next = NULL;
893
894 if (prev)
895 prev->s_send_wr.next = &send->s_rdma_wr.wr;
896
897 for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
898 scat != &op->op_sg[op->op_count]; j++) {
899 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
900
901 if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
902 sg_nents++;
903 else {
904 send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
905 send->s_sge[j].length = len;
906 send->s_sge[j].lkey = rds_iw_local_dma_lkey(ic);
907 }
908
909 sent += len;
910 rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
911 remote_addr += len;
912
913 scat++;
914 }
915
916 if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
917 send->s_rdma_wr.wr.num_sge = 1;
918 send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
919 send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
920 send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
921 }
922
923 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
924 &send->s_rdma_wr,
925 send->s_rdma_wr.wr.num_sge,
926 send->s_rdma_wr.wr.next);
927
928 prev = send;
929 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
930 send = ic->i_sends;
931 }
932
933 /* if we finished the message then send completion owns it */
934 if (scat == &op->op_sg[op->op_count])
935 first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
936
937 if (i < work_alloc) {
938 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
939 work_alloc = i;
940 }
941
942 /* On iWARP, local memory access by a remote system (i.e., RDMA Read) is not
943 * recommended. Putting the lkey on the wire is a security hole, as it can
944 * allow access to all of the memory on the remote system. Some adapters do
945 * not allow using the lkey for this at all. To work around this, use a
946 * fastreg_mr (or possibly a dma_mr).
947 */
948 if (!op->op_write) {
949 ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
950 &op->op_sg[0], sg_nents);
951 if (ret) {
952 printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
953 goto out;
954 }
955 work_alloc++;
956 }
957
958 failed_wr = &first->s_rdma_wr.wr;
959 ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
960 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
961 first, &first->s_rdma_wr, ret, failed_wr);
962 BUG_ON(failed_wr != &first->s_rdma_wr.wr);
963 if (ret) {
964 printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
965 "returned %d\n", &conn->c_faddr, ret);
966 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
967 goto out;
968 }
969
970out:
971 return ret;
972}
973
974void rds_iw_xmit_complete(struct rds_connection *conn)
975{
976 struct rds_iw_connection *ic = conn->c_transport_data;
977
978 /* We may have a pending ACK or window update we were unable
979 * to send previously (due to flow control). Try again. */
980 rds_iw_attempt_ack(ic);
981}
diff --git a/net/rds/iw_stats.c b/net/rds/iw_stats.c
deleted file mode 100644
index 5fe67f6a1d80..000000000000
--- a/net/rds/iw_stats.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38#include "iw.h"
39
40DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_iw_statistics, rds_iw_stats);
41
42static const char *const rds_iw_stat_names[] = {
43 "iw_connect_raced",
44 "iw_listen_closed_stale",
45 "iw_tx_cq_call",
46 "iw_tx_cq_event",
47 "iw_tx_ring_full",
48 "iw_tx_throttle",
49 "iw_tx_sg_mapping_failure",
50 "iw_tx_stalled",
51 "iw_tx_credit_updates",
52 "iw_rx_cq_call",
53 "iw_rx_cq_event",
54 "iw_rx_ring_empty",
55 "iw_rx_refill_from_cq",
56 "iw_rx_refill_from_thread",
57 "iw_rx_alloc_limit",
58 "iw_rx_credit_updates",
59 "iw_ack_sent",
60 "iw_ack_send_failure",
61 "iw_ack_send_delayed",
62 "iw_ack_send_piggybacked",
63 "iw_ack_received",
64 "iw_rdma_mr_alloc",
65 "iw_rdma_mr_free",
66 "iw_rdma_mr_used",
67 "iw_rdma_mr_pool_flush",
68 "iw_rdma_mr_pool_wait",
69 "iw_rdma_mr_pool_depleted",
70};
71
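/* The summation below treats struct rds_iw_statistics as a flat array of
 * u64 counters; it is only correct while the struct contains nothing but
 * u64 fields, laid out in the same order as rds_iw_stat_names[] above.
 */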
72unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
73 unsigned int avail)
74{
75 struct rds_iw_statistics stats = {0, };
76 uint64_t *src;
77 uint64_t *sum;
78 size_t i;
79 int cpu;
80
81 if (avail < ARRAY_SIZE(rds_iw_stat_names))
82 goto out;
83
84 for_each_online_cpu(cpu) {
85 src = (uint64_t *)&(per_cpu(rds_iw_stats, cpu));
86 sum = (uint64_t *)&stats;
87 for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++)
88 *(sum++) += *(src++);
89 }
90
91 rds_stats_info_copy(iter, (uint64_t *)&stats, rds_iw_stat_names,
92 ARRAY_SIZE(rds_iw_stat_names));
93out:
94 return ARRAY_SIZE(rds_iw_stat_names);
95}
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
deleted file mode 100644
index 139239d2cb22..000000000000
--- a/net/rds/iw_sysctl.c
+++ /dev/null
@@ -1,123 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/sysctl.h>
35#include <linux/proc_fs.h>
36
37#include "iw.h"
38
39static struct ctl_table_header *rds_iw_sysctl_hdr;
40
41unsigned long rds_iw_sysctl_max_send_wr = RDS_IW_DEFAULT_SEND_WR;
42unsigned long rds_iw_sysctl_max_recv_wr = RDS_IW_DEFAULT_RECV_WR;
43unsigned long rds_iw_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE;
44static unsigned long rds_iw_sysctl_max_wr_min = 1;
45/* hardware will fail CQ creation long before this */
46static unsigned long rds_iw_sysctl_max_wr_max = (u32)~0;
47
48unsigned long rds_iw_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_iw_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_iw_sysctl_max_unsig_wr_max = 64;
51
52unsigned long rds_iw_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_iw_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL;
55
56unsigned int rds_iw_sysctl_flow_control = 1;
57
58static struct ctl_table rds_iw_sysctl_table[] = {
59 {
60 .procname = "max_send_wr",
61 .data = &rds_iw_sysctl_max_send_wr,
62 .maxlen = sizeof(unsigned long),
63 .mode = 0644,
64 .proc_handler = proc_doulongvec_minmax,
65 .extra1 = &rds_iw_sysctl_max_wr_min,
66 .extra2 = &rds_iw_sysctl_max_wr_max,
67 },
68 {
69 .procname = "max_recv_wr",
70 .data = &rds_iw_sysctl_max_recv_wr,
71 .maxlen = sizeof(unsigned long),
72 .mode = 0644,
73 .proc_handler = proc_doulongvec_minmax,
74 .extra1 = &rds_iw_sysctl_max_wr_min,
75 .extra2 = &rds_iw_sysctl_max_wr_max,
76 },
77 {
78 .procname = "max_unsignaled_wr",
79 .data = &rds_iw_sysctl_max_unsig_wrs,
80 .maxlen = sizeof(unsigned long),
81 .mode = 0644,
82 .proc_handler = proc_doulongvec_minmax,
83 .extra1 = &rds_iw_sysctl_max_unsig_wr_min,
84 .extra2 = &rds_iw_sysctl_max_unsig_wr_max,
85 },
86 {
87 .procname = "max_unsignaled_bytes",
88 .data = &rds_iw_sysctl_max_unsig_bytes,
89 .maxlen = sizeof(unsigned long),
90 .mode = 0644,
91 .proc_handler = proc_doulongvec_minmax,
92 .extra1 = &rds_iw_sysctl_max_unsig_bytes_min,
93 .extra2 = &rds_iw_sysctl_max_unsig_bytes_max,
94 },
95 {
96 .procname = "max_recv_allocation",
97 .data = &rds_iw_sysctl_max_recv_allocation,
98 .maxlen = sizeof(unsigned long),
99 .mode = 0644,
100 .proc_handler = proc_doulongvec_minmax,
101 },
102 {
103 .procname = "flow_control",
104 .data = &rds_iw_sysctl_flow_control,
105 .maxlen = sizeof(rds_iw_sysctl_flow_control),
106 .mode = 0644,
107 .proc_handler = proc_dointvec,
108 },
109 { }
110};
111
112void rds_iw_sysctl_exit(void)
113{
114 unregister_net_sysctl_table(rds_iw_sysctl_hdr);
115}
116
117int rds_iw_sysctl_init(void)
118{
119 rds_iw_sysctl_hdr = register_net_sysctl(&init_net, "net/rds/iw", rds_iw_sysctl_table);
120 if (!rds_iw_sysctl_hdr)
121 return -ENOMEM;
122 return 0;
123}
diff --git a/net/rds/page.c b/net/rds/page.c
index 5a14e6d6a926..e2b5a5832d3d 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -42,8 +42,8 @@ struct rds_page_remainder {
 	unsigned long	r_offset;
 };
 
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder,
-				     rds_page_remainders);
+static
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
 
 /*
  * returns 0 on success or -errno on failure.
@@ -135,8 +135,8 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
 		if (rem->r_offset != 0)
 			rds_stats_inc(s_page_remainder_hit);
 
-		rem->r_offset += bytes;
-		if (rem->r_offset == PAGE_SIZE) {
+		rem->r_offset += ALIGN(bytes, 8);
+		if (rem->r_offset >= PAGE_SIZE) {
 			__free_page(rem->r_page);
 			rem->r_page = NULL;
 		}
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 9c1fed81bf0f..7220bebcf558 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -49,9 +49,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 	rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
 		 event->event, rdma_event_msg(event->event));
 
-	if (cm_id->device->node_type == RDMA_NODE_RNIC)
-		trans = &rds_iw_transport;
-	else
-		trans = &rds_ib_transport;
+	if (cm_id->device->node_type == RDMA_NODE_IB_CA)
+		trans = &rds_ib_transport;
 
 	/* Prevent shutdown from tearing down the connection
@@ -119,6 +117,14 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 		rds_conn_drop(conn);
 		break;
 
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+		if (conn) {
+			pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI4->%pI4\n",
+				&conn->c_laddr, &conn->c_faddr);
+			rds_conn_drop(conn);
+		}
+		break;
+
 	default:
 		/* things like device disconnect? */
 		printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
@@ -200,10 +206,6 @@ static int rds_rdma_init(void)
 	if (ret)
 		goto out;
 
-	ret = rds_iw_init();
-	if (ret)
-		goto err_iw_init;
-
 	ret = rds_ib_init();
 	if (ret)
 		goto err_ib_init;
@@ -211,8 +213,6 @@ static int rds_rdma_init(void)
 	goto out;
 
 err_ib_init:
-	rds_iw_exit();
-err_iw_init:
 	rds_rdma_listen_stop();
 out:
 	return ret;
@@ -224,11 +224,10 @@ static void rds_rdma_exit(void)
 	/* stop listening first to ensure no new connections are attempted */
 	rds_rdma_listen_stop();
 	rds_ib_exit();
-	rds_iw_exit();
 }
 module_exit(rds_rdma_exit);
 
 MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
-MODULE_DESCRIPTION("RDS: IB/iWARP transport");
+MODULE_DESCRIPTION("RDS: IB transport");
 MODULE_LICENSE("Dual BSD/GPL");
 
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index faba4e382695..ff2010e9d20c 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -16,9 +16,4 @@ extern struct rds_transport rds_ib_transport;
 int rds_ib_init(void);
 void rds_ib_exit(void);
 
-/* from iw.c */
-extern struct rds_transport rds_iw_transport;
-int rds_iw_init(void);
-void rds_iw_exit(void);
-
 #endif
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0e2797bdc316..80256b08eac0 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -222,6 +222,7 @@ struct rds_incoming {
 	__be32			i_saddr;
 
 	rds_rdma_cookie_t	i_rdma_cookie;
+	struct timeval		i_rx_tstamp;
 };
 
 struct rds_mr {
diff --git a/net/rds/recv.c b/net/rds/recv.c
index a00462b0d01d..c0be1ecd11c9 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -35,6 +35,8 @@
 #include <net/sock.h>
 #include <linux/in.h>
 #include <linux/export.h>
+#include <linux/time.h>
+#include <linux/rds.h>
 
 #include "rds.h"
 
@@ -46,6 +48,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
 	inc->i_conn = conn;
 	inc->i_saddr = saddr;
 	inc->i_rdma_cookie = 0;
+	inc->i_rx_tstamp.tv_sec = 0;
+	inc->i_rx_tstamp.tv_usec = 0;
 }
 EXPORT_SYMBOL_GPL(rds_inc_init);
 
@@ -228,6 +232,8 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
 		rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
 				      be32_to_cpu(inc->i_hdr.h_len),
 				      inc->i_hdr.h_dport);
+		if (sock_flag(sk, SOCK_RCVTSTAMP))
+			do_gettimeofday(&inc->i_rx_tstamp);
 		rds_inc_addref(inc);
 		list_add_tail(&inc->i_item, &rs->rs_recv_queue);
 		__rds_wake_sk_sleep(sk);
@@ -381,7 +387,8 @@ static int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr)
 /*
  * Receive any control messages.
  */
-static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg)
+static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
+			 struct rds_sock *rs)
 {
 	int ret = 0;
 
@@ -392,6 +399,15 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg)
 		return ret;
 	}
 
+	if ((inc->i_rx_tstamp.tv_sec != 0) &&
+	    sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
+		ret = put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+			       sizeof(struct timeval),
+			       &inc->i_rx_tstamp);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -474,7 +490,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	if (rds_cmsg_recv(inc, msg)) {
+	if (rds_cmsg_recv(inc, msg, rs)) {
 		ret = -EFAULT;
 		goto out;
 	}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 9d6ddbacd875..86187dad1440 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -37,7 +37,6 @@
37#include <net/tcp.h> 37#include <net/tcp.h>
38#include <net/net_namespace.h> 38#include <net/net_namespace.h>
39#include <net/netns/generic.h> 39#include <net/netns/generic.h>
40#include <net/tcp.h>
41 40
42#include "rds.h" 41#include "rds.h"
43#include "tcp.h" 42#include "tcp.h"
@@ -53,7 +52,34 @@ static LIST_HEAD(rds_tcp_conn_list);
53 52
54static struct kmem_cache *rds_tcp_conn_slab; 53static struct kmem_cache *rds_tcp_conn_slab;
55 54
56#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024) 55static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
56 void __user *buffer, size_t *lenp,
57 loff_t *fpos);
58
59int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
60int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
61
62static struct ctl_table rds_tcp_sysctl_table[] = {
63#define RDS_TCP_SNDBUF 0
64 {
65 .procname = "rds_tcp_sndbuf",
66 /* data is per-net pointer */
67 .maxlen = sizeof(int),
68 .mode = 0644,
69 .proc_handler = rds_tcp_skbuf_handler,
70 .extra1 = &rds_tcp_min_sndbuf,
71 },
72#define RDS_TCP_RCVBUF 1
73 {
74 .procname = "rds_tcp_rcvbuf",
75 /* data is per-net pointer */
76 .maxlen = sizeof(int),
77 .mode = 0644,
78 .proc_handler = rds_tcp_skbuf_handler,
79 .extra1 = &rds_tcp_min_rcvbuf,
80 },
81 { }
82};
57 83
58/* doing it this way avoids calling tcp_sk() */ 84/* doing it this way avoids calling tcp_sk() */
59void rds_tcp_nonagle(struct socket *sock) 85void rds_tcp_nonagle(struct socket *sock)
@@ -67,15 +93,6 @@ void rds_tcp_nonagle(struct socket *sock)
67 set_fs(oldfs); 93 set_fs(oldfs);
68} 94}
69 95
70/* All module specific customizations to the RDS-TCP socket should be done in
71 * rds_tcp_tune() and applied after socket creation. In general these
72 * customizations should be tunable via module_param()
73 */
74void rds_tcp_tune(struct socket *sock)
75{
76 rds_tcp_nonagle(sock);
77}
78
79u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) 96u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
80{ 97{
81 return tcp_sk(tc->t_sock->sk)->snd_nxt; 98 return tcp_sk(tc->t_sock->sk)->snd_nxt;
@@ -110,7 +127,7 @@ void rds_tcp_restore_callbacks(struct socket *sock,
110 127
111/* 128/*
112 * This is the only path that sets tc->t_sock. Send and receive trust that 129 * This is the only path that sets tc->t_sock. Send and receive trust that
113 * it is set. The RDS_CONN_CONNECTED bit protects those paths from being 130 * it is set. The RDS_CONN_UP bit protects those paths from being
114 * called while it isn't set. 131 * called while it isn't set.
115 */ 132 */
116void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) 133void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
@@ -199,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
199 if (!tc) 216 if (!tc)
200 return -ENOMEM; 217 return -ENOMEM;
201 218
219 mutex_init(&tc->t_conn_lock);
202 tc->t_sock = NULL; 220 tc->t_sock = NULL;
203 tc->t_tinc = NULL; 221 tc->t_tinc = NULL;
204 tc->t_tinc_hdr_rem = sizeof(struct rds_header); 222 tc->t_tinc_hdr_rem = sizeof(struct rds_header);
@@ -273,8 +291,34 @@ static int rds_tcp_netid;
273struct rds_tcp_net { 291struct rds_tcp_net {
274 struct socket *rds_tcp_listen_sock; 292 struct socket *rds_tcp_listen_sock;
275 struct work_struct rds_tcp_accept_w; 293 struct work_struct rds_tcp_accept_w;
294 struct ctl_table_header *rds_tcp_sysctl;
295 struct ctl_table *ctl_table;
296 int sndbuf_size;
297 int rcvbuf_size;
276}; 298};
277 299
300/* All module specific customizations to the RDS-TCP socket should be done in
301 * rds_tcp_tune() and applied after socket creation.
302 */
303void rds_tcp_tune(struct socket *sock)
304{
305 struct sock *sk = sock->sk;
306 struct net *net = sock_net(sk);
307 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
308
309 rds_tcp_nonagle(sock);
310 lock_sock(sk);
311 if (rtn->sndbuf_size > 0) {
312 sk->sk_sndbuf = rtn->sndbuf_size;
313 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
314 }
315 if (rtn->rcvbuf_size > 0) {
316			sk->sk_rcvbuf = rtn->rcvbuf_size;
317 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
318 }
319 release_sock(sk);
320}
321
278static void rds_tcp_accept_worker(struct work_struct *work) 322static void rds_tcp_accept_worker(struct work_struct *work)
279{ 323{
280 struct rds_tcp_net *rtn = container_of(work, 324 struct rds_tcp_net *rtn = container_of(work,
@@ -296,20 +340,60 @@ void rds_tcp_accept_work(struct sock *sk)
296static __net_init int rds_tcp_init_net(struct net *net) 340static __net_init int rds_tcp_init_net(struct net *net)
297{ 341{
298 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 342 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
343 struct ctl_table *tbl;
344 int err = 0;
299 345
346 memset(rtn, 0, sizeof(*rtn));
347
348 /* {snd, rcv}buf_size default to 0, which implies we let the
349 * stack pick the value, and permit auto-tuning of buffer size.
350 */
351 if (net == &init_net) {
352 tbl = rds_tcp_sysctl_table;
353 } else {
354 tbl = kmemdup(rds_tcp_sysctl_table,
355 sizeof(rds_tcp_sysctl_table), GFP_KERNEL);
356 if (!tbl) {
357			pr_warn("could not allocate sysctl table\n");
358 return -ENOMEM;
359 }
360 rtn->ctl_table = tbl;
361 }
362 tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size;
363 tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size;
364 rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl);
365 if (!rtn->rds_tcp_sysctl) {
366 pr_warn("could not register sysctl\n");
367 err = -ENOMEM;
368 goto fail;
369 }
300 rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net); 370 rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
301 if (!rtn->rds_tcp_listen_sock) { 371 if (!rtn->rds_tcp_listen_sock) {
302 pr_warn("could not set up listen sock\n"); 372 pr_warn("could not set up listen sock\n");
303 return -EAFNOSUPPORT; 373 unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
374 rtn->rds_tcp_sysctl = NULL;
375 err = -EAFNOSUPPORT;
376 goto fail;
304 } 377 }
305 INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); 378 INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
306 return 0; 379 return 0;
380
381fail:
382 if (net != &init_net)
383 kfree(tbl);
384 return err;
307} 385}
308 386
309static void __net_exit rds_tcp_exit_net(struct net *net) 387static void __net_exit rds_tcp_exit_net(struct net *net)
310{ 388{
311 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 389 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
312 390
391 if (rtn->rds_tcp_sysctl)
392 unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
393
394 if (net != &init_net && rtn->ctl_table)
395 kfree(rtn->ctl_table);
396
313 /* If rds_tcp_exit_net() is called as a result of netns deletion, 397 /* If rds_tcp_exit_net() is called as a result of netns deletion,
314 * the rds_tcp_kill_sock() device notifier would already have cleaned 398 * the rds_tcp_kill_sock() device notifier would already have cleaned
315 * up the listen socket, thus there is no work to do in this function. 399 * up the listen socket, thus there is no work to do in this function.
@@ -384,6 +468,45 @@ static struct notifier_block rds_tcp_dev_notifier = {
384 .priority = -10, /* must be called after other network notifiers */ 468 .priority = -10, /* must be called after other network notifiers */
385}; 469};
386 470
471/* When sysctl is used to modify some kernel socket parameters, this
472 * function resets the RDS connections in that netns so that we can
473 * restart with new parameters. The assumption is that such reset
474 * events are few and far-between.
475 */
476static void rds_tcp_sysctl_reset(struct net *net)
477{
478 struct rds_tcp_connection *tc, *_tc;
479
480 spin_lock_irq(&rds_tcp_conn_lock);
481 list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
482 struct net *c_net = read_pnet(&tc->conn->c_net);
483
484 if (net != c_net || !tc->t_sock)
485 continue;
486
487 rds_conn_drop(tc->conn); /* reconnect with new parameters */
488 }
489 spin_unlock_irq(&rds_tcp_conn_lock);
490}
491
492static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
493 void __user *buffer, size_t *lenp,
494 loff_t *fpos)
495{
496 struct net *net = current->nsproxy->net_ns;
497 int err;
498
499 err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos);
500 if (err < 0) {
501 pr_warn("Invalid input. Must be >= %d\n",
502 *(int *)(ctl->extra1));
503 return err;
504 }
505 if (write)
506 rds_tcp_sysctl_reset(net);
507 return 0;
508}
509
387static void rds_tcp_exit(void) 510static void rds_tcp_exit(void)
388{ 511{
389 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 512 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
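The sysctl plumbing above creates two per-network-namespace knobs registered under "net/rds/tcp", whose values rds_tcp_tune() applies to every newly created RDS-TCP socket; writing either one runs rds_tcp_skbuf_handler(), which drops the existing connections in that namespace so they reconnect with the new sizes. A hedged sketch of driving the knobs from userspace follows; the /proc paths are derived from the register_net_sysctl() call above and the 1 MiB value is arbitrary.

/* hedged sketch: set the new RDS-TCP buffer-size sysctls */
#include <stdio.h>

static int set_sysctl(const char *path, long val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%ld\n", val);	/* triggers rds_tcp_skbuf_handler() */
	return fclose(f);
}

int main(void)
{
	/* existing RDS-TCP connections in this netns are reset so that
	 * they come back up with the new, locked buffer sizes */
	if (set_sysctl("/proc/sys/net/rds/tcp/rds_tcp_sndbuf", 1L << 20) ||
	    set_sysctl("/proc/sys/net/rds/tcp/rds_tcp_rcvbuf", 1L << 20)) {
		perror("rds_tcp sysctl");
		return 1;
	}
	return 0;
}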
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 64f873c0c6b6..41c228300525 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,10 @@ struct rds_tcp_connection {
12 12
13 struct list_head t_tcp_node; 13 struct list_head t_tcp_node;
14 struct rds_connection *conn; 14 struct rds_connection *conn;
15 /* t_conn_lock synchronizes the connection establishment between
16 * rds_tcp_accept_one and rds_tcp_conn_connect
17 */
18 struct mutex t_conn_lock;
15 struct socket *t_sock; 19 struct socket *t_sock;
16 void *t_orig_write_space; 20 void *t_orig_write_space;
17 void *t_orig_data_ready; 21 void *t_orig_data_ready;
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 5cb16875c460..49a3fcfed360 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
78 struct socket *sock = NULL; 78 struct socket *sock = NULL;
79 struct sockaddr_in src, dest; 79 struct sockaddr_in src, dest;
80 int ret; 80 int ret;
81 struct rds_tcp_connection *tc = conn->c_transport_data;
82
83 mutex_lock(&tc->t_conn_lock);
81 84
85 if (rds_conn_up(conn)) {
86 mutex_unlock(&tc->t_conn_lock);
87 return 0;
88 }
82 ret = sock_create_kern(rds_conn_net(conn), PF_INET, 89 ret = sock_create_kern(rds_conn_net(conn), PF_INET,
83 SOCK_STREAM, IPPROTO_TCP, &sock); 90 SOCK_STREAM, IPPROTO_TCP, &sock);
84 if (ret < 0) 91 if (ret < 0)
@@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
120 } 127 }
121 128
122out: 129out:
130 mutex_unlock(&tc->t_conn_lock);
123 if (sock) 131 if (sock)
124 sock_release(sock); 132 sock_release(sock);
125 return ret; 133 return ret;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 0936a4a32b47..be263cdf268b 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock)
76 struct rds_connection *conn; 76 struct rds_connection *conn;
77 int ret; 77 int ret;
78 struct inet_sock *inet; 78 struct inet_sock *inet;
79 struct rds_tcp_connection *rs_tcp; 79 struct rds_tcp_connection *rs_tcp = NULL;
80 int conn_state;
81 struct sock *nsk;
80 82
81 ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, 83 ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
82 sock->sk->sk_type, sock->sk->sk_protocol, 84 sock->sk->sk_type, sock->sk->sk_protocol,
@@ -115,28 +117,44 @@ int rds_tcp_accept_one(struct socket *sock)
115 * rds_tcp_state_change() will do that cleanup 117 * rds_tcp_state_change() will do that cleanup
116 */ 118 */
117 rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; 119 rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
118 if (rs_tcp->t_sock &&
119 ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
120 struct sock *nsk = new_sock->sk;
121
122 nsk->sk_user_data = NULL;
123 nsk->sk_prot->disconnect(nsk, 0);
124 tcp_done(nsk);
125 new_sock = NULL;
126 ret = 0;
127 goto out;
128 } else if (rs_tcp->t_sock) {
129 rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
130 conn->c_outgoing = 0;
131 }
132
133 rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); 120 rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
121 mutex_lock(&rs_tcp->t_conn_lock);
122 conn_state = rds_conn_state(conn);
123 if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
124 goto rst_nsk;
125 if (rs_tcp->t_sock) {
126 /* Need to resolve a duelling SYN between peers.
127 * We have an outstanding SYN to this peer, which may
128 * potentially have transitioned to the RDS_CONN_UP state,
129 * so we must quiesce any send threads before resetting
130 * c_transport_data.
131 */
132 wait_event(conn->c_waitq,
133 !test_bit(RDS_IN_XMIT, &conn->c_flags));
134 if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
135 goto rst_nsk;
136 } else if (rs_tcp->t_sock) {
137 rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
138 conn->c_outgoing = 0;
139 }
140 }
134 rds_tcp_set_callbacks(new_sock, conn); 141 rds_tcp_set_callbacks(new_sock, conn);
135 rds_connect_complete(conn); 142 rds_connect_complete(conn); /* marks RDS_CONN_UP */
143 new_sock = NULL;
144 ret = 0;
145 goto out;
146rst_nsk:
147 /* reset the newly returned accept sock and bail */
148 nsk = new_sock->sk;
149 rds_tcp_stats_inc(s_tcp_listen_closed_stale);
150 nsk->sk_user_data = NULL;
151 nsk->sk_prot->disconnect(nsk, 0);
152 tcp_done(nsk);
136 new_sock = NULL; 153 new_sock = NULL;
137 ret = 0; 154 ret = 0;
138
139out: 155out:
156 if (rs_tcp)
157 mutex_unlock(&rs_tcp->t_conn_lock);
140 if (new_sock) 158 if (new_sock)
141 sock_release(new_sock); 159 sock_release(new_sock);
142 return ret; 160 return ret;
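The rewritten accept path above serializes against rds_tcp_conn_connect() via t_conn_lock and then resolves a duelling SYN deterministically: the node whose address is numerically lower keeps its own outgoing connection and resets the socket it just accepted (the rst_nsk label), while the other node adopts the accepted socket. Below is a hedged standalone sketch of that rule in isolation; the helper name and demo addresses are illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* true: keep the outgoing connection and reset the accepted socket
 * (rst_nsk); false: adopt the accepted socket and restore callbacks on
 * the old one.  Both peers evaluate the same address pair, so exactly
 * one of them keeps its SYN and the pair converges on one connection. */
static bool keep_outgoing_conn(uint32_t my_addr_be, uint32_t peer_addr_be)
{
	return ntohl(my_addr_be) < ntohl(peer_addr_be);
}

int main(void)
{
	uint32_t a = inet_addr("192.168.1.1");	/* assumed peer addresses */
	uint32_t b = inet_addr("192.168.1.2");

	printf("node a keeps its outgoing conn: %d\n", keep_outgoing_conn(a, b));
	printf("node b keeps its outgoing conn: %d\n", keep_outgoing_conn(b, a));
	return 0;
}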
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 598d374f6a35..868f1ad0415a 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -41,5 +41,4 @@ config RFKILL_GPIO
41 default n 41 default n
42 help 42 help
43 If you say yes here you get support of a generic gpio RFKILL 43 If you say yes here you get support of a generic gpio RFKILL
44 driver. The platform should fill in the appropriate fields in the 44 driver.
45 rfkill_gpio_platform_data structure and pass that to the driver.
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index cf5b69ab1829..03f26e3a6f48 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -57,6 +57,8 @@ struct rfkill {
57 57
58 bool registered; 58 bool registered;
59 bool persistent; 59 bool persistent;
60 bool polling_paused;
61 bool suspended;
60 62
61 const struct rfkill_ops *ops; 63 const struct rfkill_ops *ops;
62 void *data; 64 void *data;
@@ -233,29 +235,6 @@ static void rfkill_event(struct rfkill *rfkill)
233 rfkill_send_events(rfkill, RFKILL_OP_CHANGE); 235 rfkill_send_events(rfkill, RFKILL_OP_CHANGE);
234} 236}
235 237
236static bool __rfkill_set_hw_state(struct rfkill *rfkill,
237 bool blocked, bool *change)
238{
239 unsigned long flags;
240 bool prev, any;
241
242 BUG_ON(!rfkill);
243
244 spin_lock_irqsave(&rfkill->lock, flags);
245 prev = !!(rfkill->state & RFKILL_BLOCK_HW);
246 if (blocked)
247 rfkill->state |= RFKILL_BLOCK_HW;
248 else
249 rfkill->state &= ~RFKILL_BLOCK_HW;
250 *change = prev != blocked;
251 any = !!(rfkill->state & RFKILL_BLOCK_ANY);
252 spin_unlock_irqrestore(&rfkill->lock, flags);
253
254 rfkill_led_trigger_event(rfkill);
255
256 return any;
257}
258
259/** 238/**
260 * rfkill_set_block - wrapper for set_block method 239 * rfkill_set_block - wrapper for set_block method
261 * 240 *
@@ -285,7 +264,7 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
285 spin_lock_irqsave(&rfkill->lock, flags); 264 spin_lock_irqsave(&rfkill->lock, flags);
286 prev = rfkill->state & RFKILL_BLOCK_SW; 265 prev = rfkill->state & RFKILL_BLOCK_SW;
287 266
288 if (rfkill->state & RFKILL_BLOCK_SW) 267 if (prev)
289 rfkill->state |= RFKILL_BLOCK_SW_PREV; 268 rfkill->state |= RFKILL_BLOCK_SW_PREV;
290 else 269 else
291 rfkill->state &= ~RFKILL_BLOCK_SW_PREV; 270 rfkill->state &= ~RFKILL_BLOCK_SW_PREV;
@@ -303,8 +282,8 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
303 spin_lock_irqsave(&rfkill->lock, flags); 282 spin_lock_irqsave(&rfkill->lock, flags);
304 if (err) { 283 if (err) {
305 /* 284 /*
306 * Failed -- reset status to _prev, this may be different 285 * Failed -- reset status to _PREV, which may be different
307 * from what set set _PREV to earlier in this function 286 * from what we have set _PREV to earlier in this function
308 * if rfkill_set_sw_state was invoked. 287 * if rfkill_set_sw_state was invoked.
309 */ 288 */
310 if (rfkill->state & RFKILL_BLOCK_SW_PREV) 289 if (rfkill->state & RFKILL_BLOCK_SW_PREV)
@@ -323,6 +302,19 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
323 rfkill_event(rfkill); 302 rfkill_event(rfkill);
324} 303}
325 304
305static void rfkill_update_global_state(enum rfkill_type type, bool blocked)
306{
307 int i;
308
309 if (type != RFKILL_TYPE_ALL) {
310 rfkill_global_states[type].cur = blocked;
311 return;
312 }
313
314 for (i = 0; i < NUM_RFKILL_TYPES; i++)
315 rfkill_global_states[i].cur = blocked;
316}
317
326#ifdef CONFIG_RFKILL_INPUT 318#ifdef CONFIG_RFKILL_INPUT
327static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); 319static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
328 320
@@ -332,8 +324,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
332 * @blocked: the new state 324 * @blocked: the new state
333 * 325 *
334 * This function sets the state of all switches of given type, 326 * This function sets the state of all switches of given type,
335 * unless a specific switch is claimed by userspace (in which case, 327 * unless a specific switch is suspended.
336 * that switch is left alone) or suspended.
337 * 328 *
338 * Caller must have acquired rfkill_global_mutex. 329 * Caller must have acquired rfkill_global_mutex.
339 */ 330 */
@@ -341,15 +332,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
341{ 332{
342 struct rfkill *rfkill; 333 struct rfkill *rfkill;
343 334
344 if (type == RFKILL_TYPE_ALL) { 335 rfkill_update_global_state(type, blocked);
345 int i;
346
347 for (i = 0; i < NUM_RFKILL_TYPES; i++)
348 rfkill_global_states[i].cur = blocked;
349 } else {
350 rfkill_global_states[type].cur = blocked;
351 }
352
353 list_for_each_entry(rfkill, &rfkill_list, node) { 336 list_for_each_entry(rfkill, &rfkill_list, node) {
354 if (rfkill->type != type && type != RFKILL_TYPE_ALL) 337 if (rfkill->type != type && type != RFKILL_TYPE_ALL)
355 continue; 338 continue;
@@ -477,17 +460,28 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type)
477} 460}
478#endif 461#endif
479 462
480
481bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) 463bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
482{ 464{
483 bool ret, change; 465 unsigned long flags;
466 bool ret, prev;
467
468 BUG_ON(!rfkill);
484 469
485 ret = __rfkill_set_hw_state(rfkill, blocked, &change); 470 spin_lock_irqsave(&rfkill->lock, flags);
471 prev = !!(rfkill->state & RFKILL_BLOCK_HW);
472 if (blocked)
473 rfkill->state |= RFKILL_BLOCK_HW;
474 else
475 rfkill->state &= ~RFKILL_BLOCK_HW;
476 ret = !!(rfkill->state & RFKILL_BLOCK_ANY);
477 spin_unlock_irqrestore(&rfkill->lock, flags);
478
479 rfkill_led_trigger_event(rfkill);
486 480
487 if (!rfkill->registered) 481 if (!rfkill->registered)
488 return ret; 482 return ret;
489 483
490 if (change) 484 if (prev != blocked)
491 schedule_work(&rfkill->uevent_work); 485 schedule_work(&rfkill->uevent_work);
492 486
493 return ret; 487 return ret;
@@ -582,6 +576,34 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
582} 576}
583EXPORT_SYMBOL(rfkill_set_states); 577EXPORT_SYMBOL(rfkill_set_states);
584 578
579static const char * const rfkill_types[] = {
580 NULL, /* RFKILL_TYPE_ALL */
581 "wlan",
582 "bluetooth",
583 "ultrawideband",
584 "wimax",
585 "wwan",
586 "gps",
587 "fm",
588 "nfc",
589};
590
591enum rfkill_type rfkill_find_type(const char *name)
592{
593 int i;
594
595 BUILD_BUG_ON(ARRAY_SIZE(rfkill_types) != NUM_RFKILL_TYPES);
596
597 if (!name)
598 return RFKILL_TYPE_ALL;
599
600 for (i = 1; i < NUM_RFKILL_TYPES; i++)
601 if (!strcmp(name, rfkill_types[i]))
602 return i;
603 return RFKILL_TYPE_ALL;
604}
605EXPORT_SYMBOL(rfkill_find_type);
606
585static ssize_t name_show(struct device *dev, struct device_attribute *attr, 607static ssize_t name_show(struct device *dev, struct device_attribute *attr,
586 char *buf) 608 char *buf)
587{ 609{
@@ -591,38 +613,12 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr,
591} 613}
592static DEVICE_ATTR_RO(name); 614static DEVICE_ATTR_RO(name);
593 615
594static const char *rfkill_get_type_str(enum rfkill_type type)
595{
596 BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1);
597
598 switch (type) {
599 case RFKILL_TYPE_WLAN:
600 return "wlan";
601 case RFKILL_TYPE_BLUETOOTH:
602 return "bluetooth";
603 case RFKILL_TYPE_UWB:
604 return "ultrawideband";
605 case RFKILL_TYPE_WIMAX:
606 return "wimax";
607 case RFKILL_TYPE_WWAN:
608 return "wwan";
609 case RFKILL_TYPE_GPS:
610 return "gps";
611 case RFKILL_TYPE_FM:
612 return "fm";
613 case RFKILL_TYPE_NFC:
614 return "nfc";
615 default:
616 BUG();
617 }
618}
619
620static ssize_t type_show(struct device *dev, struct device_attribute *attr, 616static ssize_t type_show(struct device *dev, struct device_attribute *attr,
621 char *buf) 617 char *buf)
622{ 618{
623 struct rfkill *rfkill = to_rfkill(dev); 619 struct rfkill *rfkill = to_rfkill(dev);
624 620
625 return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); 621 return sprintf(buf, "%s\n", rfkill_types[rfkill->type]);
626} 622}
627static DEVICE_ATTR_RO(type); 623static DEVICE_ATTR_RO(type);
628 624
@@ -730,20 +726,12 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
730} 726}
731static DEVICE_ATTR_RW(state); 727static DEVICE_ATTR_RW(state);
732 728
733static ssize_t claim_show(struct device *dev, struct device_attribute *attr,
734 char *buf)
735{
736 return sprintf(buf, "%d\n", 0);
737}
738static DEVICE_ATTR_RO(claim);
739
740static struct attribute *rfkill_dev_attrs[] = { 729static struct attribute *rfkill_dev_attrs[] = {
741 &dev_attr_name.attr, 730 &dev_attr_name.attr,
742 &dev_attr_type.attr, 731 &dev_attr_type.attr,
743 &dev_attr_index.attr, 732 &dev_attr_index.attr,
744 &dev_attr_persistent.attr, 733 &dev_attr_persistent.attr,
745 &dev_attr_state.attr, 734 &dev_attr_state.attr,
746 &dev_attr_claim.attr,
747 &dev_attr_soft.attr, 735 &dev_attr_soft.attr,
748 &dev_attr_hard.attr, 736 &dev_attr_hard.attr,
749 NULL, 737 NULL,
@@ -768,7 +756,7 @@ static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
768 if (error) 756 if (error)
769 return error; 757 return error;
770 error = add_uevent_var(env, "RFKILL_TYPE=%s", 758 error = add_uevent_var(env, "RFKILL_TYPE=%s",
771 rfkill_get_type_str(rfkill->type)); 759 rfkill_types[rfkill->type]);
772 if (error) 760 if (error)
773 return error; 761 return error;
774 spin_lock_irqsave(&rfkill->lock, flags); 762 spin_lock_irqsave(&rfkill->lock, flags);
@@ -786,6 +774,7 @@ void rfkill_pause_polling(struct rfkill *rfkill)
786 if (!rfkill->ops->poll) 774 if (!rfkill->ops->poll)
787 return; 775 return;
788 776
777 rfkill->polling_paused = true;
789 cancel_delayed_work_sync(&rfkill->poll_work); 778 cancel_delayed_work_sync(&rfkill->poll_work);
790} 779}
791EXPORT_SYMBOL(rfkill_pause_polling); 780EXPORT_SYMBOL(rfkill_pause_polling);
@@ -797,6 +786,11 @@ void rfkill_resume_polling(struct rfkill *rfkill)
797 if (!rfkill->ops->poll) 786 if (!rfkill->ops->poll)
798 return; 787 return;
799 788
789 rfkill->polling_paused = false;
790
791 if (rfkill->suspended)
792 return;
793
800 queue_delayed_work(system_power_efficient_wq, 794 queue_delayed_work(system_power_efficient_wq,
801 &rfkill->poll_work, 0); 795 &rfkill->poll_work, 0);
802} 796}
@@ -807,7 +801,8 @@ static int rfkill_suspend(struct device *dev)
807{ 801{
808 struct rfkill *rfkill = to_rfkill(dev); 802 struct rfkill *rfkill = to_rfkill(dev);
809 803
810 rfkill_pause_polling(rfkill); 804 rfkill->suspended = true;
805 cancel_delayed_work_sync(&rfkill->poll_work);
811 806
812 return 0; 807 return 0;
813} 808}
@@ -817,12 +812,16 @@ static int rfkill_resume(struct device *dev)
817 struct rfkill *rfkill = to_rfkill(dev); 812 struct rfkill *rfkill = to_rfkill(dev);
818 bool cur; 813 bool cur;
819 814
815 rfkill->suspended = false;
816
820 if (!rfkill->persistent) { 817 if (!rfkill->persistent) {
821 cur = !!(rfkill->state & RFKILL_BLOCK_SW); 818 cur = !!(rfkill->state & RFKILL_BLOCK_SW);
822 rfkill_set_block(rfkill, cur); 819 rfkill_set_block(rfkill, cur);
823 } 820 }
824 821
825 rfkill_resume_polling(rfkill); 822 if (rfkill->ops->poll && !rfkill->polling_paused)
823 queue_delayed_work(system_power_efficient_wq,
824 &rfkill->poll_work, 0);
826 825
827 return 0; 826 return 0;
828} 827}
@@ -1164,15 +1163,8 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
1164 1163
1165 mutex_lock(&rfkill_global_mutex); 1164 mutex_lock(&rfkill_global_mutex);
1166 1165
1167 if (ev.op == RFKILL_OP_CHANGE_ALL) { 1166 if (ev.op == RFKILL_OP_CHANGE_ALL)
1168 if (ev.type == RFKILL_TYPE_ALL) { 1167 rfkill_update_global_state(ev.type, ev.soft);
1169 enum rfkill_type i;
1170 for (i = 0; i < NUM_RFKILL_TYPES; i++)
1171 rfkill_global_states[i].cur = ev.soft;
1172 } else {
1173 rfkill_global_states[ev.type].cur = ev.soft;
1174 }
1175 }
1176 1168
1177 list_for_each_entry(rfkill, &rfkill_list, node) { 1169 list_for_each_entry(rfkill, &rfkill_list, node) {
1178 if (rfkill->idx != ev.idx && ev.op != RFKILL_OP_CHANGE_ALL) 1170 if (rfkill->idx != ev.idx && ev.op != RFKILL_OP_CHANGE_ALL)
@@ -1261,10 +1253,8 @@ static struct miscdevice rfkill_miscdev = {
1261static int __init rfkill_init(void) 1253static int __init rfkill_init(void)
1262{ 1254{
1263 int error; 1255 int error;
1264 int i;
1265 1256
1266 for (i = 0; i < NUM_RFKILL_TYPES; i++) 1257 rfkill_update_global_state(RFKILL_TYPE_ALL, !rfkill_default_state);
1267 rfkill_global_states[i].cur = !rfkill_default_state;
1268 1258
1269 error = class_register(&rfkill_class); 1259 error = class_register(&rfkill_class);
1270 if (error) 1260 if (error)
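The core.c hunks above replace the rfkill_get_type_str() switch with an rfkill_types[] table and export rfkill_find_type(), so drivers can map a firmware- or property-supplied type string onto enum rfkill_type (rfkill-gpio, next, becomes the first user). A hedged sketch of how a platform driver might use it follows; the "radio-type" property name, the ops pointer and the helper itself are illustrative, not an existing in-tree driver.

/* hedged sketch of a driver consuming rfkill_find_type() */
#include <linux/device.h>
#include <linux/property.h>
#include <linux/rfkill.h>

static int example_register_rfkill(struct device *dev,
				   const struct rfkill_ops *ops, void *data)
{
	const char *type_name = NULL;
	enum rfkill_type type;
	struct rfkill *rfk;
	int err;

	device_property_read_string(dev, "radio-type", &type_name);

	/* "wlan", "bluetooth", ... map to the enum; missing or unknown
	 * strings yield RFKILL_TYPE_ALL, which cannot be registered */
	type = rfkill_find_type(type_name);
	if (type == RFKILL_TYPE_ALL)
		return -EINVAL;

	rfk = rfkill_alloc(dev_name(dev), dev, type, ops, data);
	if (!rfk)
		return -ENOMEM;

	err = rfkill_register(rfk);
	if (err)
		rfkill_destroy(rfk);
	return err;
}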
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 4b1e3f35f06c..76c01cbd56e3 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -27,8 +27,6 @@
27#include <linux/acpi.h> 27#include <linux/acpi.h>
28#include <linux/gpio/consumer.h> 28#include <linux/gpio/consumer.h>
29 29
30#include <linux/rfkill-gpio.h>
31
32struct rfkill_gpio_data { 30struct rfkill_gpio_data {
33 const char *name; 31 const char *name;
34 enum rfkill_type type; 32 enum rfkill_type type;
@@ -81,7 +79,6 @@ static int rfkill_gpio_acpi_probe(struct device *dev,
81 if (!id) 79 if (!id)
82 return -ENODEV; 80 return -ENODEV;
83 81
84 rfkill->name = dev_name(dev);
85 rfkill->type = (unsigned)id->driver_data; 82 rfkill->type = (unsigned)id->driver_data;
86 83
87 return acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), 84 return acpi_dev_add_driver_gpios(ACPI_COMPANION(dev),
@@ -90,24 +87,27 @@ static int rfkill_gpio_acpi_probe(struct device *dev,
90 87
91static int rfkill_gpio_probe(struct platform_device *pdev) 88static int rfkill_gpio_probe(struct platform_device *pdev)
92{ 89{
93 struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
94 struct rfkill_gpio_data *rfkill; 90 struct rfkill_gpio_data *rfkill;
95 struct gpio_desc *gpio; 91 struct gpio_desc *gpio;
92 const char *type_name;
96 int ret; 93 int ret;
97 94
98 rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL); 95 rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL);
99 if (!rfkill) 96 if (!rfkill)
100 return -ENOMEM; 97 return -ENOMEM;
101 98
99 device_property_read_string(&pdev->dev, "name", &rfkill->name);
100 device_property_read_string(&pdev->dev, "type", &type_name);
101
102 if (!rfkill->name)
103 rfkill->name = dev_name(&pdev->dev);
104
105 rfkill->type = rfkill_find_type(type_name);
106
102 if (ACPI_HANDLE(&pdev->dev)) { 107 if (ACPI_HANDLE(&pdev->dev)) {
103 ret = rfkill_gpio_acpi_probe(&pdev->dev, rfkill); 108 ret = rfkill_gpio_acpi_probe(&pdev->dev, rfkill);
104 if (ret) 109 if (ret)
105 return ret; 110 return ret;
106 } else if (pdata) {
107 rfkill->name = pdata->name;
108 rfkill->type = pdata->type;
109 } else {
110 return -ENODEV;
111 } 111 }
112 112
113 rfkill->clk = devm_clk_get(&pdev->dev, NULL); 113 rfkill->clk = devm_clk_get(&pdev->dev, NULL);
@@ -124,10 +124,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
124 124
125 rfkill->shutdown_gpio = gpio; 125 rfkill->shutdown_gpio = gpio;
126 126
127	/* Make sure at-least one of the GPIO is defined and that 127	/* Make sure at least one GPIO is defined for this instance */
128 * a name is specified for this instance 128 if (!rfkill->reset_gpio && !rfkill->shutdown_gpio) {
129 */
130 if ((!rfkill->reset_gpio && !rfkill->shutdown_gpio) || !rfkill->name) {
131 dev_err(&pdev->dev, "invalid platform data\n"); 129 dev_err(&pdev->dev, "invalid platform data\n");
132 return -EINVAL; 130 return -EINVAL;
133 } 131 }
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7e2d1057d8bc..9d935fa5a2a9 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -37,7 +37,7 @@ static struct proto rxrpc_proto;
37static const struct proto_ops rxrpc_rpc_ops; 37static const struct proto_ops rxrpc_rpc_ops;
38 38
39/* local epoch for detecting local-end reset */ 39/* local epoch for detecting local-end reset */
40__be32 rxrpc_epoch; 40u32 rxrpc_epoch;
41 41
42/* current debugging ID */ 42/* current debugging ID */
43atomic_t rxrpc_debug_id; 43atomic_t rxrpc_debug_id;
@@ -81,6 +81,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
81 struct sockaddr_rxrpc *srx, 81 struct sockaddr_rxrpc *srx,
82 int len) 82 int len)
83{ 83{
84 unsigned int tail;
85
84 if (len < sizeof(struct sockaddr_rxrpc)) 86 if (len < sizeof(struct sockaddr_rxrpc))
85 return -EINVAL; 87 return -EINVAL;
86 88
@@ -103,9 +105,7 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
103 _debug("INET: %x @ %pI4", 105 _debug("INET: %x @ %pI4",
104 ntohs(srx->transport.sin.sin_port), 106 ntohs(srx->transport.sin.sin_port),
105 &srx->transport.sin.sin_addr); 107 &srx->transport.sin.sin_addr);
106 if (srx->transport_len > 8) 108 tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
107 memset((void *)&srx->transport + 8, 0,
108 srx->transport_len - 8);
109 break; 109 break;
110 110
111 case AF_INET6: 111 case AF_INET6:
@@ -113,6 +113,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
113 return -EAFNOSUPPORT; 113 return -EAFNOSUPPORT;
114 } 114 }
115 115
116 if (tail < len)
117 memset((void *)srx + tail, 0, len - tail);
116 return 0; 118 return 0;
117} 119}
118 120
@@ -121,11 +123,10 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
121 */ 123 */
122static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) 124static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
123{ 125{
124 struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) saddr; 126 struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
125 struct sock *sk = sock->sk; 127 struct sock *sk = sock->sk;
126 struct rxrpc_local *local; 128 struct rxrpc_local *local;
127 struct rxrpc_sock *rx = rxrpc_sk(sk), *prx; 129 struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
128 __be16 service_id;
129 int ret; 130 int ret;
130 131
131 _enter("%p,%p,%d", rx, saddr, len); 132 _enter("%p,%p,%d", rx, saddr, len);
@@ -143,7 +144,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
143 144
144 memcpy(&rx->srx, srx, sizeof(rx->srx)); 145 memcpy(&rx->srx, srx, sizeof(rx->srx));
145 146
146 /* find a local transport endpoint if we don't have one already */ 147 /* Find or create a local transport endpoint to use */
147 local = rxrpc_lookup_local(&rx->srx); 148 local = rxrpc_lookup_local(&rx->srx);
148 if (IS_ERR(local)) { 149 if (IS_ERR(local)) {
149 ret = PTR_ERR(local); 150 ret = PTR_ERR(local);
@@ -152,14 +153,12 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
152 153
153 rx->local = local; 154 rx->local = local;
154 if (srx->srx_service) { 155 if (srx->srx_service) {
155 service_id = htons(srx->srx_service);
156 write_lock_bh(&local->services_lock); 156 write_lock_bh(&local->services_lock);
157 list_for_each_entry(prx, &local->services, listen_link) { 157 list_for_each_entry(prx, &local->services, listen_link) {
158 if (prx->service_id == service_id) 158 if (prx->srx.srx_service == srx->srx_service)
159 goto service_in_use; 159 goto service_in_use;
160 } 160 }
161 161
162 rx->service_id = service_id;
163 list_add_tail(&rx->listen_link, &local->services); 162 list_add_tail(&rx->listen_link, &local->services);
164 write_unlock_bh(&local->services_lock); 163 write_unlock_bh(&local->services_lock);
165 164
@@ -276,7 +275,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
276 struct rxrpc_transport *trans; 275 struct rxrpc_transport *trans;
277 struct rxrpc_call *call; 276 struct rxrpc_call *call;
278 struct rxrpc_sock *rx = rxrpc_sk(sock->sk); 277 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
279 __be16 service_id;
280 278
281 _enter(",,%x,%lx", key_serial(key), user_call_ID); 279 _enter(",,%x,%lx", key_serial(key), user_call_ID);
282 280
@@ -299,16 +297,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
299 atomic_inc(&trans->usage); 297 atomic_inc(&trans->usage);
300 } 298 }
301 299
302 service_id = rx->service_id; 300 if (!srx)
303 if (srx) 301 srx = &rx->srx;
304 service_id = htons(srx->srx_service);
305
306 if (!key) 302 if (!key)
307 key = rx->key; 303 key = rx->key;
308 if (key && !key->payload.data[0]) 304 if (key && !key->payload.data[0])
309 key = NULL; /* a no-security key */ 305 key = NULL; /* a no-security key */
310 306
311 bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp); 307 bundle = rxrpc_get_bundle(rx, trans, key, srx->srx_service, gfp);
312 if (IS_ERR(bundle)) { 308 if (IS_ERR(bundle)) {
313 call = ERR_CAST(bundle); 309 call = ERR_CAST(bundle);
314 goto out; 310 goto out;
@@ -324,7 +320,6 @@ out_notrans:
324 _leave(" = %p", call); 320 _leave(" = %p", call);
325 return call; 321 return call;
326} 322}
327
328EXPORT_SYMBOL(rxrpc_kernel_begin_call); 323EXPORT_SYMBOL(rxrpc_kernel_begin_call);
329 324
330/** 325/**
@@ -340,7 +335,6 @@ void rxrpc_kernel_end_call(struct rxrpc_call *call)
340 rxrpc_remove_user_ID(call->socket, call); 335 rxrpc_remove_user_ID(call->socket, call);
341 rxrpc_put_call(call); 336 rxrpc_put_call(call);
342} 337}
343
344EXPORT_SYMBOL(rxrpc_kernel_end_call); 338EXPORT_SYMBOL(rxrpc_kernel_end_call);
345 339
346/** 340/**
@@ -425,7 +419,6 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
425 } 419 }
426 420
427 rx->trans = trans; 421 rx->trans = trans;
428 rx->service_id = htons(srx->srx_service);
429 rx->sk.sk_state = RXRPC_CLIENT_CONNECTED; 422 rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
430 423
431 release_sock(&rx->sk); 424 release_sock(&rx->sk);
@@ -622,7 +615,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
622 if (!net_eq(net, &init_net)) 615 if (!net_eq(net, &init_net))
623 return -EAFNOSUPPORT; 616 return -EAFNOSUPPORT;
624 617
625 /* we support transport protocol UDP only */ 618 /* we support transport protocol UDP/UDP6 only */
626 if (protocol != PF_INET) 619 if (protocol != PF_INET)
627 return -EPROTONOSUPPORT; 620 return -EPROTONOSUPPORT;
628 621
@@ -754,7 +747,7 @@ static int rxrpc_release(struct socket *sock)
754 * RxRPC network protocol 747 * RxRPC network protocol
755 */ 748 */
756static const struct proto_ops rxrpc_rpc_ops = { 749static const struct proto_ops rxrpc_rpc_ops = {
757 .family = PF_UNIX, 750 .family = PF_RXRPC,
758 .owner = THIS_MODULE, 751 .owner = THIS_MODULE,
759 .release = rxrpc_release, 752 .release = rxrpc_release,
760 .bind = rxrpc_bind, 753 .bind = rxrpc_bind,
@@ -778,7 +771,7 @@ static struct proto rxrpc_proto = {
778 .name = "RXRPC", 771 .name = "RXRPC",
779 .owner = THIS_MODULE, 772 .owner = THIS_MODULE,
780 .obj_size = sizeof(struct rxrpc_sock), 773 .obj_size = sizeof(struct rxrpc_sock),
781 .max_header = sizeof(struct rxrpc_header), 774 .max_header = sizeof(struct rxrpc_wire_header),
782}; 775};
783 776
784static const struct net_proto_family rxrpc_family_ops = { 777static const struct net_proto_family rxrpc_family_ops = {
@@ -796,7 +789,7 @@ static int __init af_rxrpc_init(void)
796 789
797 BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); 790 BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
798 791
799 rxrpc_epoch = htonl(get_seconds()); 792 rxrpc_epoch = get_seconds();
800 793
801 ret = -ENOMEM; 794 ret = -ENOMEM;
802 rxrpc_call_jar = kmem_cache_create( 795 rxrpc_call_jar = kmem_cache_create(
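The af_rxrpc.c hunks above move rxrpc over to host-order protocol values: rxrpc_epoch becomes a plain u32, bind() matches listeners on srx.srx_service directly instead of an htons() copy, and rxrpc_validate_address() zeroes everything past the IPv4 transport address using an offsetof()-derived tail. From userspace the binding pattern is unchanged; the hedged sketch below is modelled on Documentation/networking/rxrpc.txt, with the service id 52, the port and the availability of <linux/rxrpc.h> as assumptions.

/* hedged sketch: bind an AF_RXRPC service socket */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/rxrpc.h>

int main(void)
{
	struct sockaddr_rxrpc srx;
	int fd;

	memset(&srx, 0, sizeof(srx));
	srx.srx_family     = AF_RXRPC;
	srx.srx_service    = 52;	/* assumed service id, host order */
	srx.transport_type = SOCK_DGRAM;
	srx.transport_len  = sizeof(srx.transport.sin);
	srx.transport.sin.sin_family      = AF_INET;
	srx.transport.sin.sin_port        = htons(7000);	/* assumed */
	srx.transport.sin.sin_addr.s_addr = htonl(INADDR_ANY);

	/* the third argument selects the transport family: UDP over IPv4 */
	fd = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
	if (fd < 0 || bind(fd, (struct sockaddr *)&srx, sizeof(srx)) < 0) {
		perror("rxrpc bind");
		return 1;
	}
	/* a real service would now listen() and loop on recvmsg() */
	return 0;
}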
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
index 6d79310fcaae..277731a5e67a 100644
--- a/net/rxrpc/ar-accept.c
+++ b/net/rxrpc/ar-accept.c
@@ -27,7 +27,7 @@
27 * generate a connection-level abort 27 * generate a connection-level abort
28 */ 28 */
29static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, 29static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
30 struct rxrpc_header *hdr) 30 struct rxrpc_wire_header *whdr)
31{ 31{
32 struct msghdr msg; 32 struct msghdr msg;
33 struct kvec iov[1]; 33 struct kvec iov[1];
@@ -36,25 +36,21 @@ static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
36 36
37 _enter("%d,,", local->debug_id); 37 _enter("%d,,", local->debug_id);
38 38
39 whdr->type = RXRPC_PACKET_TYPE_BUSY;
40 whdr->serial = htonl(1);
41
39 msg.msg_name = &srx->transport.sin; 42 msg.msg_name = &srx->transport.sin;
40 msg.msg_namelen = sizeof(srx->transport.sin); 43 msg.msg_namelen = sizeof(srx->transport.sin);
41 msg.msg_control = NULL; 44 msg.msg_control = NULL;
42 msg.msg_controllen = 0; 45 msg.msg_controllen = 0;
43 msg.msg_flags = 0; 46 msg.msg_flags = 0;
44 47
45 hdr->seq = 0; 48 iov[0].iov_base = whdr;
46 hdr->type = RXRPC_PACKET_TYPE_BUSY; 49 iov[0].iov_len = sizeof(*whdr);
47 hdr->flags = 0;
48 hdr->userStatus = 0;
49 hdr->_rsvd = 0;
50
51 iov[0].iov_base = hdr;
52 iov[0].iov_len = sizeof(*hdr);
53 50
54 len = iov[0].iov_len; 51 len = iov[0].iov_len;
55 52
56 hdr->serial = htonl(1); 53 _proto("Tx BUSY %%1");
57 _proto("Tx BUSY %%%u", ntohl(hdr->serial));
58 54
59 ret = kernel_sendmsg(local->socket, &msg, iov, 1, len); 55 ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
60 if (ret < 0) { 56 if (ret < 0) {
@@ -185,8 +181,8 @@ invalid_service:
185 read_unlock_bh(&local->services_lock); 181 read_unlock_bh(&local->services_lock);
186 182
187 read_lock_bh(&call->state_lock); 183 read_lock_bh(&call->state_lock);
188 if (!test_bit(RXRPC_CALL_RELEASE, &call->flags) && 184 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
189 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) { 185 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
190 rxrpc_get_call(call); 186 rxrpc_get_call(call);
191 rxrpc_queue_call(call); 187 rxrpc_queue_call(call);
192 } 188 }
@@ -211,8 +207,8 @@ void rxrpc_accept_incoming_calls(struct work_struct *work)
211 struct rxrpc_skb_priv *sp; 207 struct rxrpc_skb_priv *sp;
212 struct sockaddr_rxrpc srx; 208 struct sockaddr_rxrpc srx;
213 struct rxrpc_sock *rx; 209 struct rxrpc_sock *rx;
210 struct rxrpc_wire_header whdr;
214 struct sk_buff *skb; 211 struct sk_buff *skb;
215 __be16 service_id;
216 int ret; 212 int ret;
217 213
218 _enter("%d", local->debug_id); 214 _enter("%d", local->debug_id);
@@ -240,6 +236,19 @@ process_next_packet:
240 236
241 sp = rxrpc_skb(skb); 237 sp = rxrpc_skb(skb);
242 238
239 /* Set up a response packet header in case we need it */
240 whdr.epoch = htonl(sp->hdr.epoch);
241 whdr.cid = htonl(sp->hdr.cid);
242 whdr.callNumber = htonl(sp->hdr.callNumber);
243 whdr.seq = htonl(sp->hdr.seq);
244 whdr.serial = 0;
245 whdr.flags = 0;
246 whdr.type = 0;
247 whdr.userStatus = 0;
248 whdr.securityIndex = sp->hdr.securityIndex;
249 whdr._rsvd = 0;
250 whdr.serviceId = htons(sp->hdr.serviceId);
251
243 /* determine the remote address */ 252 /* determine the remote address */
244 memset(&srx, 0, sizeof(srx)); 253 memset(&srx, 0, sizeof(srx));
245 srx.srx_family = AF_RXRPC; 254 srx.srx_family = AF_RXRPC;
@@ -256,10 +265,9 @@ process_next_packet:
256 } 265 }
257 266
258 /* get the socket providing the service */ 267 /* get the socket providing the service */
259 service_id = sp->hdr.serviceId;
260 read_lock_bh(&local->services_lock); 268 read_lock_bh(&local->services_lock);
261 list_for_each_entry(rx, &local->services, listen_link) { 269 list_for_each_entry(rx, &local->services, listen_link) {
262 if (rx->service_id == service_id && 270 if (rx->srx.srx_service == sp->hdr.serviceId &&
263 rx->sk.sk_state != RXRPC_CLOSE) 271 rx->sk.sk_state != RXRPC_CLOSE)
264 goto found_service; 272 goto found_service;
265 } 273 }
@@ -267,7 +275,7 @@ process_next_packet:
267 goto invalid_service; 275 goto invalid_service;
268 276
269found_service: 277found_service:
270 _debug("found service %hd", ntohs(rx->service_id)); 278 _debug("found service %hd", rx->srx.srx_service);
271 if (sk_acceptq_is_full(&rx->sk)) 279 if (sk_acceptq_is_full(&rx->sk))
272 goto backlog_full; 280 goto backlog_full;
273 sk_acceptq_added(&rx->sk); 281 sk_acceptq_added(&rx->sk);
@@ -296,7 +304,7 @@ found_service:
296backlog_full: 304backlog_full:
297 read_unlock_bh(&local->services_lock); 305 read_unlock_bh(&local->services_lock);
298busy: 306busy:
299 rxrpc_busy(local, &srx, &sp->hdr); 307 rxrpc_busy(local, &srx, &whdr);
300 rxrpc_free_skb(skb); 308 rxrpc_free_skb(skb);
301 goto process_next_packet; 309 goto process_next_packet;
302 310
@@ -379,7 +387,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
379 rb_insert_color(&call->sock_node, &rx->calls); 387 rb_insert_color(&call->sock_node, &rx->calls);
380 if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) 388 if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
381 BUG(); 389 BUG();
382 if (test_and_set_bit(RXRPC_CALL_ACCEPTED, &call->events)) 390 if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events))
383 BUG(); 391 BUG();
384 rxrpc_queue_call(call); 392 rxrpc_queue_call(call);
385 393
@@ -395,7 +403,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
395out_release: 403out_release:
396 _debug("release %p", call); 404 _debug("release %p", call);
397 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 405 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
398 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 406 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
399 rxrpc_queue_call(call); 407 rxrpc_queue_call(call);
400out_discard: 408out_discard:
401 write_unlock_bh(&call->state_lock); 409 write_unlock_bh(&call->state_lock);
@@ -407,7 +415,7 @@ out:
407} 415}
408 416
409/* 417/*
410 * handle rejectance of a call by userspace 418 * Handle rejection of a call by userspace
411 * - reject the call at the front of the queue 419 * - reject the call at the front of the queue
412 */ 420 */
413int rxrpc_reject_call(struct rxrpc_sock *rx) 421int rxrpc_reject_call(struct rxrpc_sock *rx)
@@ -434,7 +442,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
434 switch (call->state) { 442 switch (call->state) {
435 case RXRPC_CALL_SERVER_ACCEPTING: 443 case RXRPC_CALL_SERVER_ACCEPTING:
436 call->state = RXRPC_CALL_SERVER_BUSY; 444 call->state = RXRPC_CALL_SERVER_BUSY;
437 if (test_and_set_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) 445 if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events))
438 rxrpc_queue_call(call); 446 rxrpc_queue_call(call);
439 ret = 0; 447 ret = 0;
440 goto out_release; 448 goto out_release;
@@ -458,7 +466,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
458out_release: 466out_release:
459 _debug("release %p", call); 467 _debug("release %p", call);
460 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 468 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
461 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 469 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
462 rxrpc_queue_call(call); 470 rxrpc_queue_call(call);
463out_discard: 471out_discard:
464 write_unlock_bh(&call->state_lock); 472 write_unlock_bh(&call->state_lock);
@@ -487,7 +495,6 @@ struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
487 _leave(" = %p", call); 495 _leave(" = %p", call);
488 return call; 496 return call;
489} 497}
490
491EXPORT_SYMBOL(rxrpc_kernel_accept_call); 498EXPORT_SYMBOL(rxrpc_kernel_accept_call);
492 499
493/** 500/**
@@ -506,5 +513,4 @@ int rxrpc_kernel_reject_call(struct socket *sock)
506 _leave(" = %d", ret); 513 _leave(" = %d", ret);
507 return ret; 514 return ret;
508} 515}
509
510EXPORT_SYMBOL(rxrpc_kernel_reject_call); 516EXPORT_SYMBOL(rxrpc_kernel_reject_call);
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index adc555e0323d..16d967075eaf 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -23,7 +23,7 @@
23 * How long to wait before scheduling ACK generation after seeing a 23 * How long to wait before scheduling ACK generation after seeing a
24 * packet with RXRPC_REQUEST_ACK set (in jiffies). 24 * packet with RXRPC_REQUEST_ACK set (in jiffies).
25 */ 25 */
26unsigned rxrpc_requested_ack_delay = 1; 26unsigned int rxrpc_requested_ack_delay = 1;
27 27
28/* 28/*
29 * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). 29 * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
@@ -32,7 +32,7 @@ unsigned rxrpc_requested_ack_delay = 1;
32 * all consumed within this time we will send a DELAY ACK if an ACK was not 32 * all consumed within this time we will send a DELAY ACK if an ACK was not
33 * requested to let the sender know it doesn't need to resend. 33 * requested to let the sender know it doesn't need to resend.
34 */ 34 */
35unsigned rxrpc_soft_ack_delay = 1 * HZ; 35unsigned int rxrpc_soft_ack_delay = 1 * HZ;
36 36
37/* 37/*
38 * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). 38 * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
@@ -41,7 +41,7 @@ unsigned rxrpc_soft_ack_delay = 1 * HZ;
41 * further packets aren't immediately received to decide when to send an IDLE 41 * further packets aren't immediately received to decide when to send an IDLE
42 * ACK let the other end know that it can free up its Tx buffer space. 42 * ACK let the other end know that it can free up its Tx buffer space.
43 */ 43 */
44unsigned rxrpc_idle_ack_delay = 0.5 * HZ; 44unsigned int rxrpc_idle_ack_delay = 0.5 * HZ;
45 45
46/* 46/*
47 * Receive window size in packets. This indicates the maximum number of 47 * Receive window size in packets. This indicates the maximum number of
@@ -49,19 +49,19 @@ unsigned rxrpc_idle_ack_delay = 0.5 * HZ;
49 * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further 49 * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
50 * packets. 50 * packets.
51 */ 51 */
52unsigned rxrpc_rx_window_size = 32; 52unsigned int rxrpc_rx_window_size = 32;
53 53
54/* 54/*
55 * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet 55 * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
56 * made by gluing normal packets together that we're willing to handle. 56 * made by gluing normal packets together that we're willing to handle.
57 */ 57 */
58unsigned rxrpc_rx_mtu = 5692; 58unsigned int rxrpc_rx_mtu = 5692;
59 59
60/* 60/*
61 * The maximum number of fragments in a received jumbo packet that we tell the 61 * The maximum number of fragments in a received jumbo packet that we tell the
62 * sender that we're willing to handle. 62 * sender that we're willing to handle.
63 */ 63 */
64unsigned rxrpc_rx_jumbo_max = 4; 64unsigned int rxrpc_rx_jumbo_max = 4;
65 65
66static const char *rxrpc_acks(u8 reason) 66static const char *rxrpc_acks(u8 reason)
67{ 67{
@@ -91,7 +91,7 @@ static const s8 rxrpc_ack_priority[] = {
91 * propose an ACK be sent 91 * propose an ACK be sent
92 */ 92 */
93void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, 93void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
94 __be32 serial, bool immediate) 94 u32 serial, bool immediate)
95{ 95{
96 unsigned long expiry; 96 unsigned long expiry;
97 s8 prior = rxrpc_ack_priority[ack_reason]; 97 s8 prior = rxrpc_ack_priority[ack_reason];
@@ -99,8 +99,7 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
99 ASSERTCMP(prior, >, 0); 99 ASSERTCMP(prior, >, 0);
100 100
101 _enter("{%d},%s,%%%x,%u", 101 _enter("{%d},%s,%%%x,%u",
102 call->debug_id, rxrpc_acks(ack_reason), ntohl(serial), 102 call->debug_id, rxrpc_acks(ack_reason), serial, immediate);
103 immediate);
104 103
105 if (prior < rxrpc_ack_priority[call->ackr_reason]) { 104 if (prior < rxrpc_ack_priority[call->ackr_reason]) {
106 if (immediate) 105 if (immediate)
@@ -139,7 +138,7 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
139 expiry = rxrpc_requested_ack_delay; 138 expiry = rxrpc_requested_ack_delay;
140 if (!expiry) 139 if (!expiry)
141 goto cancel_timer; 140 goto cancel_timer;
142 if (!immediate || serial == cpu_to_be32(1)) { 141 if (!immediate || serial == 1) {
143 _debug("run defer timer"); 142 _debug("run defer timer");
144 goto run_timer; 143 goto run_timer;
145 } 144 }
@@ -157,11 +156,11 @@ run_timer:
157 return; 156 return;
158 157
159cancel_timer: 158cancel_timer:
160 _debug("cancel timer %%%u", ntohl(serial)); 159 _debug("cancel timer %%%u", serial);
161 try_to_del_timer_sync(&call->ack_timer); 160 try_to_del_timer_sync(&call->ack_timer);
162 read_lock_bh(&call->state_lock); 161 read_lock_bh(&call->state_lock);
163 if (call->state <= RXRPC_CALL_COMPLETE && 162 if (call->state <= RXRPC_CALL_COMPLETE &&
164 !test_and_set_bit(RXRPC_CALL_ACK, &call->events)) 163 !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
165 rxrpc_queue_call(call); 164 rxrpc_queue_call(call);
166 read_unlock_bh(&call->state_lock); 165 read_unlock_bh(&call->state_lock);
167} 166}
@@ -170,7 +169,7 @@ cancel_timer:
170 * propose an ACK be sent, locking the call structure 169 * propose an ACK be sent, locking the call structure
171 */ 170 */
172void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, 171void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
173 __be32 serial, bool immediate) 172 u32 serial, bool immediate)
174{ 173{
175 s8 prior = rxrpc_ack_priority[ack_reason]; 174 s8 prior = rxrpc_ack_priority[ack_reason];
176 175
@@ -193,7 +192,7 @@ static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
193 192
194 if (resend & 1) { 193 if (resend & 1) {
195 _debug("SET RESEND"); 194 _debug("SET RESEND");
196 set_bit(RXRPC_CALL_RESEND, &call->events); 195 set_bit(RXRPC_CALL_EV_RESEND, &call->events);
197 } 196 }
198 197
199 if (resend & 2) { 198 if (resend & 2) {
@@ -203,7 +202,7 @@ static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
203 } else { 202 } else {
204 _debug("KILL RESEND TIMER"); 203 _debug("KILL RESEND TIMER");
205 del_timer_sync(&call->resend_timer); 204 del_timer_sync(&call->resend_timer);
206 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); 205 clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
207 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 206 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
208 } 207 }
209 read_unlock_bh(&call->state_lock); 208 read_unlock_bh(&call->state_lock);
@@ -214,8 +213,8 @@ static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
214 */ 213 */
215static void rxrpc_resend(struct rxrpc_call *call) 214static void rxrpc_resend(struct rxrpc_call *call)
216{ 215{
216 struct rxrpc_wire_header *whdr;
217 struct rxrpc_skb_priv *sp; 217 struct rxrpc_skb_priv *sp;
218 struct rxrpc_header *hdr;
219 struct sk_buff *txb; 218 struct sk_buff *txb;
220 unsigned long *p_txb, resend_at; 219 unsigned long *p_txb, resend_at;
221 bool stop; 220 bool stop;
@@ -247,14 +246,13 @@ static void rxrpc_resend(struct rxrpc_call *call)
247 sp->need_resend = false; 246 sp->need_resend = false;
248 247
249 /* each Tx packet has a new serial number */ 248 /* each Tx packet has a new serial number */
250 sp->hdr.serial = 249 sp->hdr.serial = atomic_inc_return(&call->conn->serial);
251 htonl(atomic_inc_return(&call->conn->serial));
252 250
253 hdr = (struct rxrpc_header *) txb->head; 251 whdr = (struct rxrpc_wire_header *)txb->head;
254 hdr->serial = sp->hdr.serial; 252 whdr->serial = htonl(sp->hdr.serial);
255 253
256 _proto("Tx DATA %%%u { #%d }", 254 _proto("Tx DATA %%%u { #%d }",
257 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); 255 sp->hdr.serial, sp->hdr.seq);
258 if (rxrpc_send_packet(call->conn->trans, txb) < 0) { 256 if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
259 stop = true; 257 stop = true;
260 sp->resend_at = jiffies + 3; 258 sp->resend_at = jiffies + 3;
@@ -428,7 +426,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
428 int tail = call->acks_tail, old_tail; 426 int tail = call->acks_tail, old_tail;
429 int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); 427 int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
430 428
431 _enter("{%u,%u},%u", call->acks_hard, win, hard); 429 kenter("{%u,%u},%u", call->acks_hard, win, hard);
432 430
433 ASSERTCMP(hard - call->acks_hard, <=, win); 431 ASSERTCMP(hard - call->acks_hard, <=, win);
434 432
@@ -478,11 +476,11 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
478 sp = rxrpc_skb(skb); 476 sp = rxrpc_skb(skb);
479 477
480 _debug("drain OOS packet %d [%d]", 478 _debug("drain OOS packet %d [%d]",
481 ntohl(sp->hdr.seq), call->rx_first_oos); 479 sp->hdr.seq, call->rx_first_oos);
482 480
483 if (ntohl(sp->hdr.seq) != call->rx_first_oos) { 481 if (sp->hdr.seq != call->rx_first_oos) {
484 skb_queue_head(&call->rx_oos_queue, skb); 482 skb_queue_head(&call->rx_oos_queue, skb);
485 call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq); 483 call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
486 _debug("requeue %p {%u}", skb, call->rx_first_oos); 484 _debug("requeue %p {%u}", skb, call->rx_first_oos);
487 } else { 485 } else {
488 skb->mark = RXRPC_SKB_MARK_DATA; 486 skb->mark = RXRPC_SKB_MARK_DATA;
@@ -496,8 +494,7 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
496 /* find out what the next packet is */ 494 /* find out what the next packet is */
497 skb = skb_peek(&call->rx_oos_queue); 495 skb = skb_peek(&call->rx_oos_queue);
498 if (skb) 496 if (skb)
499 call->rx_first_oos = 497 call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
500 ntohl(rxrpc_skb(skb)->hdr.seq);
501 else 498 else
502 call->rx_first_oos = 0; 499 call->rx_first_oos = 0;
503 _debug("peek %p {%u}", skb, call->rx_first_oos); 500 _debug("peek %p {%u}", skb, call->rx_first_oos);
@@ -522,7 +519,7 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
522 u32 seq; 519 u32 seq;
523 520
524 sp = rxrpc_skb(skb); 521 sp = rxrpc_skb(skb);
525 seq = ntohl(sp->hdr.seq); 522 seq = sp->hdr.seq;
526 _enter(",,{%u}", seq); 523 _enter(",,{%u}", seq);
527 524
528 skb->destructor = rxrpc_packet_destructor; 525 skb->destructor = rxrpc_packet_destructor;
@@ -535,9 +532,8 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
535 532
536 skb_queue_walk(&call->rx_oos_queue, p) { 533 skb_queue_walk(&call->rx_oos_queue, p) {
537 psp = rxrpc_skb(p); 534 psp = rxrpc_skb(p);
538 if (ntohl(psp->hdr.seq) > seq) { 535 if (psp->hdr.seq > seq) {
539 _debug("insert oos #%u before #%u", 536 _debug("insert oos #%u before #%u", seq, psp->hdr.seq);
540 seq, ntohl(psp->hdr.seq));
541 skb_insert(p, skb, &call->rx_oos_queue); 537 skb_insert(p, skb, &call->rx_oos_queue);
542 goto inserted; 538 goto inserted;
543 } 539 }
@@ -555,7 +551,7 @@ inserted:
555 if (call->state < RXRPC_CALL_COMPLETE && 551 if (call->state < RXRPC_CALL_COMPLETE &&
556 call->rx_data_post == call->rx_first_oos) { 552 call->rx_data_post == call->rx_first_oos) {
557 _debug("drain rx oos now"); 553 _debug("drain rx oos now");
558 set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events); 554 set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
559 } 555 }
560 read_unlock(&call->state_lock); 556 read_unlock(&call->state_lock);
561 557
@@ -586,7 +582,7 @@ static void rxrpc_zap_tx_window(struct rxrpc_call *call)
586 582
587 skb = (struct sk_buff *) _skb; 583 skb = (struct sk_buff *) _skb;
588 sp = rxrpc_skb(skb); 584 sp = rxrpc_skb(skb);
589 _debug("+++ clear Tx %u", ntohl(sp->hdr.seq)); 585 _debug("+++ clear Tx %u", sp->hdr.seq);
590 rxrpc_free_skb(skb); 586 rxrpc_free_skb(skb);
591 } 587 }
592 588
@@ -657,8 +653,7 @@ process_further:
657 /* data packets that wind up here have been received out of 653 /* data packets that wind up here have been received out of
658 * order, need security processing or are jumbo packets */ 654 * order, need security processing or are jumbo packets */
659 case RXRPC_PACKET_TYPE_DATA: 655 case RXRPC_PACKET_TYPE_DATA:
660 _proto("OOSQ DATA %%%u { #%u }", 656 _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
661 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
662 657
663 /* secured packets must be verified and possibly decrypted */ 658 /* secured packets must be verified and possibly decrypted */
664 if (rxrpc_verify_packet(call, skb, _abort_code) < 0) 659 if (rxrpc_verify_packet(call, skb, _abort_code) < 0)
@@ -676,7 +671,7 @@ process_further:
676 if (!skb_pull(skb, sizeof(ack))) 671 if (!skb_pull(skb, sizeof(ack)))
677 BUG(); 672 BUG();
678 673
679 latest = ntohl(sp->hdr.serial); 674 latest = sp->hdr.serial;
680 hard = ntohl(ack.firstPacket); 675 hard = ntohl(ack.firstPacket);
681 tx = atomic_read(&call->sequence); 676 tx = atomic_read(&call->sequence);
682 677
@@ -793,7 +788,7 @@ all_acked:
793 788
794 del_timer_sync(&call->resend_timer); 789 del_timer_sync(&call->resend_timer);
795 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 790 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
796 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); 791 clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
797 792
798 if (call->acks_window) 793 if (call->acks_window)
799 rxrpc_zap_tx_window(call); 794 rxrpc_zap_tx_window(call);
@@ -881,16 +876,17 @@ void rxrpc_process_call(struct work_struct *work)
881{ 876{
882 struct rxrpc_call *call = 877 struct rxrpc_call *call =
883 container_of(work, struct rxrpc_call, processor); 878 container_of(work, struct rxrpc_call, processor);
879 struct rxrpc_wire_header whdr;
884 struct rxrpc_ackpacket ack; 880 struct rxrpc_ackpacket ack;
885 struct rxrpc_ackinfo ackinfo; 881 struct rxrpc_ackinfo ackinfo;
886 struct rxrpc_header hdr;
887 struct msghdr msg; 882 struct msghdr msg;
888 struct kvec iov[5]; 883 struct kvec iov[5];
884 enum rxrpc_call_event genbit;
889 unsigned long bits; 885 unsigned long bits;
890 __be32 data, pad; 886 __be32 data, pad;
891 size_t len; 887 size_t len;
892 int genbit, loop, nbit, ioc, ret, mtu; 888 int loop, nbit, ioc, ret, mtu;
893 u32 abort_code = RX_PROTOCOL_ERROR; 889 u32 serial, abort_code = RX_PROTOCOL_ERROR;
894 u8 *acks = NULL; 890 u8 *acks = NULL;
895 891
896 //printk("\n--------------------\n"); 892 //printk("\n--------------------\n");
@@ -911,33 +907,33 @@ void rxrpc_process_call(struct work_struct *work)
911 msg.msg_controllen = 0; 907 msg.msg_controllen = 0;
912 msg.msg_flags = 0; 908 msg.msg_flags = 0;
913 909
914 hdr.epoch = call->conn->epoch; 910 whdr.epoch = htonl(call->conn->epoch);
915 hdr.cid = call->cid; 911 whdr.cid = htonl(call->cid);
916 hdr.callNumber = call->call_id; 912 whdr.callNumber = htonl(call->call_id);
917 hdr.seq = 0; 913 whdr.seq = 0;
918 hdr.type = RXRPC_PACKET_TYPE_ACK; 914 whdr.type = RXRPC_PACKET_TYPE_ACK;
919 hdr.flags = call->conn->out_clientflag; 915 whdr.flags = call->conn->out_clientflag;
920 hdr.userStatus = 0; 916 whdr.userStatus = 0;
921 hdr.securityIndex = call->conn->security_ix; 917 whdr.securityIndex = call->conn->security_ix;
922 hdr._rsvd = 0; 918 whdr._rsvd = 0;
923 hdr.serviceId = call->conn->service_id; 919 whdr.serviceId = htons(call->service_id);
924 920
925 memset(iov, 0, sizeof(iov)); 921 memset(iov, 0, sizeof(iov));
926 iov[0].iov_base = &hdr; 922 iov[0].iov_base = &whdr;
927 iov[0].iov_len = sizeof(hdr); 923 iov[0].iov_len = sizeof(whdr);
928 924
929 /* deal with events of a final nature */ 925 /* deal with events of a final nature */
930 if (test_bit(RXRPC_CALL_RELEASE, &call->events)) { 926 if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
931 rxrpc_release_call(call); 927 rxrpc_release_call(call);
932 clear_bit(RXRPC_CALL_RELEASE, &call->events); 928 clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
933 } 929 }
934 930
935 if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) { 931 if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
936 int error; 932 int error;
937 933
938 clear_bit(RXRPC_CALL_CONN_ABORT, &call->events); 934 clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
939 clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events); 935 clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
940 clear_bit(RXRPC_CALL_ABORT, &call->events); 936 clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
941 937
942 error = call->conn->trans->peer->net_error; 938 error = call->conn->trans->peer->net_error;
943 _debug("post net error %d", error); 939 _debug("post net error %d", error);
@@ -945,47 +941,47 @@ void rxrpc_process_call(struct work_struct *work)
945 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR, 941 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
946 error, true) < 0) 942 error, true) < 0)
947 goto no_mem; 943 goto no_mem;
948 clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events); 944 clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
949 goto kill_ACKs; 945 goto kill_ACKs;
950 } 946 }
951 947
952 if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) { 948 if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) {
953 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); 949 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
954 950
955 clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events); 951 clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
956 clear_bit(RXRPC_CALL_ABORT, &call->events); 952 clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
957 953
958 _debug("post conn abort"); 954 _debug("post conn abort");
959 955
960 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, 956 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
961 call->conn->error, true) < 0) 957 call->conn->error, true) < 0)
962 goto no_mem; 958 goto no_mem;
963 clear_bit(RXRPC_CALL_CONN_ABORT, &call->events); 959 clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
964 goto kill_ACKs; 960 goto kill_ACKs;
965 } 961 }
966 962
967 if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) { 963 if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) {
968 hdr.type = RXRPC_PACKET_TYPE_BUSY; 964 whdr.type = RXRPC_PACKET_TYPE_BUSY;
969 genbit = RXRPC_CALL_REJECT_BUSY; 965 genbit = RXRPC_CALL_EV_REJECT_BUSY;
970 goto send_message; 966 goto send_message;
971 } 967 }
972 968
973 if (test_bit(RXRPC_CALL_ABORT, &call->events)) { 969 if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
974 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); 970 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
975 971
976 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, 972 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
977 ECONNABORTED, true) < 0) 973 ECONNABORTED, true) < 0)
978 goto no_mem; 974 goto no_mem;
979 hdr.type = RXRPC_PACKET_TYPE_ABORT; 975 whdr.type = RXRPC_PACKET_TYPE_ABORT;
980 data = htonl(call->abort_code); 976 data = htonl(call->abort_code);
981 iov[1].iov_base = &data; 977 iov[1].iov_base = &data;
982 iov[1].iov_len = sizeof(data); 978 iov[1].iov_len = sizeof(data);
983 genbit = RXRPC_CALL_ABORT; 979 genbit = RXRPC_CALL_EV_ABORT;
984 goto send_message; 980 goto send_message;
985 } 981 }
986 982
987 if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) { 983 if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) {
988 genbit = RXRPC_CALL_ACK_FINAL; 984 genbit = RXRPC_CALL_EV_ACK_FINAL;
989 985
990 ack.bufferSpace = htons(8); 986 ack.bufferSpace = htons(8);
991 ack.maxSkew = 0; 987 ack.maxSkew = 0;
@@ -995,9 +991,9 @@ void rxrpc_process_call(struct work_struct *work)
995 call->ackr_reason = 0; 991 call->ackr_reason = 0;
996 992
997 spin_lock_bh(&call->lock); 993 spin_lock_bh(&call->lock);
998 ack.serial = call->ackr_serial; 994 ack.serial = htonl(call->ackr_serial);
999 ack.previousPacket = call->ackr_prev_seq; 995 ack.previousPacket = htonl(call->ackr_prev_seq);
1000 ack.firstPacket = htonl(call->rx_data_eaten + 1); 996 ack.firstPacket = htonl(call->rx_data_eaten + 1);
1001 spin_unlock_bh(&call->lock); 997 spin_unlock_bh(&call->lock);
1002 998
1003 pad = 0; 999 pad = 0;
@@ -1011,12 +1007,12 @@ void rxrpc_process_call(struct work_struct *work)
1011 goto send_ACK; 1007 goto send_ACK;
1012 } 1008 }
1013 1009
1014 if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) | 1010 if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) |
1015 (1 << RXRPC_CALL_RCVD_ABORT)) 1011 (1 << RXRPC_CALL_EV_RCVD_ABORT))
1016 ) { 1012 ) {
1017 u32 mark; 1013 u32 mark;
1018 1014
1019 if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events)) 1015 if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events))
1020 mark = RXRPC_SKB_MARK_REMOTE_ABORT; 1016 mark = RXRPC_SKB_MARK_REMOTE_ABORT;
1021 else 1017 else
1022 mark = RXRPC_SKB_MARK_BUSY; 1018 mark = RXRPC_SKB_MARK_BUSY;
@@ -1026,22 +1022,22 @@ void rxrpc_process_call(struct work_struct *work)
1026 if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0) 1022 if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
1027 goto no_mem; 1023 goto no_mem;
1028 1024
1029 clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events); 1025 clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
1030 clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events); 1026 clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
1031 goto kill_ACKs; 1027 goto kill_ACKs;
1032 } 1028 }
1033 1029
1034 if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) { 1030 if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) {
1035 _debug("do implicit ackall"); 1031 _debug("do implicit ackall");
1036 rxrpc_clear_tx_window(call); 1032 rxrpc_clear_tx_window(call);
1037 } 1033 }
1038 1034
1039 if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) { 1035 if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) {
1040 write_lock_bh(&call->state_lock); 1036 write_lock_bh(&call->state_lock);
1041 if (call->state <= RXRPC_CALL_COMPLETE) { 1037 if (call->state <= RXRPC_CALL_COMPLETE) {
1042 call->state = RXRPC_CALL_LOCALLY_ABORTED; 1038 call->state = RXRPC_CALL_LOCALLY_ABORTED;
1043 call->abort_code = RX_CALL_TIMEOUT; 1039 call->abort_code = RX_CALL_TIMEOUT;
1044 set_bit(RXRPC_CALL_ABORT, &call->events); 1040 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
1045 } 1041 }
1046 write_unlock_bh(&call->state_lock); 1042 write_unlock_bh(&call->state_lock);
1047 1043
@@ -1050,7 +1046,7 @@ void rxrpc_process_call(struct work_struct *work)
1050 ETIME, true) < 0) 1046 ETIME, true) < 0)
1051 goto no_mem; 1047 goto no_mem;
1052 1048
1053 clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events); 1049 clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
1054 goto kill_ACKs; 1050 goto kill_ACKs;
1055 } 1051 }
1056 1052
@@ -1071,13 +1067,13 @@ void rxrpc_process_call(struct work_struct *work)
1071 } 1067 }
1072 1068
1073 /* handle resending */ 1069 /* handle resending */
1074 if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events)) 1070 if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
1075 rxrpc_resend_timer(call); 1071 rxrpc_resend_timer(call);
1076 if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events)) 1072 if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events))
1077 rxrpc_resend(call); 1073 rxrpc_resend(call);
1078 1074
1079 /* consider sending an ordinary ACK */ 1075 /* consider sending an ordinary ACK */
1080 if (test_bit(RXRPC_CALL_ACK, &call->events)) { 1076 if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
1081 _debug("send ACK: window: %d - %d { %lx }", 1077 _debug("send ACK: window: %d - %d { %lx }",
1082 call->rx_data_eaten, call->ackr_win_top, 1078 call->rx_data_eaten, call->ackr_win_top,
1083 call->ackr_window[0]); 1079 call->ackr_window[0]);
@@ -1085,11 +1081,11 @@ void rxrpc_process_call(struct work_struct *work)
1085 if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST && 1081 if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
1086 call->ackr_reason != RXRPC_ACK_PING_RESPONSE) { 1082 call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
1087 /* ACK by sending reply DATA packet in this state */ 1083 /* ACK by sending reply DATA packet in this state */
1088 clear_bit(RXRPC_CALL_ACK, &call->events); 1084 clear_bit(RXRPC_CALL_EV_ACK, &call->events);
1089 goto maybe_reschedule; 1085 goto maybe_reschedule;
1090 } 1086 }
1091 1087
1092 genbit = RXRPC_CALL_ACK; 1088 genbit = RXRPC_CALL_EV_ACK;
1093 1089
1094 acks = kzalloc(call->ackr_win_top - call->rx_data_eaten, 1090 acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
1095 GFP_NOFS); 1091 GFP_NOFS);
@@ -1099,13 +1095,11 @@ void rxrpc_process_call(struct work_struct *work)
1099 //hdr.flags = RXRPC_SLOW_START_OK; 1095 //hdr.flags = RXRPC_SLOW_START_OK;
1100 ack.bufferSpace = htons(8); 1096 ack.bufferSpace = htons(8);
1101 ack.maxSkew = 0; 1097 ack.maxSkew = 0;
1102 ack.serial = 0;
1103 ack.reason = 0;
1104 1098
1105 spin_lock_bh(&call->lock); 1099 spin_lock_bh(&call->lock);
1106 ack.reason = call->ackr_reason; 1100 ack.reason = call->ackr_reason;
1107 ack.serial = call->ackr_serial; 1101 ack.serial = htonl(call->ackr_serial);
1108 ack.previousPacket = call->ackr_prev_seq; 1102 ack.previousPacket = htonl(call->ackr_prev_seq);
1109 ack.firstPacket = htonl(call->rx_data_eaten + 1); 1103 ack.firstPacket = htonl(call->rx_data_eaten + 1);
1110 1104
1111 ack.nAcks = 0; 1105 ack.nAcks = 0;
@@ -1152,7 +1146,7 @@ void rxrpc_process_call(struct work_struct *work)
1152 1146
1153 /* handle completion of security negotiations on an incoming 1147 /* handle completion of security negotiations on an incoming
1154 * connection */ 1148 * connection */
1155 if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) { 1149 if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) {
1156 _debug("secured"); 1150 _debug("secured");
1157 spin_lock_bh(&call->lock); 1151 spin_lock_bh(&call->lock);
1158 1152
@@ -1160,7 +1154,7 @@ void rxrpc_process_call(struct work_struct *work)
1160 _debug("securing"); 1154 _debug("securing");
1161 write_lock(&call->conn->lock); 1155 write_lock(&call->conn->lock);
1162 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 1156 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
1163 !test_bit(RXRPC_CALL_RELEASE, &call->events)) { 1157 !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
1164 _debug("not released"); 1158 _debug("not released");
1165 call->state = RXRPC_CALL_SERVER_ACCEPTING; 1159 call->state = RXRPC_CALL_SERVER_ACCEPTING;
1166 list_move_tail(&call->accept_link, 1160 list_move_tail(&call->accept_link,
@@ -1169,39 +1163,39 @@ void rxrpc_process_call(struct work_struct *work)
1169 write_unlock(&call->conn->lock); 1163 write_unlock(&call->conn->lock);
1170 read_lock(&call->state_lock); 1164 read_lock(&call->state_lock);
1171 if (call->state < RXRPC_CALL_COMPLETE) 1165 if (call->state < RXRPC_CALL_COMPLETE)
1172 set_bit(RXRPC_CALL_POST_ACCEPT, &call->events); 1166 set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
1173 read_unlock(&call->state_lock); 1167 read_unlock(&call->state_lock);
1174 } 1168 }
1175 1169
1176 spin_unlock_bh(&call->lock); 1170 spin_unlock_bh(&call->lock);
1177 if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) 1171 if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events))
1178 goto maybe_reschedule; 1172 goto maybe_reschedule;
1179 } 1173 }
1180 1174
1181 /* post a notification of an acceptable connection to the app */ 1175 /* post a notification of an acceptable connection to the app */
1182 if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) { 1176 if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) {
1183 _debug("post accept"); 1177 _debug("post accept");
1184 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL, 1178 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
1185 0, false) < 0) 1179 0, false) < 0)
1186 goto no_mem; 1180 goto no_mem;
1187 clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events); 1181 clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
1188 goto maybe_reschedule; 1182 goto maybe_reschedule;
1189 } 1183 }
1190 1184
1191 /* handle incoming call acceptance */ 1185 /* handle incoming call acceptance */
1192 if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) { 1186 if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) {
1193 _debug("accepted"); 1187 _debug("accepted");
1194 ASSERTCMP(call->rx_data_post, ==, 0); 1188 ASSERTCMP(call->rx_data_post, ==, 0);
1195 call->rx_data_post = 1; 1189 call->rx_data_post = 1;
1196 read_lock_bh(&call->state_lock); 1190 read_lock_bh(&call->state_lock);
1197 if (call->state < RXRPC_CALL_COMPLETE) 1191 if (call->state < RXRPC_CALL_COMPLETE)
1198 set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events); 1192 set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
1199 read_unlock_bh(&call->state_lock); 1193 read_unlock_bh(&call->state_lock);
1200 } 1194 }
1201 1195
1202 /* drain the out of sequence received packet queue into the packet Rx 1196 /* drain the out of sequence received packet queue into the packet Rx
1203 * queue */ 1197 * queue */
1204 if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) { 1198 if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) {
1205 while (call->rx_data_post == call->rx_first_oos) 1199 while (call->rx_data_post == call->rx_first_oos)
1206 if (rxrpc_drain_rx_oos_queue(call) < 0) 1200 if (rxrpc_drain_rx_oos_queue(call) < 0)
1207 break; 1201 break;
@@ -1224,9 +1218,10 @@ send_ACK:
1224 ackinfo.rxMTU = htonl(rxrpc_rx_mtu); 1218 ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
1225 ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); 1219 ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
1226 1220
1227 hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); 1221 serial = atomic_inc_return(&call->conn->serial);
1222 whdr.serial = htonl(serial);
1228 _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", 1223 _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
1229 ntohl(hdr.serial), 1224 serial,
1230 ntohs(ack.maxSkew), 1225 ntohs(ack.maxSkew),
1231 ntohl(ack.firstPacket), 1226 ntohl(ack.firstPacket),
1232 ntohl(ack.previousPacket), 1227 ntohl(ack.previousPacket),
@@ -1242,8 +1237,9 @@ send_ACK:
1242send_message: 1237send_message:
1243 _debug("send message"); 1238 _debug("send message");
1244 1239
1245 hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); 1240 serial = atomic_inc_return(&call->conn->serial);
1246 _proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial)); 1241 whdr.serial = htonl(serial);
1242 _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial);
1247send_message_2: 1243send_message_2:
1248 1244
1249 len = iov[0].iov_len; 1245 len = iov[0].iov_len;
@@ -1280,12 +1276,12 @@ send_message_2:
1280 } 1276 }
1281 1277
1282 switch (genbit) { 1278 switch (genbit) {
1283 case RXRPC_CALL_ABORT: 1279 case RXRPC_CALL_EV_ABORT:
1284 clear_bit(genbit, &call->events); 1280 clear_bit(genbit, &call->events);
1285 clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events); 1281 clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
1286 goto kill_ACKs; 1282 goto kill_ACKs;
1287 1283
1288 case RXRPC_CALL_ACK_FINAL: 1284 case RXRPC_CALL_EV_ACK_FINAL:
1289 write_lock_bh(&call->state_lock); 1285 write_lock_bh(&call->state_lock);
1290 if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) 1286 if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
1291 call->state = RXRPC_CALL_COMPLETE; 1287 call->state = RXRPC_CALL_COMPLETE;
@@ -1310,9 +1306,9 @@ send_message_2:
1310 1306
1311kill_ACKs: 1307kill_ACKs:
1312 del_timer_sync(&call->ack_timer); 1308 del_timer_sync(&call->ack_timer);
1313 if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events)) 1309 if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events))
1314 rxrpc_put_call(call); 1310 rxrpc_put_call(call);
1315 clear_bit(RXRPC_CALL_ACK, &call->events); 1311 clear_bit(RXRPC_CALL_EV_ACK, &call->events);
1316 1312
1317maybe_reschedule: 1313maybe_reschedule:
1318 if (call->events || !skb_queue_empty(&call->rx_queue)) { 1314 if (call->events || !skb_queue_empty(&call->rx_queue)) {
@@ -1326,12 +1322,11 @@ maybe_reschedule:
1326 if (call->state >= RXRPC_CALL_COMPLETE && 1322 if (call->state >= RXRPC_CALL_COMPLETE &&
1327 !list_empty(&call->accept_link)) { 1323 !list_empty(&call->accept_link)) {
1328 _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }", 1324 _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
1329 call, call->events, call->flags, 1325 call, call->events, call->flags, call->conn->cid);
1330 ntohl(call->conn->cid));
1331 1326
1332 read_lock_bh(&call->state_lock); 1327 read_lock_bh(&call->state_lock);
1333 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 1328 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
1334 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 1329 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
1335 rxrpc_queue_call(call); 1330 rxrpc_queue_call(call);
1336 read_unlock_bh(&call->state_lock); 1331 read_unlock_bh(&call->state_lock);
1337 } 1332 }
@@ -1345,7 +1340,7 @@ error:
1345 * this means there's a race between clearing the flag and setting the 1340 * this means there's a race between clearing the flag and setting the
1346 * work pending bit and the work item being processed again */ 1341 * work pending bit and the work item being processed again */
1347 if (call->events && !work_pending(&call->processor)) { 1342 if (call->events && !work_pending(&call->processor)) {
1348 _debug("jumpstart %x", ntohl(call->conn->cid)); 1343 _debug("jumpstart %x", call->conn->cid);
1349 rxrpc_queue_call(call); 1344 rxrpc_queue_call(call);
1350 } 1345 }
1351 1346
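
The ar-ack.c changes above keep sp->hdr, serials and sequence numbers in host byte order throughout the call event processor, and only build a big-endian struct rxrpc_wire_header (via htonl()/htons()) at the point where the ACK, ABORT or BUSY packet is handed to kernel_sendmsg(). A minimal userspace sketch of that host-to-wire conversion pattern follows; the struct layouts and the helper name are inferred from the fields used in the hunks above, not taken from the kernel headers:

#include <stdint.h>
#include <arpa/inet.h>

/* Host-order view of the rx header, as the reworked code now carries it. */
struct host_header {
        uint32_t epoch, cid, call_number, seq, serial;
        uint8_t  type, flags, user_status, security_index;
        uint16_t _rsvd, service_id;
};

/* Wire-format header: the 32/16-bit fields are big-endian on the wire. */
struct wire_header {
        uint32_t epoch, cid, call_number, seq, serial;
        uint8_t  type, flags, user_status, security_index;
        uint16_t _rsvd, service_id;
};

/* Convert once, immediately before the packet is queued for sending. */
static void host_to_wire(struct wire_header *w, const struct host_header *h)
{
        w->epoch          = htonl(h->epoch);
        w->cid            = htonl(h->cid);
        w->call_number    = htonl(h->call_number);
        w->seq            = htonl(h->seq);
        w->serial         = htonl(h->serial);
        w->type           = h->type;            /* single bytes: no swap needed */
        w->flags          = h->flags;
        w->user_status    = h->user_status;
        w->security_index = h->security_index;
        w->_rsvd          = htons(h->_rsvd);
        w->service_id     = htons(h->service_id);
}

Keeping a single conversion point like this is what lets the rest of the code above compare, hash and log sequence and serial numbers as ordinary integers without scattered ntohl() calls.
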
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index a9e05db0f5d5..7c8d300ade9b 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -21,14 +21,14 @@
21/* 21/*
22 * Maximum lifetime of a call (in jiffies). 22 * Maximum lifetime of a call (in jiffies).
23 */ 23 */
24unsigned rxrpc_max_call_lifetime = 60 * HZ; 24unsigned int rxrpc_max_call_lifetime = 60 * HZ;
25 25
26/* 26/*
27 * Time till dead call expires after last use (in jiffies). 27 * Time till dead call expires after last use (in jiffies).
28 */ 28 */
29unsigned rxrpc_dead_call_expiry = 2 * HZ; 29unsigned int rxrpc_dead_call_expiry = 2 * HZ;
30 30
31const char *const rxrpc_call_states[] = { 31const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
32 [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", 32 [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
33 [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", 33 [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
34 [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", 34 [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
@@ -64,11 +64,11 @@ static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
64 * Hash function for rxrpc_call_hash 64 * Hash function for rxrpc_call_hash
65 */ 65 */
66static unsigned long rxrpc_call_hashfunc( 66static unsigned long rxrpc_call_hashfunc(
67 u8 clientflag, 67 u8 in_clientflag,
68 __be32 cid, 68 u32 cid,
69 __be32 call_id, 69 u32 call_id,
70 __be32 epoch, 70 u32 epoch,
71 __be16 service_id, 71 u16 service_id,
72 sa_family_t proto, 72 sa_family_t proto,
73 void *localptr, 73 void *localptr,
74 unsigned int addr_size, 74 unsigned int addr_size,
@@ -77,7 +77,6 @@ static unsigned long rxrpc_call_hashfunc(
77 const u16 *p; 77 const u16 *p;
78 unsigned int i; 78 unsigned int i;
79 unsigned long key; 79 unsigned long key;
80 u32 hcid = ntohl(cid);
81 80
82 _enter(""); 81 _enter("");
83 82
@@ -85,12 +84,12 @@ static unsigned long rxrpc_call_hashfunc(
85 /* We just want to add up the __be32 values, so forcing the 84 /* We just want to add up the __be32 values, so forcing the
86 * cast should be okay. 85 * cast should be okay.
87 */ 86 */
88 key += (__force u32)epoch; 87 key += epoch;
89 key += (__force u16)service_id; 88 key += service_id;
90 key += (__force u32)call_id; 89 key += call_id;
91 key += (hcid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT; 90 key += (cid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
92 key += hcid & RXRPC_CHANNELMASK; 91 key += cid & RXRPC_CHANNELMASK;
93 key += clientflag; 92 key += in_clientflag;
94 key += proto; 93 key += proto;
95 /* Step through the peer address in 16-bit portions for speed */ 94 /* Step through the peer address in 16-bit portions for speed */
96 for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++) 95 for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
@@ -148,19 +147,16 @@ static void rxrpc_call_hash_del(struct rxrpc_call *call)
148 * isn't there. 147 * isn't there.
149 */ 148 */
150struct rxrpc_call *rxrpc_find_call_hash( 149struct rxrpc_call *rxrpc_find_call_hash(
151 u8 clientflag, 150 struct rxrpc_host_header *hdr,
152 __be32 cid,
153 __be32 call_id,
154 __be32 epoch,
155 __be16 service_id,
156 void *localptr, 151 void *localptr,
157 sa_family_t proto, 152 sa_family_t proto,
158 const u8 *peer_addr) 153 const void *peer_addr)
159{ 154{
160 unsigned long key; 155 unsigned long key;
161 unsigned int addr_size = 0; 156 unsigned int addr_size = 0;
162 struct rxrpc_call *call = NULL; 157 struct rxrpc_call *call = NULL;
163 struct rxrpc_call *ret = NULL; 158 struct rxrpc_call *ret = NULL;
159 u8 in_clientflag = hdr->flags & RXRPC_CLIENT_INITIATED;
164 160
165 _enter(""); 161 _enter("");
166 switch (proto) { 162 switch (proto) {
@@ -174,20 +170,21 @@ struct rxrpc_call *rxrpc_find_call_hash(
174 break; 170 break;
175 } 171 }
176 172
177 key = rxrpc_call_hashfunc(clientflag, cid, call_id, epoch, 173 key = rxrpc_call_hashfunc(in_clientflag, hdr->cid, hdr->callNumber,
178 service_id, proto, localptr, addr_size, 174 hdr->epoch, hdr->serviceId,
175 proto, localptr, addr_size,
179 peer_addr); 176 peer_addr);
180 hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) { 177 hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
181 if (call->hash_key == key && 178 if (call->hash_key == key &&
182 call->call_id == call_id && 179 call->call_id == hdr->callNumber &&
183 call->cid == cid && 180 call->cid == hdr->cid &&
184 call->in_clientflag == clientflag && 181 call->in_clientflag == in_clientflag &&
185 call->service_id == service_id && 182 call->service_id == hdr->serviceId &&
186 call->proto == proto && 183 call->proto == proto &&
187 call->local == localptr && 184 call->local == localptr &&
188 memcmp(call->peer_ip.ipv6_addr, peer_addr, 185 memcmp(call->peer_ip.ipv6_addr, peer_addr,
189 addr_size) == 0 && 186 addr_size) == 0 &&
190 call->epoch == epoch) { 187 call->epoch == hdr->epoch) {
191 ret = call; 188 ret = call;
192 break; 189 break;
193 } 190 }
@@ -414,12 +411,12 @@ found_extant_second:
414 */ 411 */
415struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, 412struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
416 struct rxrpc_connection *conn, 413 struct rxrpc_connection *conn,
417 struct rxrpc_header *hdr, 414 struct rxrpc_host_header *hdr,
418 gfp_t gfp) 415 gfp_t gfp)
419{ 416{
420 struct rxrpc_call *call, *candidate; 417 struct rxrpc_call *call, *candidate;
421 struct rb_node **p, *parent; 418 struct rb_node **p, *parent;
422 __be32 call_id; 419 u32 call_id;
423 420
424 _enter(",%d,,%x", conn->debug_id, gfp); 421 _enter(",%d,,%x", conn->debug_id, gfp);
425 422
@@ -433,7 +430,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
433 candidate->conn = conn; 430 candidate->conn = conn;
434 candidate->cid = hdr->cid; 431 candidate->cid = hdr->cid;
435 candidate->call_id = hdr->callNumber; 432 candidate->call_id = hdr->callNumber;
436 candidate->channel = ntohl(hdr->cid) & RXRPC_CHANNELMASK; 433 candidate->channel = hdr->cid & RXRPC_CHANNELMASK;
437 candidate->rx_data_post = 0; 434 candidate->rx_data_post = 0;
438 candidate->state = RXRPC_CALL_SERVER_ACCEPTING; 435 candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
439 if (conn->security_ix > 0) 436 if (conn->security_ix > 0)
@@ -452,7 +449,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
452 read_lock(&call->state_lock); 449 read_lock(&call->state_lock);
453 switch (call->state) { 450 switch (call->state) {
454 case RXRPC_CALL_LOCALLY_ABORTED: 451 case RXRPC_CALL_LOCALLY_ABORTED:
455 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) 452 if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
456 rxrpc_queue_call(call); 453 rxrpc_queue_call(call);
457 case RXRPC_CALL_REMOTELY_ABORTED: 454 case RXRPC_CALL_REMOTELY_ABORTED:
458 read_unlock(&call->state_lock); 455 read_unlock(&call->state_lock);
@@ -492,9 +489,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
492 /* The tree is sorted in order of the __be32 value without 489 /* The tree is sorted in order of the __be32 value without
493 * turning it into host order. 490 * turning it into host order.
494 */ 491 */
495 if ((__force u32)call_id < (__force u32)call->call_id) 492 if (call_id < call->call_id)
496 p = &(*p)->rb_left; 493 p = &(*p)->rb_left;
497 else if ((__force u32)call_id > (__force u32)call->call_id) 494 else if (call_id > call->call_id)
498 p = &(*p)->rb_right; 495 p = &(*p)->rb_right;
499 else 496 else
500 goto old_call; 497 goto old_call;
@@ -686,7 +683,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
686 _debug("+++ ABORTING STATE %d +++\n", call->state); 683 _debug("+++ ABORTING STATE %d +++\n", call->state);
687 call->state = RXRPC_CALL_LOCALLY_ABORTED; 684 call->state = RXRPC_CALL_LOCALLY_ABORTED;
688 call->abort_code = RX_CALL_DEAD; 685 call->abort_code = RX_CALL_DEAD;
689 set_bit(RXRPC_CALL_ABORT, &call->events); 686 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
690 rxrpc_queue_call(call); 687 rxrpc_queue_call(call);
691 } 688 }
692 write_unlock(&call->state_lock); 689 write_unlock(&call->state_lock);
@@ -714,8 +711,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
714 711
715 _debug("- zap %s %%%u #%u", 712 _debug("- zap %s %%%u #%u",
716 rxrpc_pkts[sp->hdr.type], 713 rxrpc_pkts[sp->hdr.type],
717 ntohl(sp->hdr.serial), 714 sp->hdr.serial, sp->hdr.seq);
718 ntohl(sp->hdr.seq));
719 rxrpc_free_skb(skb); 715 rxrpc_free_skb(skb);
720 spin_lock_bh(&call->lock); 716 spin_lock_bh(&call->lock);
721 } 717 }
@@ -763,10 +759,10 @@ static void rxrpc_mark_call_released(struct rxrpc_call *call)
763 _debug("abort call %p", call); 759 _debug("abort call %p", call);
764 call->state = RXRPC_CALL_LOCALLY_ABORTED; 760 call->state = RXRPC_CALL_LOCALLY_ABORTED;
765 call->abort_code = RX_CALL_DEAD; 761 call->abort_code = RX_CALL_DEAD;
766 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) 762 if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
767 sched = true; 763 sched = true;
768 } 764 }
769 if (!test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 765 if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
770 sched = true; 766 sched = true;
771 if (sched) 767 if (sched)
772 rxrpc_queue_call(call); 768 rxrpc_queue_call(call);
@@ -873,9 +869,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
873 unsigned long _skb; 869 unsigned long _skb;
874 870
875 _skb = call->acks_window[call->acks_tail] & ~1; 871 _skb = call->acks_window[call->acks_tail] & ~1;
876 sp = rxrpc_skb((struct sk_buff *) _skb); 872 sp = rxrpc_skb((struct sk_buff *)_skb);
877 _debug("+++ clear Tx %u", ntohl(sp->hdr.seq)); 873 _debug("+++ clear Tx %u", sp->hdr.seq);
878 rxrpc_free_skb((struct sk_buff *) _skb); 874 rxrpc_free_skb((struct sk_buff *)_skb);
879 call->acks_tail = 875 call->acks_tail =
880 (call->acks_tail + 1) & (call->acks_winsz - 1); 876 (call->acks_tail + 1) & (call->acks_winsz - 1);
881 } 877 }
@@ -975,7 +971,7 @@ static void rxrpc_call_life_expired(unsigned long _call)
975 _enter("{%d}", call->debug_id); 971 _enter("{%d}", call->debug_id);
976 read_lock_bh(&call->state_lock); 972 read_lock_bh(&call->state_lock);
977 if (call->state < RXRPC_CALL_COMPLETE) { 973 if (call->state < RXRPC_CALL_COMPLETE) {
978 set_bit(RXRPC_CALL_LIFE_TIMER, &call->events); 974 set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
979 rxrpc_queue_call(call); 975 rxrpc_queue_call(call);
980 } 976 }
981 read_unlock_bh(&call->state_lock); 977 read_unlock_bh(&call->state_lock);
@@ -995,7 +991,7 @@ static void rxrpc_resend_time_expired(unsigned long _call)
995 return; 991 return;
996 992
997 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 993 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
998 if (!test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events)) 994 if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
999 rxrpc_queue_call(call); 995 rxrpc_queue_call(call);
1000} 996}
1001 997
@@ -1013,7 +1009,7 @@ static void rxrpc_ack_time_expired(unsigned long _call)
1013 1009
1014 read_lock_bh(&call->state_lock); 1010 read_lock_bh(&call->state_lock);
1015 if (call->state < RXRPC_CALL_COMPLETE && 1011 if (call->state < RXRPC_CALL_COMPLETE &&
1016 !test_and_set_bit(RXRPC_CALL_ACK, &call->events)) 1012 !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
1017 rxrpc_queue_call(call); 1013 rxrpc_queue_call(call);
1018 read_unlock_bh(&call->state_lock); 1014 read_unlock_bh(&call->state_lock);
1019} 1015}
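
The ar-call.c hunks above rework rxrpc_call_hashfunc() and rxrpc_find_call_hash() to take host-order values from a struct rxrpc_host_header, so the hash key is computed by plain addition instead of force-casts on __be32/__be16 fields. A standalone sketch of that key computation follows; the mask/shift constants and the initial key value are illustrative stand-ins (the kernel derives the masks from RXRPC_MAXCALLS and seeds the key outside the hunk shown):

#include <stdint.h>
#include <stddef.h>

/* Illustrative stand-ins; the kernel derives these from RXRPC_MAXCALLS. */
#define CHANNELMASK 3u
#define CIDMASK     (~CHANNELMASK)
#define CIDSHIFT    2

static unsigned long call_hash_key(uint8_t in_clientflag, uint32_t cid,
                                   uint32_t call_id, uint32_t epoch,
                                   uint16_t service_id, uint16_t proto,
                                   const void *peer_addr, size_t addr_size)
{
        const uint16_t *p = peer_addr;
        unsigned long key = 0;  /* the kernel seeds this before the hunk shown */
        size_t i;

        /* Everything is already host-order, so the fields are summed directly. */
        key += epoch;
        key += service_id;
        key += call_id;
        key += (cid & CIDMASK) >> CIDSHIFT;
        key += cid & CHANNELMASK;
        key += in_clientflag;
        key += proto;

        /* Step through the peer address in 16-bit portions for speed. */
        for (i = 0; i < addr_size >> 1; i++, p++)
                key += *p;

        return key;
}
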
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 6c71ed1caf16..9942da1edbf6 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -21,7 +21,7 @@
21/* 21/*
22 * Time till a connection expires after last use (in seconds). 22 * Time till a connection expires after last use (in seconds).
23 */ 23 */
24unsigned rxrpc_connection_expiry = 10 * 60; 24unsigned int rxrpc_connection_expiry = 10 * 60;
25 25
26static void rxrpc_connection_reaper(struct work_struct *work); 26static void rxrpc_connection_reaper(struct work_struct *work);
27 27
@@ -57,10 +57,10 @@ static struct rxrpc_conn_bundle *rxrpc_alloc_bundle(gfp_t gfp)
57 */ 57 */
58static inline 58static inline
59int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle, 59int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
60 struct key *key, __be16 service_id) 60 struct key *key, u16 service_id)
61{ 61{
62 return (bundle->service_id - service_id) ?: 62 return (bundle->service_id - service_id) ?:
63 ((unsigned long) bundle->key - (unsigned long) key); 63 ((unsigned long)bundle->key - (unsigned long)key);
64} 64}
65 65
66/* 66/*
@@ -69,14 +69,14 @@ int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
69struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx, 69struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
70 struct rxrpc_transport *trans, 70 struct rxrpc_transport *trans,
71 struct key *key, 71 struct key *key,
72 __be16 service_id, 72 u16 service_id,
73 gfp_t gfp) 73 gfp_t gfp)
74{ 74{
75 struct rxrpc_conn_bundle *bundle, *candidate; 75 struct rxrpc_conn_bundle *bundle, *candidate;
76 struct rb_node *p, *parent, **pp; 76 struct rb_node *p, *parent, **pp;
77 77
78 _enter("%p{%x},%x,%hx,", 78 _enter("%p{%x},%x,%hx,",
79 rx, key_serial(key), trans->debug_id, ntohs(service_id)); 79 rx, key_serial(key), trans->debug_id, service_id);
80 80
81 if (rx->trans == trans && rx->bundle) { 81 if (rx->trans == trans && rx->bundle) {
82 atomic_inc(&rx->bundle->usage); 82 atomic_inc(&rx->bundle->usage);
@@ -213,7 +213,7 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
213 conn->debug_id = atomic_inc_return(&rxrpc_debug_id); 213 conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
214 conn->avail_calls = RXRPC_MAXCALLS; 214 conn->avail_calls = RXRPC_MAXCALLS;
215 conn->size_align = 4; 215 conn->size_align = 4;
216 conn->header_size = sizeof(struct rxrpc_header); 216 conn->header_size = sizeof(struct rxrpc_wire_header);
217 } 217 }
218 218
219 _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); 219 _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
@@ -230,7 +230,7 @@ static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
230 struct rxrpc_connection *xconn; 230 struct rxrpc_connection *xconn;
231 struct rb_node *parent, **p; 231 struct rb_node *parent, **p;
232 __be32 epoch; 232 __be32 epoch;
233 u32 real_conn_id; 233 u32 cid;
234 234
235 _enter(""); 235 _enter("");
236 236
@@ -241,7 +241,7 @@ static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
241 conn->trans->conn_idcounter += RXRPC_CID_INC; 241 conn->trans->conn_idcounter += RXRPC_CID_INC;
242 if (conn->trans->conn_idcounter < RXRPC_CID_INC) 242 if (conn->trans->conn_idcounter < RXRPC_CID_INC)
243 conn->trans->conn_idcounter = RXRPC_CID_INC; 243 conn->trans->conn_idcounter = RXRPC_CID_INC;
244 real_conn_id = conn->trans->conn_idcounter; 244 cid = conn->trans->conn_idcounter;
245 245
246attempt_insertion: 246attempt_insertion:
247 parent = NULL; 247 parent = NULL;
@@ -255,9 +255,9 @@ attempt_insertion:
255 p = &(*p)->rb_left; 255 p = &(*p)->rb_left;
256 else if (epoch > xconn->epoch) 256 else if (epoch > xconn->epoch)
257 p = &(*p)->rb_right; 257 p = &(*p)->rb_right;
258 else if (real_conn_id < xconn->real_conn_id) 258 else if (cid < xconn->cid)
259 p = &(*p)->rb_left; 259 p = &(*p)->rb_left;
260 else if (real_conn_id > xconn->real_conn_id) 260 else if (cid > xconn->cid)
261 p = &(*p)->rb_right; 261 p = &(*p)->rb_right;
262 else 262 else
263 goto id_exists; 263 goto id_exists;
@@ -268,20 +268,19 @@ attempt_insertion:
268 rb_link_node(&conn->node, parent, p); 268 rb_link_node(&conn->node, parent, p);
269 rb_insert_color(&conn->node, &conn->trans->client_conns); 269 rb_insert_color(&conn->node, &conn->trans->client_conns);
270 270
271 conn->real_conn_id = real_conn_id; 271 conn->cid = cid;
272 conn->cid = htonl(real_conn_id);
273 write_unlock_bh(&conn->trans->conn_lock); 272 write_unlock_bh(&conn->trans->conn_lock);
274 _leave(" [CONNID %x CID %x]", real_conn_id, ntohl(conn->cid)); 273 _leave(" [CID %x]", cid);
275 return; 274 return;
276 275
277 /* we found a connection with the proposed ID - walk the tree from that 276 /* we found a connection with the proposed ID - walk the tree from that
278 * point looking for the next unused ID */ 277 * point looking for the next unused ID */
279id_exists: 278id_exists:
280 for (;;) { 279 for (;;) {
281 real_conn_id += RXRPC_CID_INC; 280 cid += RXRPC_CID_INC;
282 if (real_conn_id < RXRPC_CID_INC) { 281 if (cid < RXRPC_CID_INC) {
283 real_conn_id = RXRPC_CID_INC; 282 cid = RXRPC_CID_INC;
284 conn->trans->conn_idcounter = real_conn_id; 283 conn->trans->conn_idcounter = cid;
285 goto attempt_insertion; 284 goto attempt_insertion;
286 } 285 }
287 286
@@ -291,7 +290,7 @@ id_exists:
291 290
292 xconn = rb_entry(parent, struct rxrpc_connection, node); 291 xconn = rb_entry(parent, struct rxrpc_connection, node);
293 if (epoch < xconn->epoch || 292 if (epoch < xconn->epoch ||
294 real_conn_id < xconn->real_conn_id) 293 cid < xconn->cid)
295 goto attempt_insertion; 294 goto attempt_insertion;
296 } 295 }
297} 296}
@@ -334,7 +333,7 @@ static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
334 */ 333 */
335static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, 334static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
336 struct rxrpc_transport *trans, 335 struct rxrpc_transport *trans,
337 __be16 service_id, 336 u16 service_id,
338 struct rxrpc_call *call, 337 struct rxrpc_call *call,
339 gfp_t gfp) 338 gfp_t gfp)
340{ 339{
@@ -404,11 +403,11 @@ found_channel:
404 conn->channels[chan] = call; 403 conn->channels[chan] = call;
405 call->conn = conn; 404 call->conn = conn;
406 call->channel = chan; 405 call->channel = chan;
407 call->cid = conn->cid | htonl(chan); 406 call->cid = conn->cid | chan;
408 call->call_id = htonl(++conn->call_counter); 407 call->call_id = ++conn->call_counter;
409 408
410 _net("CONNECT client on conn %d chan %d as call %x", 409 _net("CONNECT client on conn %d chan %d as call %x",
411 conn->debug_id, chan, ntohl(call->call_id)); 410 conn->debug_id, chan, call->call_id);
412 411
413 spin_unlock(&trans->client_lock); 412 spin_unlock(&trans->client_lock);
414 413
@@ -593,11 +592,11 @@ found_channel:
593 conn->channels[chan] = call; 592 conn->channels[chan] = call;
594 call->conn = conn; 593 call->conn = conn;
595 call->channel = chan; 594 call->channel = chan;
596 call->cid = conn->cid | htonl(chan); 595 call->cid = conn->cid | chan;
597 call->call_id = htonl(++conn->call_counter); 596 call->call_id = ++conn->call_counter;
598 597
599 _net("CONNECT client on conn %d chan %d as call %x", 598 _net("CONNECT client on conn %d chan %d as call %x",
600 conn->debug_id, chan, ntohl(call->call_id)); 599 conn->debug_id, chan, call->call_id);
601 600
602 ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS); 601 ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
603 spin_unlock(&trans->client_lock); 602 spin_unlock(&trans->client_lock);
@@ -620,21 +619,21 @@ interrupted:
620 */ 619 */
621struct rxrpc_connection * 620struct rxrpc_connection *
622rxrpc_incoming_connection(struct rxrpc_transport *trans, 621rxrpc_incoming_connection(struct rxrpc_transport *trans,
623 struct rxrpc_header *hdr, 622 struct rxrpc_host_header *hdr,
624 gfp_t gfp) 623 gfp_t gfp)
625{ 624{
626 struct rxrpc_connection *conn, *candidate = NULL; 625 struct rxrpc_connection *conn, *candidate = NULL;
627 struct rb_node *p, **pp; 626 struct rb_node *p, **pp;
628 const char *new = "old"; 627 const char *new = "old";
629 __be32 epoch; 628 __be32 epoch;
630 u32 conn_id; 629 u32 cid;
631 630
632 _enter(""); 631 _enter("");
633 632
634 ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED); 633 ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED);
635 634
636 epoch = hdr->epoch; 635 epoch = hdr->epoch;
637 conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK; 636 cid = hdr->cid & RXRPC_CIDMASK;
638 637
639 /* search the connection list first */ 638 /* search the connection list first */
640 read_lock_bh(&trans->conn_lock); 639 read_lock_bh(&trans->conn_lock);
@@ -643,15 +642,15 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
643 while (p) { 642 while (p) {
644 conn = rb_entry(p, struct rxrpc_connection, node); 643 conn = rb_entry(p, struct rxrpc_connection, node);
645 644
646 _debug("maybe %x", conn->real_conn_id); 645 _debug("maybe %x", conn->cid);
647 646
648 if (epoch < conn->epoch) 647 if (epoch < conn->epoch)
649 p = p->rb_left; 648 p = p->rb_left;
650 else if (epoch > conn->epoch) 649 else if (epoch > conn->epoch)
651 p = p->rb_right; 650 p = p->rb_right;
652 else if (conn_id < conn->real_conn_id) 651 else if (cid < conn->cid)
653 p = p->rb_left; 652 p = p->rb_left;
654 else if (conn_id > conn->real_conn_id) 653 else if (cid > conn->cid)
655 p = p->rb_right; 654 p = p->rb_right;
656 else 655 else
657 goto found_extant_connection; 656 goto found_extant_connection;
@@ -668,12 +667,11 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
668 667
669 candidate->trans = trans; 668 candidate->trans = trans;
670 candidate->epoch = hdr->epoch; 669 candidate->epoch = hdr->epoch;
671 candidate->cid = hdr->cid & cpu_to_be32(RXRPC_CIDMASK); 670 candidate->cid = hdr->cid & RXRPC_CIDMASK;
672 candidate->service_id = hdr->serviceId; 671 candidate->service_id = hdr->serviceId;
673 candidate->security_ix = hdr->securityIndex; 672 candidate->security_ix = hdr->securityIndex;
674 candidate->in_clientflag = RXRPC_CLIENT_INITIATED; 673 candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
675 candidate->out_clientflag = 0; 674 candidate->out_clientflag = 0;
676 candidate->real_conn_id = conn_id;
677 candidate->state = RXRPC_CONN_SERVER; 675 candidate->state = RXRPC_CONN_SERVER;
678 if (candidate->service_id) 676 if (candidate->service_id)
679 candidate->state = RXRPC_CONN_SERVER_UNSECURED; 677 candidate->state = RXRPC_CONN_SERVER_UNSECURED;
@@ -690,9 +688,9 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
690 pp = &(*pp)->rb_left; 688 pp = &(*pp)->rb_left;
691 else if (epoch > conn->epoch) 689 else if (epoch > conn->epoch)
692 pp = &(*pp)->rb_right; 690 pp = &(*pp)->rb_right;
693 else if (conn_id < conn->real_conn_id) 691 else if (cid < conn->cid)
694 pp = &(*pp)->rb_left; 692 pp = &(*pp)->rb_left;
695 else if (conn_id > conn->real_conn_id) 693 else if (cid > conn->cid)
696 pp = &(*pp)->rb_right; 694 pp = &(*pp)->rb_right;
697 else 695 else
698 goto found_extant_second; 696 goto found_extant_second;
@@ -714,7 +712,7 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
714 new = "new"; 712 new = "new";
715 713
716success: 714success:
717 _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->real_conn_id); 715 _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->cid);
718 716
719 _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); 717 _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
720 return conn; 718 return conn;
@@ -751,18 +749,17 @@ security_mismatch:
751 * packet 749 * packet
752 */ 750 */
753struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans, 751struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
754 struct rxrpc_header *hdr) 752 struct rxrpc_host_header *hdr)
755{ 753{
756 struct rxrpc_connection *conn; 754 struct rxrpc_connection *conn;
757 struct rb_node *p; 755 struct rb_node *p;
758 __be32 epoch; 756 u32 epoch, cid;
759 u32 conn_id;
760 757
761 _enter(",{%x,%x}", ntohl(hdr->cid), hdr->flags); 758 _enter(",{%x,%x}", hdr->cid, hdr->flags);
762 759
763 read_lock_bh(&trans->conn_lock); 760 read_lock_bh(&trans->conn_lock);
764 761
765 conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK; 762 cid = hdr->cid & RXRPC_CIDMASK;
766 epoch = hdr->epoch; 763 epoch = hdr->epoch;
767 764
768 if (hdr->flags & RXRPC_CLIENT_INITIATED) 765 if (hdr->flags & RXRPC_CLIENT_INITIATED)
@@ -773,15 +770,15 @@ struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
773 while (p) { 770 while (p) {
774 conn = rb_entry(p, struct rxrpc_connection, node); 771 conn = rb_entry(p, struct rxrpc_connection, node);
775 772
776 _debug("maybe %x", conn->real_conn_id); 773 _debug("maybe %x", conn->cid);
777 774
778 if (epoch < conn->epoch) 775 if (epoch < conn->epoch)
779 p = p->rb_left; 776 p = p->rb_left;
780 else if (epoch > conn->epoch) 777 else if (epoch > conn->epoch)
781 p = p->rb_right; 778 p = p->rb_right;
782 else if (conn_id < conn->real_conn_id) 779 else if (cid < conn->cid)
783 p = p->rb_left; 780 p = p->rb_left;
784 else if (conn_id > conn->real_conn_id) 781 else if (cid > conn->cid)
785 p = p->rb_right; 782 p = p->rb_right;
786 else 783 else
787 goto found; 784 goto found;
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
index e7ed43a54c41..1bdaaed8cdc4 100644
--- a/net/rxrpc/ar-connevent.c
+++ b/net/rxrpc/ar-connevent.c
@@ -42,9 +42,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
42 call->state = state; 42 call->state = state;
43 call->abort_code = abort_code; 43 call->abort_code = abort_code;
44 if (state == RXRPC_CALL_LOCALLY_ABORTED) 44 if (state == RXRPC_CALL_LOCALLY_ABORTED)
45 set_bit(RXRPC_CALL_CONN_ABORT, &call->events); 45 set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
46 else 46 else
47 set_bit(RXRPC_CALL_RCVD_ABORT, &call->events); 47 set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
48 rxrpc_queue_call(call); 48 rxrpc_queue_call(call);
49 } 49 }
50 write_unlock(&call->state_lock); 50 write_unlock(&call->state_lock);
@@ -60,11 +60,12 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
60static int rxrpc_abort_connection(struct rxrpc_connection *conn, 60static int rxrpc_abort_connection(struct rxrpc_connection *conn,
61 u32 error, u32 abort_code) 61 u32 error, u32 abort_code)
62{ 62{
63 struct rxrpc_header hdr; 63 struct rxrpc_wire_header whdr;
64 struct msghdr msg; 64 struct msghdr msg;
65 struct kvec iov[2]; 65 struct kvec iov[2];
66 __be32 word; 66 __be32 word;
67 size_t len; 67 size_t len;
68 u32 serial;
68 int ret; 69 int ret;
69 70
70 _enter("%d,,%u,%u", conn->debug_id, error, abort_code); 71 _enter("%d,,%u,%u", conn->debug_id, error, abort_code);
@@ -89,28 +90,29 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
89 msg.msg_controllen = 0; 90 msg.msg_controllen = 0;
90 msg.msg_flags = 0; 91 msg.msg_flags = 0;
91 92
92 hdr.epoch = conn->epoch; 93 whdr.epoch = htonl(conn->epoch);
93 hdr.cid = conn->cid; 94 whdr.cid = htonl(conn->cid);
94 hdr.callNumber = 0; 95 whdr.callNumber = 0;
95 hdr.seq = 0; 96 whdr.seq = 0;
96 hdr.type = RXRPC_PACKET_TYPE_ABORT; 97 whdr.type = RXRPC_PACKET_TYPE_ABORT;
97 hdr.flags = conn->out_clientflag; 98 whdr.flags = conn->out_clientflag;
98 hdr.userStatus = 0; 99 whdr.userStatus = 0;
99 hdr.securityIndex = conn->security_ix; 100 whdr.securityIndex = conn->security_ix;
100 hdr._rsvd = 0; 101 whdr._rsvd = 0;
101 hdr.serviceId = conn->service_id; 102 whdr.serviceId = htons(conn->service_id);
102 103
103 word = htonl(abort_code); 104 word = htonl(abort_code);
104 105
105 iov[0].iov_base = &hdr; 106 iov[0].iov_base = &whdr;
106 iov[0].iov_len = sizeof(hdr); 107 iov[0].iov_len = sizeof(whdr);
107 iov[1].iov_base = &word; 108 iov[1].iov_base = &word;
108 iov[1].iov_len = sizeof(word); 109 iov[1].iov_len = sizeof(word);
109 110
110 len = iov[0].iov_len + iov[1].iov_len; 111 len = iov[0].iov_len + iov[1].iov_len;
111 112
112 hdr.serial = htonl(atomic_inc_return(&conn->serial)); 113 serial = atomic_inc_return(&conn->serial);
113 _proto("Tx CONN ABORT %%%u { %d }", ntohl(hdr.serial), abort_code); 114 whdr.serial = htonl(serial);
115 _proto("Tx CONN ABORT %%%u { %d }", serial, abort_code);
114 116
115 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len); 117 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
116 if (ret < 0) { 118 if (ret < 0) {
@@ -132,7 +134,7 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
132 if (call) { 134 if (call) {
133 read_lock(&call->state_lock); 135 read_lock(&call->state_lock);
134 if (call->state < RXRPC_CALL_COMPLETE && 136 if (call->state < RXRPC_CALL_COMPLETE &&
135 !test_and_set_bit(RXRPC_CALL_SECURED, &call->events)) 137 !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events))
136 rxrpc_queue_call(call); 138 rxrpc_queue_call(call);
137 read_unlock(&call->state_lock); 139 read_unlock(&call->state_lock);
138 } 140 }
@@ -146,8 +148,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
146 u32 *_abort_code) 148 u32 *_abort_code)
147{ 149{
148 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 150 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
149 __be32 tmp; 151 __be32 wtmp;
150 u32 serial; 152 u32 abort_code;
151 int loop, ret; 153 int loop, ret;
152 154
153 if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { 155 if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
@@ -155,19 +157,18 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
155 return -ECONNABORTED; 157 return -ECONNABORTED;
156 } 158 }
157 159
158 serial = ntohl(sp->hdr.serial); 160 _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
159
160 _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, serial);
161 161
162 switch (sp->hdr.type) { 162 switch (sp->hdr.type) {
163 case RXRPC_PACKET_TYPE_ABORT: 163 case RXRPC_PACKET_TYPE_ABORT:
164 if (skb_copy_bits(skb, 0, &tmp, sizeof(tmp)) < 0) 164 if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
165 return -EPROTO; 165 return -EPROTO;
166 _proto("Rx ABORT %%%u { ac=%d }", serial, ntohl(tmp)); 166 abort_code = ntohl(wtmp);
167 _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
167 168
168 conn->state = RXRPC_CONN_REMOTELY_ABORTED; 169 conn->state = RXRPC_CONN_REMOTELY_ABORTED;
169 rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, 170 rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
170 ntohl(tmp)); 171 abort_code);
171 return -ECONNABORTED; 172 return -ECONNABORTED;
172 173
173 case RXRPC_PACKET_TYPE_CHALLENGE: 174 case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -335,7 +336,7 @@ void rxrpc_reject_packets(struct work_struct *work)
335 struct sockaddr_in sin; 336 struct sockaddr_in sin;
336 } sa; 337 } sa;
337 struct rxrpc_skb_priv *sp; 338 struct rxrpc_skb_priv *sp;
338 struct rxrpc_header hdr; 339 struct rxrpc_wire_header whdr;
339 struct rxrpc_local *local; 340 struct rxrpc_local *local;
340 struct sk_buff *skb; 341 struct sk_buff *skb;
341 struct msghdr msg; 342 struct msghdr msg;
@@ -348,11 +349,11 @@ void rxrpc_reject_packets(struct work_struct *work)
348 349
349 _enter("%d", local->debug_id); 350 _enter("%d", local->debug_id);
350 351
351 iov[0].iov_base = &hdr; 352 iov[0].iov_base = &whdr;
352 iov[0].iov_len = sizeof(hdr); 353 iov[0].iov_len = sizeof(whdr);
353 iov[1].iov_base = &code; 354 iov[1].iov_base = &code;
354 iov[1].iov_len = sizeof(code); 355 iov[1].iov_len = sizeof(code);
355 size = sizeof(hdr) + sizeof(code); 356 size = sizeof(whdr) + sizeof(code);
356 357
357 msg.msg_name = &sa; 358 msg.msg_name = &sa;
358 msg.msg_control = NULL; 359 msg.msg_control = NULL;
@@ -370,8 +371,8 @@ void rxrpc_reject_packets(struct work_struct *work)
370 break; 371 break;
371 } 372 }
372 373
373 memset(&hdr, 0, sizeof(hdr)); 374 memset(&whdr, 0, sizeof(whdr));
374 hdr.type = RXRPC_PACKET_TYPE_ABORT; 375 whdr.type = RXRPC_PACKET_TYPE_ABORT;
375 376
376 while ((skb = skb_dequeue(&local->reject_queue))) { 377 while ((skb = skb_dequeue(&local->reject_queue))) {
377 sp = rxrpc_skb(skb); 378 sp = rxrpc_skb(skb);
@@ -381,13 +382,13 @@ void rxrpc_reject_packets(struct work_struct *work)
381 sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; 382 sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
382 code = htonl(skb->priority); 383 code = htonl(skb->priority);
383 384
384 hdr.epoch = sp->hdr.epoch; 385 whdr.epoch = htonl(sp->hdr.epoch);
385 hdr.cid = sp->hdr.cid; 386 whdr.cid = htonl(sp->hdr.cid);
386 hdr.callNumber = sp->hdr.callNumber; 387 whdr.callNumber = htonl(sp->hdr.callNumber);
387 hdr.serviceId = sp->hdr.serviceId; 388 whdr.serviceId = htons(sp->hdr.serviceId);
388 hdr.flags = sp->hdr.flags; 389 whdr.flags = sp->hdr.flags;
389 hdr.flags ^= RXRPC_CLIENT_INITIATED; 390 whdr.flags ^= RXRPC_CLIENT_INITIATED;
390 hdr.flags &= RXRPC_CLIENT_INITIATED; 391 whdr.flags &= RXRPC_CLIENT_INITIATED;
391 392
392 kernel_sendmsg(local->socket, &msg, iov, 2, size); 393 kernel_sendmsg(local->socket, &msg, iov, 2, size);
393 break; 394 break;
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index 0610efa83d72..3e82d6f0313c 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -115,7 +115,6 @@ void rxrpc_UDP_error_report(struct sock *sk)
115 /* pass the transport ref to error_handler to release */ 115 /* pass the transport ref to error_handler to release */
116 skb_queue_tail(&trans->error_queue, skb); 116 skb_queue_tail(&trans->error_queue, skb);
117 rxrpc_queue_work(&trans->error_handler); 117 rxrpc_queue_work(&trans->error_handler);
118
119 _leave(""); 118 _leave("");
120} 119}
121 120
@@ -152,28 +151,18 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
152 switch (ee->ee_code) { 151 switch (ee->ee_code) {
153 case ICMP_NET_UNREACH: 152 case ICMP_NET_UNREACH:
154 _net("Rx Received ICMP Network Unreachable"); 153 _net("Rx Received ICMP Network Unreachable");
155 err = ENETUNREACH;
156 break; 154 break;
157 case ICMP_HOST_UNREACH: 155 case ICMP_HOST_UNREACH:
158 _net("Rx Received ICMP Host Unreachable"); 156 _net("Rx Received ICMP Host Unreachable");
159 err = EHOSTUNREACH;
160 break; 157 break;
161 case ICMP_PORT_UNREACH: 158 case ICMP_PORT_UNREACH:
162 _net("Rx Received ICMP Port Unreachable"); 159 _net("Rx Received ICMP Port Unreachable");
163 err = ECONNREFUSED;
164 break;
165 case ICMP_FRAG_NEEDED:
166 _net("Rx Received ICMP Fragmentation Needed (%d)",
167 ee->ee_info);
168 err = 0; /* dealt with elsewhere */
169 break; 160 break;
170 case ICMP_NET_UNKNOWN: 161 case ICMP_NET_UNKNOWN:
171 _net("Rx Received ICMP Unknown Network"); 162 _net("Rx Received ICMP Unknown Network");
172 err = ENETUNREACH;
173 break; 163 break;
174 case ICMP_HOST_UNKNOWN: 164 case ICMP_HOST_UNKNOWN:
175 _net("Rx Received ICMP Unknown Host"); 165 _net("Rx Received ICMP Unknown Host");
176 err = EHOSTUNREACH;
177 break; 166 break;
178 default: 167 default:
179 _net("Rx Received ICMP DestUnreach code=%u", 168 _net("Rx Received ICMP DestUnreach code=%u",
@@ -222,7 +211,7 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
222 if (call->state != RXRPC_CALL_COMPLETE && 211 if (call->state != RXRPC_CALL_COMPLETE &&
223 call->state < RXRPC_CALL_NETWORK_ERROR) { 212 call->state < RXRPC_CALL_NETWORK_ERROR) {
224 call->state = RXRPC_CALL_NETWORK_ERROR; 213 call->state = RXRPC_CALL_NETWORK_ERROR;
225 set_bit(RXRPC_CALL_RCVD_ERROR, &call->events); 214 set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
226 rxrpc_queue_call(call); 215 rxrpc_queue_call(call);
227 } 216 }
228 write_unlock(&call->state_lock); 217 write_unlock(&call->state_lock);
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 4505a691d88c..63ed75c40e29 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -231,7 +231,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
231 _debug("drain rx oos now"); 231 _debug("drain rx oos now");
232 read_lock(&call->state_lock); 232 read_lock(&call->state_lock);
233 if (call->state < RXRPC_CALL_COMPLETE && 233 if (call->state < RXRPC_CALL_COMPLETE &&
234 !test_and_set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) 234 !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events))
235 rxrpc_queue_call(call); 235 rxrpc_queue_call(call);
236 read_unlock(&call->state_lock); 236 read_unlock(&call->state_lock);
237 } 237 }
@@ -287,12 +287,12 @@ static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
287 call->acks_latest = serial; 287 call->acks_latest = serial;
288 288
289 _debug("implicit ACKALL %%%u", call->acks_latest); 289 _debug("implicit ACKALL %%%u", call->acks_latest);
290 set_bit(RXRPC_CALL_RCVD_ACKALL, &call->events); 290 set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events);
291 write_unlock_bh(&call->state_lock); 291 write_unlock_bh(&call->state_lock);
292 292
293 if (try_to_del_timer_sync(&call->resend_timer) >= 0) { 293 if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
294 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); 294 clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
295 clear_bit(RXRPC_CALL_RESEND, &call->events); 295 clear_bit(RXRPC_CALL_EV_RESEND, &call->events);
296 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 296 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
297 } 297 }
298 break; 298 break;
@@ -310,8 +310,8 @@ static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
310void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) 310void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
311{ 311{
312 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 312 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
313 __be32 _abort_code; 313 __be32 wtmp;
314 u32 serial, hi_serial, seq, abort_code; 314 u32 hi_serial, abort_code;
315 315
316 _enter("%p,%p", call, skb); 316 _enter("%p,%p", call, skb);
317 317
@@ -330,16 +330,15 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
330 330
331 /* track the latest serial number on this connection for ACK packet 331 /* track the latest serial number on this connection for ACK packet
332 * information */ 332 * information */
333 serial = ntohl(sp->hdr.serial);
334 hi_serial = atomic_read(&call->conn->hi_serial); 333 hi_serial = atomic_read(&call->conn->hi_serial);
335 while (serial > hi_serial) 334 while (sp->hdr.serial > hi_serial)
336 hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial, 335 hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
337 serial); 336 sp->hdr.serial);
338 337
339 /* request ACK generation for any ACK or DATA packet that requests 338 /* request ACK generation for any ACK or DATA packet that requests
340 * it */ 339 * it */
341 if (sp->hdr.flags & RXRPC_REQUEST_ACK) { 340 if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
342 _proto("ACK Requested on %%%u", serial); 341 _proto("ACK Requested on %%%u", sp->hdr.serial);
343 rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false); 342 rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
344 } 343 }
345 344
@@ -347,24 +346,23 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
347 case RXRPC_PACKET_TYPE_ABORT: 346 case RXRPC_PACKET_TYPE_ABORT:
348 _debug("abort"); 347 _debug("abort");
349 348
350 if (skb_copy_bits(skb, 0, &_abort_code, 349 if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
351 sizeof(_abort_code)) < 0)
352 goto protocol_error; 350 goto protocol_error;
353 351
354 abort_code = ntohl(_abort_code); 352 abort_code = ntohl(wtmp);
355 _proto("Rx ABORT %%%u { %x }", serial, abort_code); 353 _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
356 354
357 write_lock_bh(&call->state_lock); 355 write_lock_bh(&call->state_lock);
358 if (call->state < RXRPC_CALL_COMPLETE) { 356 if (call->state < RXRPC_CALL_COMPLETE) {
359 call->state = RXRPC_CALL_REMOTELY_ABORTED; 357 call->state = RXRPC_CALL_REMOTELY_ABORTED;
360 call->abort_code = abort_code; 358 call->abort_code = abort_code;
361 set_bit(RXRPC_CALL_RCVD_ABORT, &call->events); 359 set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
362 rxrpc_queue_call(call); 360 rxrpc_queue_call(call);
363 } 361 }
364 goto free_packet_unlock; 362 goto free_packet_unlock;
365 363
366 case RXRPC_PACKET_TYPE_BUSY: 364 case RXRPC_PACKET_TYPE_BUSY:
367 _proto("Rx BUSY %%%u", serial); 365 _proto("Rx BUSY %%%u", sp->hdr.serial);
368 366
369 if (call->conn->out_clientflag) 367 if (call->conn->out_clientflag)
370 goto protocol_error; 368 goto protocol_error;
@@ -373,7 +371,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
373 switch (call->state) { 371 switch (call->state) {
374 case RXRPC_CALL_CLIENT_SEND_REQUEST: 372 case RXRPC_CALL_CLIENT_SEND_REQUEST:
375 call->state = RXRPC_CALL_SERVER_BUSY; 373 call->state = RXRPC_CALL_SERVER_BUSY;
376 set_bit(RXRPC_CALL_RCVD_BUSY, &call->events); 374 set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
377 rxrpc_queue_call(call); 375 rxrpc_queue_call(call);
378 case RXRPC_CALL_SERVER_BUSY: 376 case RXRPC_CALL_SERVER_BUSY:
379 goto free_packet_unlock; 377 goto free_packet_unlock;
@@ -382,15 +380,13 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
382 } 380 }
383 381
384 default: 382 default:
385 _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], serial); 383 _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
386 goto protocol_error; 384 goto protocol_error;
387 385
388 case RXRPC_PACKET_TYPE_DATA: 386 case RXRPC_PACKET_TYPE_DATA:
389 seq = ntohl(sp->hdr.seq); 387 _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
390 388
391 _proto("Rx DATA %%%u { #%u }", serial, seq); 389 if (sp->hdr.seq == 0)
392
393 if (seq == 0)
394 goto protocol_error; 390 goto protocol_error;
395 391
396 call->ackr_prev_seq = sp->hdr.seq; 392 call->ackr_prev_seq = sp->hdr.seq;
@@ -398,9 +394,9 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
398 /* received data implicitly ACKs all of the request packets we 394 /* received data implicitly ACKs all of the request packets we
399 * sent when we're acting as a client */ 395 * sent when we're acting as a client */
400 if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) 396 if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
401 rxrpc_assume_implicit_ackall(call, serial); 397 rxrpc_assume_implicit_ackall(call, sp->hdr.serial);
402 398
403 switch (rxrpc_fast_process_data(call, skb, seq)) { 399 switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) {
404 case 0: 400 case 0:
405 skb = NULL; 401 skb = NULL;
406 goto done; 402 goto done;
@@ -433,7 +429,7 @@ protocol_error_locked:
433 if (call->state <= RXRPC_CALL_COMPLETE) { 429 if (call->state <= RXRPC_CALL_COMPLETE) {
434 call->state = RXRPC_CALL_LOCALLY_ABORTED; 430 call->state = RXRPC_CALL_LOCALLY_ABORTED;
435 call->abort_code = RX_PROTOCOL_ERROR; 431 call->abort_code = RX_PROTOCOL_ERROR;
436 set_bit(RXRPC_CALL_ABORT, &call->events); 432 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
437 rxrpc_queue_call(call); 433 rxrpc_queue_call(call);
438 } 434 }
439free_packet_unlock: 435free_packet_unlock:
@@ -481,12 +477,12 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
481 if (!pskb_pull(jumbo, sizeof(jhdr))) 477 if (!pskb_pull(jumbo, sizeof(jhdr)))
482 BUG(); 478 BUG();
483 479
484 sp->hdr.seq = htonl(ntohl(sp->hdr.seq) + 1); 480 sp->hdr.seq += 1;
485 sp->hdr.serial = htonl(ntohl(sp->hdr.serial) + 1); 481 sp->hdr.serial += 1;
486 sp->hdr.flags = jhdr.flags; 482 sp->hdr.flags = jhdr.flags;
487 sp->hdr._rsvd = jhdr._rsvd; 483 sp->hdr._rsvd = jhdr._rsvd;
488 484
489 _proto("Rx DATA Jumbo %%%u", ntohl(sp->hdr.serial) - 1); 485 _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
490 486
491 rxrpc_fast_process_packet(call, part); 487 rxrpc_fast_process_packet(call, part);
492 part = NULL; 488 part = NULL;
@@ -505,7 +501,7 @@ protocol_error:
505 if (call->state <= RXRPC_CALL_COMPLETE) { 501 if (call->state <= RXRPC_CALL_COMPLETE) {
506 call->state = RXRPC_CALL_LOCALLY_ABORTED; 502 call->state = RXRPC_CALL_LOCALLY_ABORTED;
507 call->abort_code = RX_PROTOCOL_ERROR; 503 call->abort_code = RX_PROTOCOL_ERROR;
508 set_bit(RXRPC_CALL_ABORT, &call->events); 504 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
509 rxrpc_queue_call(call); 505 rxrpc_queue_call(call);
510 } 506 }
511 write_unlock_bh(&call->state_lock); 507 write_unlock_bh(&call->state_lock);
@@ -530,7 +526,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
530 read_lock(&call->state_lock); 526 read_lock(&call->state_lock);
531 switch (call->state) { 527 switch (call->state) {
532 case RXRPC_CALL_LOCALLY_ABORTED: 528 case RXRPC_CALL_LOCALLY_ABORTED:
533 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) { 529 if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
534 rxrpc_queue_call(call); 530 rxrpc_queue_call(call);
535 goto free_unlock; 531 goto free_unlock;
536 } 532 }
@@ -546,7 +542,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
546 /* resend last packet of a completed call */ 542 /* resend last packet of a completed call */
547 _debug("final ack again"); 543 _debug("final ack again");
548 rxrpc_get_call(call); 544 rxrpc_get_call(call);
549 set_bit(RXRPC_CALL_ACK_FINAL, &call->events); 545 set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
550 rxrpc_queue_call(call); 546 rxrpc_queue_call(call);
551 goto free_unlock; 547 goto free_unlock;
552 default: 548 default:
@@ -607,6 +603,35 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
607 rxrpc_queue_work(&local->event_processor); 603 rxrpc_queue_work(&local->event_processor);
608} 604}
609 605
606/*
607 * Extract the wire header from a packet and translate the byte order.
608 */
609static noinline
610int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
611{
612 struct rxrpc_wire_header whdr;
613
614 /* dig out the RxRPC connection details */
615 if (skb_copy_bits(skb, sizeof(struct udphdr), &whdr, sizeof(whdr)) < 0)
616 return -EBADMSG;
617 if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(whdr)))
618 BUG();
619
620 memset(sp, 0, sizeof(*sp));
621 sp->hdr.epoch = ntohl(whdr.epoch);
622 sp->hdr.cid = ntohl(whdr.cid);
623 sp->hdr.callNumber = ntohl(whdr.callNumber);
624 sp->hdr.seq = ntohl(whdr.seq);
625 sp->hdr.serial = ntohl(whdr.serial);
626 sp->hdr.flags = whdr.flags;
627 sp->hdr.type = whdr.type;
628 sp->hdr.userStatus = whdr.userStatus;
629 sp->hdr.securityIndex = whdr.securityIndex;
630 sp->hdr._rsvd = ntohs(whdr._rsvd);
631 sp->hdr.serviceId = ntohs(whdr.serviceId);
632 return 0;
633}
634
610static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local, 635static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
611 struct sk_buff *skb, 636 struct sk_buff *skb,
612 struct rxrpc_skb_priv *sp) 637 struct rxrpc_skb_priv *sp)
@@ -686,29 +711,25 @@ void rxrpc_data_ready(struct sock *sk)
686 711
687 UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0); 712 UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0);
688 713
689 /* the socket buffer we have is owned by UDP, with UDP's data all over 714 /* The socket buffer we have is owned by UDP, with UDP's data all over
690 * it, but we really want our own */ 715 * it, but we really want our own data there.
716 */
691 skb_orphan(skb); 717 skb_orphan(skb);
692 sp = rxrpc_skb(skb); 718 sp = rxrpc_skb(skb);
693 memset(sp, 0, sizeof(*sp));
694 719
695 _net("Rx UDP packet from %08x:%04hu", 720 _net("Rx UDP packet from %08x:%04hu",
696 ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); 721 ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
697 722
698 /* dig out the RxRPC connection details */ 723 /* dig out the RxRPC connection details */
699 if (skb_copy_bits(skb, sizeof(struct udphdr), &sp->hdr, 724 if (rxrpc_extract_header(sp, skb) < 0)
700 sizeof(sp->hdr)) < 0)
701 goto bad_message; 725 goto bad_message;
702 if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(sp->hdr)))
703 BUG();
704 726
705 _net("Rx RxRPC %s ep=%x call=%x:%x", 727 _net("Rx RxRPC %s ep=%x call=%x:%x",
706 sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient", 728 sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
707 ntohl(sp->hdr.epoch), 729 sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber);
708 ntohl(sp->hdr.cid),
709 ntohl(sp->hdr.callNumber));
710 730
711 if (sp->hdr.type == 0 || sp->hdr.type >= RXRPC_N_PACKET_TYPES) { 731 if (sp->hdr.type >= RXRPC_N_PACKET_TYPES ||
732 !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) {
712 _proto("Rx Bad Packet Type %u", sp->hdr.type); 733 _proto("Rx Bad Packet Type %u", sp->hdr.type);
713 goto bad_message; 734 goto bad_message;
714 } 735 }
@@ -737,14 +758,9 @@ void rxrpc_data_ready(struct sock *sk)
737 rxrpc_put_connection(conn); 758 rxrpc_put_connection(conn);
738 } else { 759 } else {
739 struct rxrpc_call *call; 760 struct rxrpc_call *call;
740 u8 in_clientflag = 0; 761
741 762 call = rxrpc_find_call_hash(&sp->hdr, local,
742 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) 763 AF_INET, &ip_hdr(skb)->saddr);
743 in_clientflag = RXRPC_CLIENT_INITIATED;
744 call = rxrpc_find_call_hash(in_clientflag, sp->hdr.cid,
745 sp->hdr.callNumber, sp->hdr.epoch,
746 sp->hdr.serviceId, local, AF_INET,
747 (u8 *)&ip_hdr(skb)->saddr);
748 if (call) 764 if (call)
749 rxrpc_post_packet_to_call(call, skb); 765 rxrpc_post_packet_to_call(call, skb);
750 else 766 else
@@ -759,7 +775,7 @@ cant_route_call:
759 _debug("can't route call"); 775 _debug("can't route call");
760 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && 776 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
761 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { 777 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
762 if (sp->hdr.seq == cpu_to_be32(1)) { 778 if (sp->hdr.seq == 1) {
763 _debug("first packet"); 779 _debug("first packet");
764 skb_queue_tail(&local->accept_queue, skb); 780 skb_queue_tail(&local->accept_queue, skb);
765 rxrpc_queue_work(&local->acceptor); 781 rxrpc_queue_work(&local->acceptor);
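[editor's note] The central change in ar-input.c is rxrpc_extract_header(): the big-endian wire header is copied out of the skb once and translated into the CPU-order rxrpc_host_header kept in the skb's cb area, so the rest of the code can drop its ntohl()/ntohs() calls. A minimal, runnable userspace sketch of the same extract-and-byteswap pattern; the struct and field names here are illustrative stand-ins, not the kernel's:

/* Userspace sketch of the pattern used by rxrpc_extract_header(). */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct wire_hdr {		/* network byte order, as seen on the wire */
	uint32_t epoch, cid, call, seq, serial;
	uint8_t  type, flags, user_status, security_ix;
	uint16_t rsvd, service_id;
} __attribute__((packed));

struct host_hdr {		/* CPU byte order, used by the rest of the code */
	uint32_t epoch, cid, call, seq, serial;
	uint8_t  type, flags, user_status, security_ix;
	uint16_t rsvd, service_id;
};

static int extract_header(struct host_hdr *h, const void *pkt, size_t len)
{
	struct wire_hdr w;

	if (len < sizeof(w))
		return -1;		/* stand-in for -EBADMSG */
	memcpy(&w, pkt, sizeof(w));
	h->epoch  = ntohl(w.epoch);
	h->cid    = ntohl(w.cid);
	h->call   = ntohl(w.call);
	h->seq    = ntohl(w.seq);
	h->serial = ntohl(w.serial);
	h->type        = w.type;
	h->flags       = w.flags;
	h->user_status = w.user_status;
	h->security_ix = w.security_ix;
	h->rsvd        = ntohs(w.rsvd);
	h->service_id  = ntohs(w.service_id);
	return 0;
}

int main(void)
{
	struct wire_hdr w = { .serial = htonl(42), .service_id = htons(2) };
	struct host_hdr h;

	if (extract_header(&h, &w, sizeof(w)) == 0)
		printf("serial=%u service=%u\n", h.serial, h.service_id);
	return 0;
}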
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 2934a73a5981..cd6cdbe87125 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -16,7 +16,7 @@
16 BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \ 16 BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
17 (POISON_FREE << 8 | POISON_FREE)) 17 (POISON_FREE << 8 | POISON_FREE))
18#else 18#else
19#define CHECK_SLAB_OKAY(X) do {} while(0) 19#define CHECK_SLAB_OKAY(X) do {} while (0)
20#endif 20#endif
21 21
22#define FCRYPT_BSIZE 8 22#define FCRYPT_BSIZE 8
@@ -70,12 +70,31 @@ struct rxrpc_sock {
70#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT 70#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
71 struct sockaddr_rxrpc srx; /* local address */ 71 struct sockaddr_rxrpc srx; /* local address */
72 sa_family_t proto; /* protocol created with */ 72 sa_family_t proto; /* protocol created with */
73 __be16 service_id; /* service ID of local/remote service */
74}; 73};
75 74
76#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk) 75#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
77 76
78/* 77/*
78 * CPU-byteorder normalised Rx packet header.
79 */
80struct rxrpc_host_header {
81 u32 epoch; /* client boot timestamp */
82 u32 cid; /* connection and channel ID */
83 u32 callNumber; /* call ID (0 for connection-level packets) */
84 u32 seq; /* sequence number of pkt in call stream */
85 u32 serial; /* serial number of pkt sent to network */
86 u8 type; /* packet type */
87 u8 flags; /* packet flags */
88 u8 userStatus; /* app-layer defined status */
89 u8 securityIndex; /* security protocol ID */
90 union {
91 u16 _rsvd; /* reserved */
92 u16 cksum; /* kerberos security checksum */
93 };
94 u16 serviceId; /* service ID */
95} __packed;
96
97/*
79 * RxRPC socket buffer private variables 98 * RxRPC socket buffer private variables
80 * - max 48 bytes (struct sk_buff::cb) 99 * - max 48 bytes (struct sk_buff::cb)
81 */ 100 */
@@ -89,7 +108,7 @@ struct rxrpc_skb_priv {
89 bool need_resend; /* T if needs resending */ 108 bool need_resend; /* T if needs resending */
90 }; 109 };
91 110
92 struct rxrpc_header hdr; /* RxRPC packet header from this packet */ 111 struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
93}; 112};
94 113
95#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb) 114#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
@@ -230,7 +249,7 @@ struct rxrpc_conn_bundle {
230 atomic_t usage; 249 atomic_t usage;
231 int debug_id; /* debug ID for printks */ 250 int debug_id; /* debug ID for printks */
232 unsigned short num_conns; /* number of connections in this bundle */ 251 unsigned short num_conns; /* number of connections in this bundle */
233 __be16 service_id; /* service ID */ 252 u16 service_id; /* Service ID for this bundle */
234 u8 security_ix; /* security type */ 253 u8 security_ix; /* security type */
235}; 254};
236 255
@@ -252,7 +271,7 @@ struct rxrpc_connection {
252 struct rxrpc_security *security; /* applied security module */ 271 struct rxrpc_security *security; /* applied security module */
253 struct key *key; /* security for this connection (client) */ 272 struct key *key; /* security for this connection (client) */
254 struct key *server_key; /* security for this service */ 273 struct key *server_key; /* security for this service */
255 struct crypto_blkcipher *cipher; /* encryption handle */ 274 struct crypto_skcipher *cipher; /* encryption handle */
256 struct rxrpc_crypt csum_iv; /* packet checksum base */ 275 struct rxrpc_crypt csum_iv; /* packet checksum base */
257 unsigned long events; 276 unsigned long events;
258#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */ 277#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
@@ -260,7 +279,6 @@ struct rxrpc_connection {
260 rwlock_t lock; /* access lock */ 279 rwlock_t lock; /* access lock */
261 spinlock_t state_lock; /* state-change lock */ 280 spinlock_t state_lock; /* state-change lock */
262 atomic_t usage; 281 atomic_t usage;
263 u32 real_conn_id; /* connection ID (host-endian) */
264 enum { /* current state of connection */ 282 enum { /* current state of connection */
265 RXRPC_CONN_UNUSED, /* - connection not yet attempted */ 283 RXRPC_CONN_UNUSED, /* - connection not yet attempted */
266 RXRPC_CONN_CLIENT, /* - client connection */ 284 RXRPC_CONN_CLIENT, /* - client connection */
@@ -282,17 +300,76 @@ struct rxrpc_connection {
282 u8 security_size; /* security header size */ 300 u8 security_size; /* security header size */
283 u32 security_level; /* security level negotiated */ 301 u32 security_level; /* security level negotiated */
284 u32 security_nonce; /* response re-use preventer */ 302 u32 security_nonce; /* response re-use preventer */
285 303 u32 epoch; /* epoch of this connection */
286 /* the following are all in net order */ 304 u32 cid; /* connection ID */
287 __be32 epoch; /* epoch of this connection */ 305 u16 service_id; /* service ID for this connection */
288 __be32 cid; /* connection ID */
289 __be16 service_id; /* service ID */
290 u8 security_ix; /* security type */ 306 u8 security_ix; /* security type */
291 u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */ 307 u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
292 u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ 308 u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
293}; 309};
294 310
295/* 311/*
312 * Flags in call->flags.
313 */
314enum rxrpc_call_flag {
315 RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */
316 RXRPC_CALL_TERMINAL_MSG, /* call has given the socket its final message */
317 RXRPC_CALL_RCVD_LAST, /* all packets received */
318 RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */
319 RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */
320 RXRPC_CALL_PROC_BUSY, /* the processor is busy */
321 RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */
322 RXRPC_CALL_HAS_USERID, /* has a user ID attached */
323 RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */
324};
325
326/*
327 * Events that can be raised on a call.
328 */
329enum rxrpc_call_event {
330 RXRPC_CALL_EV_RCVD_ACKALL, /* ACKALL or reply received */
331 RXRPC_CALL_EV_RCVD_BUSY, /* busy packet received */
332 RXRPC_CALL_EV_RCVD_ABORT, /* abort packet received */
333 RXRPC_CALL_EV_RCVD_ERROR, /* network error received */
334 RXRPC_CALL_EV_ACK_FINAL, /* need to generate final ACK (and release call) */
335 RXRPC_CALL_EV_ACK, /* need to generate ACK */
336 RXRPC_CALL_EV_REJECT_BUSY, /* need to generate busy message */
337 RXRPC_CALL_EV_ABORT, /* need to generate abort */
338 RXRPC_CALL_EV_CONN_ABORT, /* local connection abort generated */
339 RXRPC_CALL_EV_RESEND_TIMER, /* Tx resend timer expired */
340 RXRPC_CALL_EV_RESEND, /* Tx resend required */
341 RXRPC_CALL_EV_DRAIN_RX_OOS, /* drain the Rx out of sequence queue */
342 RXRPC_CALL_EV_LIFE_TIMER, /* call's lifetimer ran out */
343 RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */
344 RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */
345 RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */
346 RXRPC_CALL_EV_RELEASE, /* need to release the call's resources */
347};
348
349/*
350 * The states that a call can be in.
351 */
352enum rxrpc_call_state {
353 RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
354 RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
355 RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
356 RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */
357 RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
358 RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */
359 RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
360 RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
361 RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
362 RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
363 RXRPC_CALL_COMPLETE, /* - call completed */
364 RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */
365 RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
366 RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
367 RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
368 RXRPC_CALL_DEAD, /* - call is dead */
369 NR__RXRPC_CALL_STATES
370};
371
372/*
296 * RxRPC call definition 373 * RxRPC call definition
297 * - matched by { connection, call_id } 374 * - matched by { connection, call_id }
298 */ 375 */
@@ -317,57 +394,13 @@ struct rxrpc_call {
317 unsigned long user_call_ID; /* user-defined call ID */ 394 unsigned long user_call_ID; /* user-defined call ID */
318 unsigned long creation_jif; /* time of call creation */ 395 unsigned long creation_jif; /* time of call creation */
319 unsigned long flags; 396 unsigned long flags;
320#define RXRPC_CALL_RELEASED 0 /* call has been released - no more message to userspace */
321#define RXRPC_CALL_TERMINAL_MSG 1 /* call has given the socket its final message */
322#define RXRPC_CALL_RCVD_LAST 2 /* all packets received */
323#define RXRPC_CALL_RUN_RTIMER 3 /* Tx resend timer started */
324#define RXRPC_CALL_TX_SOFT_ACK 4 /* sent some soft ACKs */
325#define RXRPC_CALL_PROC_BUSY 5 /* the processor is busy */
326#define RXRPC_CALL_INIT_ACCEPT 6 /* acceptance was initiated */
327#define RXRPC_CALL_HAS_USERID 7 /* has a user ID attached */
328#define RXRPC_CALL_EXPECT_OOS 8 /* expect out of sequence packets */
329 unsigned long events; 397 unsigned long events;
330#define RXRPC_CALL_RCVD_ACKALL 0 /* ACKALL or reply received */
331#define RXRPC_CALL_RCVD_BUSY 1 /* busy packet received */
332#define RXRPC_CALL_RCVD_ABORT 2 /* abort packet received */
333#define RXRPC_CALL_RCVD_ERROR 3 /* network error received */
334#define RXRPC_CALL_ACK_FINAL 4 /* need to generate final ACK (and release call) */
335#define RXRPC_CALL_ACK 5 /* need to generate ACK */
336#define RXRPC_CALL_REJECT_BUSY 6 /* need to generate busy message */
337#define RXRPC_CALL_ABORT 7 /* need to generate abort */
338#define RXRPC_CALL_CONN_ABORT 8 /* local connection abort generated */
339#define RXRPC_CALL_RESEND_TIMER 9 /* Tx resend timer expired */
340#define RXRPC_CALL_RESEND 10 /* Tx resend required */
341#define RXRPC_CALL_DRAIN_RX_OOS 11 /* drain the Rx out of sequence queue */
342#define RXRPC_CALL_LIFE_TIMER 12 /* call's lifetimer ran out */
343#define RXRPC_CALL_ACCEPTED 13 /* incoming call accepted by userspace app */
344#define RXRPC_CALL_SECURED 14 /* incoming call's connection is now secure */
345#define RXRPC_CALL_POST_ACCEPT 15 /* need to post an "accept?" message to the app */
346#define RXRPC_CALL_RELEASE 16 /* need to release the call's resources */
347
348 spinlock_t lock; 398 spinlock_t lock;
349 rwlock_t state_lock; /* lock for state transition */ 399 rwlock_t state_lock; /* lock for state transition */
350 atomic_t usage; 400 atomic_t usage;
351 atomic_t sequence; /* Tx data packet sequence counter */ 401 atomic_t sequence; /* Tx data packet sequence counter */
352 u32 abort_code; /* local/remote abort code */ 402 u32 abort_code; /* local/remote abort code */
353 enum { /* current state of call */ 403 enum rxrpc_call_state state : 8; /* current state of call */
354 RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
355 RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
356 RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
357 RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */
358 RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
359 RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */
360 RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
361 RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
362 RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
363 RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
364 RXRPC_CALL_COMPLETE, /* - call completed */
365 RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */
366 RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
367 RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
368 RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
369 RXRPC_CALL_DEAD, /* - call is dead */
370 } state;
371 int debug_id; /* debug ID for printks */ 404 int debug_id; /* debug ID for printks */
372 u8 channel; /* connection channel occupied by this call */ 405 u8 channel; /* connection channel occupied by this call */
373 406
@@ -389,9 +422,9 @@ struct rxrpc_call {
389 rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */ 422 rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */
390 rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */ 423 rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */
391 rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ 424 rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */
392 rxrpc_seq_net_t ackr_prev_seq; /* previous sequence number received */ 425 rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
393 u8 ackr_reason; /* reason to ACK */ 426 u8 ackr_reason; /* reason to ACK */
394 __be32 ackr_serial; /* serial of packet being ACK'd */ 427 rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
395 atomic_t ackr_not_idle; /* number of packets in Rx queue */ 428 atomic_t ackr_not_idle; /* number of packets in Rx queue */
396 429
397 /* received packet records, 1 bit per record */ 430 /* received packet records, 1 bit per record */
@@ -403,11 +436,10 @@ struct rxrpc_call {
403 u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */ 436 u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
404 struct rxrpc_local *local; /* Local endpoint. Used for hashing. */ 437 struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
405 sa_family_t proto; /* Frame protocol */ 438 sa_family_t proto; /* Frame protocol */
406 /* the following should all be in net order */ 439 u32 call_id; /* call ID on connection */
407 __be32 cid; /* connection ID + channel index */ 440 u32 cid; /* connection ID plus channel index */
408 __be32 call_id; /* call ID on connection */ 441 u32 epoch; /* epoch of this connection */
409 __be32 epoch; /* epoch of this connection */ 442 u16 service_id; /* service ID */
410 __be16 service_id; /* service ID */
411 union { /* Peer IP address for hashing */ 443 union { /* Peer IP address for hashing */
412 __be32 ipv4_addr; 444 __be32 ipv4_addr;
413 __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */ 445 __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */
@@ -423,7 +455,7 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
423 if (call->state < RXRPC_CALL_COMPLETE) { 455 if (call->state < RXRPC_CALL_COMPLETE) {
424 call->abort_code = abort_code; 456 call->abort_code = abort_code;
425 call->state = RXRPC_CALL_LOCALLY_ABORTED; 457 call->state = RXRPC_CALL_LOCALLY_ABORTED;
426 set_bit(RXRPC_CALL_ABORT, &call->events); 458 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
427 } 459 }
428 write_unlock_bh(&call->state_lock); 460 write_unlock_bh(&call->state_lock);
429} 461}
@@ -432,7 +464,7 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
432 * af_rxrpc.c 464 * af_rxrpc.c
433 */ 465 */
434extern atomic_t rxrpc_n_skbs; 466extern atomic_t rxrpc_n_skbs;
435extern __be32 rxrpc_epoch; 467extern u32 rxrpc_epoch;
436extern atomic_t rxrpc_debug_id; 468extern atomic_t rxrpc_debug_id;
437extern struct workqueue_struct *rxrpc_workqueue; 469extern struct workqueue_struct *rxrpc_workqueue;
438 470
@@ -446,35 +478,35 @@ int rxrpc_reject_call(struct rxrpc_sock *);
446/* 478/*
447 * ar-ack.c 479 * ar-ack.c
448 */ 480 */
449extern unsigned rxrpc_requested_ack_delay; 481extern unsigned int rxrpc_requested_ack_delay;
450extern unsigned rxrpc_soft_ack_delay; 482extern unsigned int rxrpc_soft_ack_delay;
451extern unsigned rxrpc_idle_ack_delay; 483extern unsigned int rxrpc_idle_ack_delay;
452extern unsigned rxrpc_rx_window_size; 484extern unsigned int rxrpc_rx_window_size;
453extern unsigned rxrpc_rx_mtu; 485extern unsigned int rxrpc_rx_mtu;
454extern unsigned rxrpc_rx_jumbo_max; 486extern unsigned int rxrpc_rx_jumbo_max;
455 487
456void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); 488void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
457void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); 489void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
458void rxrpc_process_call(struct work_struct *); 490void rxrpc_process_call(struct work_struct *);
459 491
460/* 492/*
461 * ar-call.c 493 * ar-call.c
462 */ 494 */
463extern unsigned rxrpc_max_call_lifetime; 495extern unsigned int rxrpc_max_call_lifetime;
464extern unsigned rxrpc_dead_call_expiry; 496extern unsigned int rxrpc_dead_call_expiry;
465extern struct kmem_cache *rxrpc_call_jar; 497extern struct kmem_cache *rxrpc_call_jar;
466extern struct list_head rxrpc_calls; 498extern struct list_head rxrpc_calls;
467extern rwlock_t rxrpc_call_lock; 499extern rwlock_t rxrpc_call_lock;
468 500
469struct rxrpc_call *rxrpc_find_call_hash(u8, __be32, __be32, __be32, 501struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
470 __be16, void *, sa_family_t, const u8 *); 502 void *, sa_family_t, const void *);
471struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, 503struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
472 struct rxrpc_transport *, 504 struct rxrpc_transport *,
473 struct rxrpc_conn_bundle *, 505 struct rxrpc_conn_bundle *,
474 unsigned long, int, gfp_t); 506 unsigned long, int, gfp_t);
475struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, 507struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
476 struct rxrpc_connection *, 508 struct rxrpc_connection *,
477 struct rxrpc_header *, gfp_t); 509 struct rxrpc_host_header *, gfp_t);
478struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long); 510struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long);
479void rxrpc_release_call(struct rxrpc_call *); 511void rxrpc_release_call(struct rxrpc_call *);
480void rxrpc_release_calls_on_socket(struct rxrpc_sock *); 512void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
@@ -484,22 +516,22 @@ void __exit rxrpc_destroy_all_calls(void);
484/* 516/*
485 * ar-connection.c 517 * ar-connection.c
486 */ 518 */
487extern unsigned rxrpc_connection_expiry; 519extern unsigned int rxrpc_connection_expiry;
488extern struct list_head rxrpc_connections; 520extern struct list_head rxrpc_connections;
489extern rwlock_t rxrpc_connection_lock; 521extern rwlock_t rxrpc_connection_lock;
490 522
491struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *, 523struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
492 struct rxrpc_transport *, 524 struct rxrpc_transport *,
493 struct key *, __be16, gfp_t); 525 struct key *, u16, gfp_t);
494void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *); 526void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *);
495int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *, 527int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
496 struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t); 528 struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t);
497void rxrpc_put_connection(struct rxrpc_connection *); 529void rxrpc_put_connection(struct rxrpc_connection *);
498void __exit rxrpc_destroy_all_connections(void); 530void __exit rxrpc_destroy_all_connections(void);
499struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *, 531struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
500 struct rxrpc_header *); 532 struct rxrpc_host_header *);
501extern struct rxrpc_connection * 533extern struct rxrpc_connection *
502rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *, 534rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_host_header *,
503 gfp_t); 535 gfp_t);
504 536
505/* 537/*
@@ -547,7 +579,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
547/* 579/*
548 * ar-output.c 580 * ar-output.c
549 */ 581 */
550extern unsigned rxrpc_resend_timeout; 582extern unsigned int rxrpc_resend_timeout;
551 583
552int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); 584int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
553int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *, 585int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *,
@@ -595,7 +627,7 @@ void rxrpc_packet_destructor(struct sk_buff *);
595/* 627/*
596 * ar-transport.c 628 * ar-transport.c
597 */ 629 */
598extern unsigned rxrpc_transport_expiry; 630extern unsigned int rxrpc_transport_expiry;
599 631
600struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *, 632struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
601 struct rxrpc_peer *, gfp_t); 633 struct rxrpc_peer *, gfp_t);
@@ -694,7 +726,7 @@ do { \
694 printk(KERN_ERR "RxRPC: Assertion failed\n"); \ 726 printk(KERN_ERR "RxRPC: Assertion failed\n"); \
695 BUG(); \ 727 BUG(); \
696 } \ 728 } \
697} while(0) 729} while (0)
698 730
699#define ASSERTCMP(X, OP, Y) \ 731#define ASSERTCMP(X, OP, Y) \
700do { \ 732do { \
@@ -707,7 +739,7 @@ do { \
707 (unsigned long)(X), (unsigned long)(Y)); \ 739 (unsigned long)(X), (unsigned long)(Y)); \
708 BUG(); \ 740 BUG(); \
709 } \ 741 } \
710} while(0) 742} while (0)
711 743
712#define ASSERTIF(C, X) \ 744#define ASSERTIF(C, X) \
713do { \ 745do { \
@@ -716,7 +748,7 @@ do { \
716 printk(KERN_ERR "RxRPC: Assertion failed\n"); \ 748 printk(KERN_ERR "RxRPC: Assertion failed\n"); \
717 BUG(); \ 749 BUG(); \
718 } \ 750 } \
719} while(0) 751} while (0)
720 752
721#define ASSERTIFCMP(C, X, OP, Y) \ 753#define ASSERTIFCMP(C, X, OP, Y) \
722do { \ 754do { \
@@ -729,25 +761,25 @@ do { \
729 (unsigned long)(X), (unsigned long)(Y)); \ 761 (unsigned long)(X), (unsigned long)(Y)); \
730 BUG(); \ 762 BUG(); \
731 } \ 763 } \
732} while(0) 764} while (0)
733 765
734#else 766#else
735 767
736#define ASSERT(X) \ 768#define ASSERT(X) \
737do { \ 769do { \
738} while(0) 770} while (0)
739 771
740#define ASSERTCMP(X, OP, Y) \ 772#define ASSERTCMP(X, OP, Y) \
741do { \ 773do { \
742} while(0) 774} while (0)
743 775
744#define ASSERTIF(C, X) \ 776#define ASSERTIF(C, X) \
745do { \ 777do { \
746} while(0) 778} while (0)
747 779
748#define ASSERTIFCMP(C, X, OP, Y) \ 780#define ASSERTIFCMP(C, X, OP, Y) \
749do { \ 781do { \
750} while(0) 782} while (0)
751 783
752#endif /* __KDEBUGALL */ 784#endif /* __KDEBUGALL */
753 785
@@ -804,9 +836,9 @@ do { \
804 CHECK_SLAB_OKAY(&(CALL)->usage); \ 836 CHECK_SLAB_OKAY(&(CALL)->usage); \
805 if (atomic_inc_return(&(CALL)->usage) == 1) \ 837 if (atomic_inc_return(&(CALL)->usage) == 1) \
806 BUG(); \ 838 BUG(); \
807} while(0) 839} while (0)
808 840
809#define rxrpc_put_call(CALL) \ 841#define rxrpc_put_call(CALL) \
810do { \ 842do { \
811 __rxrpc_put_call(CALL); \ 843 __rxrpc_put_call(CALL); \
812} while(0) 844} while (0)
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 3f6571651d32..3fb492eedeb9 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -12,11 +12,11 @@
12 * "afs@CAMBRIDGE.REDHAT.COM> 12 * "afs@CAMBRIDGE.REDHAT.COM>
13 */ 13 */
14 14
15#include <crypto/skcipher.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <linux/net.h> 17#include <linux/net.h>
17#include <linux/skbuff.h> 18#include <linux/skbuff.h>
18#include <linux/key-type.h> 19#include <linux/key-type.h>
19#include <linux/crypto.h>
20#include <linux/ctype.h> 20#include <linux/ctype.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <net/sock.h> 22#include <net/sock.h>
@@ -824,7 +824,7 @@ static void rxrpc_free_preparse(struct key_preparsed_payload *prep)
824 */ 824 */
825static int rxrpc_preparse_s(struct key_preparsed_payload *prep) 825static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
826{ 826{
827 struct crypto_blkcipher *ci; 827 struct crypto_skcipher *ci;
828 828
829 _enter("%zu", prep->datalen); 829 _enter("%zu", prep->datalen);
830 830
@@ -833,13 +833,13 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
833 833
834 memcpy(&prep->payload.data[2], prep->data, 8); 834 memcpy(&prep->payload.data[2], prep->data, 8);
835 835
836 ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); 836 ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
837 if (IS_ERR(ci)) { 837 if (IS_ERR(ci)) {
838 _leave(" = %ld", PTR_ERR(ci)); 838 _leave(" = %ld", PTR_ERR(ci));
839 return PTR_ERR(ci); 839 return PTR_ERR(ci);
840 } 840 }
841 841
842 if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) 842 if (crypto_skcipher_setkey(ci, prep->data, 8) < 0)
843 BUG(); 843 BUG();
844 844
845 prep->payload.data[0] = ci; 845 prep->payload.data[0] = ci;
@@ -853,7 +853,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
853static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) 853static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
854{ 854{
855 if (prep->payload.data[0]) 855 if (prep->payload.data[0])
856 crypto_free_blkcipher(prep->payload.data[0]); 856 crypto_free_skcipher(prep->payload.data[0]);
857} 857}
858 858
859/* 859/*
@@ -870,7 +870,7 @@ static void rxrpc_destroy(struct key *key)
870static void rxrpc_destroy_s(struct key *key) 870static void rxrpc_destroy_s(struct key *key)
871{ 871{
872 if (key->payload.data[0]) { 872 if (key->payload.data[0]) {
873 crypto_free_blkcipher(key->payload.data[0]); 873 crypto_free_skcipher(key->payload.data[0]);
874 key->payload.data[0] = NULL; 874 key->payload.data[0] = NULL;
875 } 875 }
876} 876}
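[editor's note] ar-key.c moves from the legacy crypto_blkcipher API to crypto_skcipher: <crypto/skcipher.h> replaces <linux/crypto.h>, and the alloc/setkey/free calls change name while keeping the same shape. Below is a kernel-context sketch (not standalone userspace) of the allocation pattern as it stands after this patch; only calls visible in the diff are used, and the error path on setkey failure is my condensation, where the patch itself calls BUG():

/* Kernel-context sketch of the skcipher setup in rxrpc_preparse_s()
 * after this patch; error handling condensed for illustration. */
#include <crypto/skcipher.h>
#include <linux/err.h>

static struct crypto_skcipher *alloc_session_cipher(const u8 *key,
						    unsigned int keylen)
{
	struct crypto_skcipher *ci;

	ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(ci))
		return ci;			/* propagate the ERR_PTR */

	if (crypto_skcipher_setkey(ci, key, keylen) < 0) {
		crypto_free_skcipher(ci);	/* sketch: fail instead of BUG() */
		return ERR_PTR(-EINVAL);
	}
	return ci;
}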
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index 78483b4602bf..4e1e6db0050b 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -323,9 +323,11 @@ void __exit rxrpc_destroy_all_locals(void)
323 * Reply to a version request 323 * Reply to a version request
324 */ 324 */
325static void rxrpc_send_version_request(struct rxrpc_local *local, 325static void rxrpc_send_version_request(struct rxrpc_local *local,
326 struct rxrpc_header *hdr, 326 struct rxrpc_host_header *hdr,
327 struct sk_buff *skb) 327 struct sk_buff *skb)
328{ 328{
329 struct rxrpc_wire_header whdr;
330 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
329 struct sockaddr_in sin; 331 struct sockaddr_in sin;
330 struct msghdr msg; 332 struct msghdr msg;
331 struct kvec iov[2]; 333 struct kvec iov[2];
@@ -344,15 +346,20 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
344 msg.msg_controllen = 0; 346 msg.msg_controllen = 0;
345 msg.msg_flags = 0; 347 msg.msg_flags = 0;
346 348
347 hdr->seq = 0; 349 whdr.epoch = htonl(sp->hdr.epoch);
348 hdr->serial = 0; 350 whdr.cid = htonl(sp->hdr.cid);
349 hdr->type = RXRPC_PACKET_TYPE_VERSION; 351 whdr.callNumber = htonl(sp->hdr.callNumber);
350 hdr->flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED); 352 whdr.seq = 0;
351 hdr->userStatus = 0; 353 whdr.serial = 0;
352 hdr->_rsvd = 0; 354 whdr.type = RXRPC_PACKET_TYPE_VERSION;
353 355 whdr.flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED);
354 iov[0].iov_base = hdr; 356 whdr.userStatus = 0;
355 iov[0].iov_len = sizeof(*hdr); 357 whdr.securityIndex = 0;
358 whdr._rsvd = 0;
359 whdr.serviceId = htons(sp->hdr.serviceId);
360
361 iov[0].iov_base = &whdr;
362 iov[0].iov_len = sizeof(whdr);
356 iov[1].iov_base = (char *)rxrpc_version_string; 363 iov[1].iov_base = (char *)rxrpc_version_string;
357 iov[1].iov_len = sizeof(rxrpc_version_string); 364 iov[1].iov_len = sizeof(rxrpc_version_string);
358 365
@@ -383,7 +390,7 @@ static void rxrpc_process_local_events(struct work_struct *work)
383 while ((skb = skb_dequeue(&local->event_queue))) { 390 while ((skb = skb_dequeue(&local->event_queue))) {
384 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 391 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
385 392
386 kdebug("{%d},{%u}", local->debug_id, sp->hdr.type); 393 _debug("{%d},{%u}", local->debug_id, sp->hdr.type);
387 394
388 switch (sp->hdr.type) { 395 switch (sp->hdr.type) {
389 case RXRPC_PACKET_TYPE_VERSION: 396 case RXRPC_PACKET_TYPE_VERSION:
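[editor's note] The version-reply path in ar-local.c now builds a rxrpc_wire_header on the stack, converting back to network order with htonl()/htons() rather than patching the received header in place, and still sends it as a two-element gather: iov[0] carries the header, iov[1] the version string. A runnable userspace analog of that gather-send, using POSIX writev() in place of the kernel's kvec and kernel_sendmsg(); the header layout and packet-type value here are illustrative only:

/* Userspace analog of the two-part gather send in
 * rxrpc_send_version_request(): header in iov[0], payload in iov[1]. */
#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

struct demo_hdr {			/* stand-in for rxrpc_wire_header */
	uint32_t seq, serial;
	uint8_t  type, flags;
	uint16_t service_id;
} __attribute__((packed));

int main(void)
{
	static const char version[] = "demo version string";
	struct demo_hdr hdr = {
		.seq = htonl(0),
		.serial = htonl(0),
		.type = 13,		/* illustrative VERSION-style type */
		.service_id = htons(1),
	};
	struct iovec iov[2] = {
		{ .iov_base = &hdr,            .iov_len = sizeof(hdr)     },
		{ .iov_base = (char *)version, .iov_len = sizeof(version) },
	};

	/* writev() to stdout stands in for kernel_sendmsg() on the UDP socket. */
	return writev(STDOUT_FILENO, iov, 2) < 0 ? 1 : 0;
}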
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 14c4e12c47b0..d36fb6e1a29c 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -21,7 +21,7 @@
21/* 21/*
22 * Time till packet resend (in jiffies). 22 * Time till packet resend (in jiffies).
23 */ 23 */
24unsigned rxrpc_resend_timeout = 4 * HZ; 24unsigned int rxrpc_resend_timeout = 4 * HZ;
25 25
26static int rxrpc_send_data(struct rxrpc_sock *rx, 26static int rxrpc_send_data(struct rxrpc_sock *rx,
27 struct rxrpc_call *call, 27 struct rxrpc_call *call,
@@ -111,11 +111,11 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
111 if (call->state <= RXRPC_CALL_COMPLETE) { 111 if (call->state <= RXRPC_CALL_COMPLETE) {
112 call->state = RXRPC_CALL_LOCALLY_ABORTED; 112 call->state = RXRPC_CALL_LOCALLY_ABORTED;
113 call->abort_code = abort_code; 113 call->abort_code = abort_code;
114 set_bit(RXRPC_CALL_ABORT, &call->events); 114 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
115 del_timer_sync(&call->resend_timer); 115 del_timer_sync(&call->resend_timer);
116 del_timer_sync(&call->ack_timer); 116 del_timer_sync(&call->ack_timer);
117 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); 117 clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
118 clear_bit(RXRPC_CALL_ACK, &call->events); 118 clear_bit(RXRPC_CALL_EV_ACK, &call->events);
119 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 119 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
120 rxrpc_queue_call(call); 120 rxrpc_queue_call(call);
121 } 121 }
@@ -136,7 +136,7 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
136 struct rxrpc_call *call; 136 struct rxrpc_call *call;
137 unsigned long user_call_ID = 0; 137 unsigned long user_call_ID = 0;
138 struct key *key; 138 struct key *key;
139 __be16 service_id; 139 u16 service_id;
140 u32 abort_code = 0; 140 u32 abort_code = 0;
141 int ret; 141 int ret;
142 142
@@ -151,11 +151,11 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
151 151
152 bundle = NULL; 152 bundle = NULL;
153 if (trans) { 153 if (trans) {
154 service_id = rx->service_id; 154 service_id = rx->srx.srx_service;
155 if (msg->msg_name) { 155 if (msg->msg_name) {
156 DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, 156 DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx,
157 msg->msg_name); 157 msg->msg_name);
158 service_id = htons(srx->srx_service); 158 service_id = srx->srx_service;
159 } 159 }
160 key = rx->key; 160 key = rx->key;
161 if (key && !rx->key->payload.data[0]) 161 if (key && !rx->key->payload.data[0])
@@ -348,7 +348,7 @@ int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
348 348
349 /* send the packet with the don't fragment bit set if we currently 349 /* send the packet with the don't fragment bit set if we currently
350 * think it's small enough */ 350 * think it's small enough */
351 if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) { 351 if (skb->len - sizeof(struct rxrpc_wire_header) < trans->peer->maxdata) {
352 down_read(&trans->local->defrag_sem); 352 down_read(&trans->local->defrag_sem);
353 /* send the packet by UDP 353 /* send the packet by UDP
354 * - returns -EMSGSIZE if UDP would have to fragment the packet 354 * - returns -EMSGSIZE if UDP would have to fragment the packet
@@ -401,7 +401,8 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
401 int ret; 401 int ret;
402 402
403 _enter(",{%d},%ld", 403 _enter(",{%d},%ld",
404 CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz), 404 CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
405 call->acks_winsz),
405 *timeo); 406 *timeo);
406 407
407 add_wait_queue(&call->tx_waitq, &myself); 408 add_wait_queue(&call->tx_waitq, &myself);
@@ -409,7 +410,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
409 for (;;) { 410 for (;;) {
410 set_current_state(TASK_INTERRUPTIBLE); 411 set_current_state(TASK_INTERRUPTIBLE);
411 ret = 0; 412 ret = 0;
412 if (CIRC_SPACE(call->acks_head, call->acks_tail, 413 if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
413 call->acks_winsz) > 0) 414 call->acks_winsz) > 0)
414 break; 415 break;
415 if (signal_pending(current)) { 416 if (signal_pending(current)) {
@@ -437,7 +438,7 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call)
437 if (try_to_del_timer_sync(&call->resend_timer) >= 0) { 438 if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
438 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 439 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
439 if (call->state < RXRPC_CALL_COMPLETE && 440 if (call->state < RXRPC_CALL_COMPLETE &&
440 !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events)) 441 !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
441 rxrpc_queue_call(call); 442 rxrpc_queue_call(call);
442 } 443 }
443 read_unlock_bh(&call->state_lock); 444 read_unlock_bh(&call->state_lock);
@@ -480,8 +481,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
480 write_unlock_bh(&call->state_lock); 481 write_unlock_bh(&call->state_lock);
481 } 482 }
482 483
483 _proto("Tx DATA %%%u { #%u }", 484 _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
484 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
485 485
486 sp->need_resend = false; 486 sp->need_resend = false;
487 sp->resend_at = jiffies + rxrpc_resend_timeout; 487 sp->resend_at = jiffies + rxrpc_resend_timeout;
@@ -513,6 +513,29 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
513} 513}
514 514
515/* 515/*
516 * Convert a host-endian header into a network-endian header.
517 */
518static void rxrpc_insert_header(struct sk_buff *skb)
519{
520 struct rxrpc_wire_header whdr;
521 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
522
523 whdr.epoch = htonl(sp->hdr.epoch);
524 whdr.cid = htonl(sp->hdr.cid);
525 whdr.callNumber = htonl(sp->hdr.callNumber);
526 whdr.seq = htonl(sp->hdr.seq);
527 whdr.serial = htonl(sp->hdr.serial);
528 whdr.type = sp->hdr.type;
529 whdr.flags = sp->hdr.flags;
530 whdr.userStatus = sp->hdr.userStatus;
531 whdr.securityIndex = sp->hdr.securityIndex;
532 whdr._rsvd = htons(sp->hdr._rsvd);
533 whdr.serviceId = htons(sp->hdr.serviceId);
534
535 memcpy(skb->head, &whdr, sizeof(whdr));
536}
537
538/*
516 * send data through a socket 539 * send data through a socket
517 * - must be called in process context 540 * - must be called in process context
518 * - caller holds the socket locked 541 * - caller holds the socket locked
@@ -548,7 +571,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
548 571
549 _debug("alloc"); 572 _debug("alloc");
550 573
551 if (CIRC_SPACE(call->acks_head, call->acks_tail, 574 if (CIRC_SPACE(call->acks_head,
575 ACCESS_ONCE(call->acks_tail),
552 call->acks_winsz) <= 0) { 576 call->acks_winsz) <= 0) {
553 ret = -EAGAIN; 577 ret = -EAGAIN;
554 if (msg->msg_flags & MSG_DONTWAIT) 578 if (msg->msg_flags & MSG_DONTWAIT)
@@ -650,22 +674,22 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
650 674
651 seq = atomic_inc_return(&call->sequence); 675 seq = atomic_inc_return(&call->sequence);
652 676
653 sp->hdr.epoch = conn->epoch; 677 sp->hdr.epoch = conn->epoch;
654 sp->hdr.cid = call->cid; 678 sp->hdr.cid = call->cid;
655 sp->hdr.callNumber = call->call_id; 679 sp->hdr.callNumber = call->call_id;
656 sp->hdr.seq = htonl(seq); 680 sp->hdr.seq = seq;
657 sp->hdr.serial = 681 sp->hdr.serial = atomic_inc_return(&conn->serial);
658 htonl(atomic_inc_return(&conn->serial)); 682 sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
659 sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
660 sp->hdr.userStatus = 0; 683 sp->hdr.userStatus = 0;
661 sp->hdr.securityIndex = conn->security_ix; 684 sp->hdr.securityIndex = conn->security_ix;
662 sp->hdr._rsvd = 0; 685 sp->hdr._rsvd = 0;
663 sp->hdr.serviceId = conn->service_id; 686 sp->hdr.serviceId = call->service_id;
664 687
665 sp->hdr.flags = conn->out_clientflag; 688 sp->hdr.flags = conn->out_clientflag;
666 if (msg_data_left(msg) == 0 && !more) 689 if (msg_data_left(msg) == 0 && !more)
667 sp->hdr.flags |= RXRPC_LAST_PACKET; 690 sp->hdr.flags |= RXRPC_LAST_PACKET;
668 else if (CIRC_SPACE(call->acks_head, call->acks_tail, 691 else if (CIRC_SPACE(call->acks_head,
692 ACCESS_ONCE(call->acks_tail),
669 call->acks_winsz) > 1) 693 call->acks_winsz) > 1)
670 sp->hdr.flags |= RXRPC_MORE_PACKETS; 694 sp->hdr.flags |= RXRPC_MORE_PACKETS;
671 if (more && seq & 1) 695 if (more && seq & 1)
@@ -673,12 +697,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
673 697
674 ret = rxrpc_secure_packet( 698 ret = rxrpc_secure_packet(
675 call, skb, skb->mark, 699 call, skb, skb->mark,
676 skb->head + sizeof(struct rxrpc_header)); 700 skb->head + sizeof(struct rxrpc_wire_header));
677 if (ret < 0) 701 if (ret < 0)
678 goto out; 702 goto out;
679 703
680 memcpy(skb->head, &sp->hdr, 704 rxrpc_insert_header(skb);
681 sizeof(struct rxrpc_header));
682 rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); 705 rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
683 skb = NULL; 706 skb = NULL;
684 } 707 }
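[editor's note] Two points in the ar-output.c hunks above: the Tx path now fills the skb's host-order header and converts it once in rxrpc_insert_header(), and the transmit-window checks wrap call->acks_tail in ACCESS_ONCE() so the compiler reloads a value that the ACK-processing side may be advancing concurrently. CIRC_SPACE() itself is the usual power-of-two ring arithmetic; a runnable sketch of that calculation, with the macro bodies reproduced from <linux/circ_buf.h> as I understand them:

/* Sketch of the ring-buffer space check guarding rxrpc_send_data();
 * macro bodies mirror <linux/circ_buf.h> (size must be a power of two). */
#include <stdio.h>

#define CIRC_CNT(head, tail, size)   (((head) - (tail)) & ((size) - 1))
#define CIRC_SPACE(head, tail, size) CIRC_CNT((tail), ((head) + 1), (size))

int main(void)
{
	unsigned int winsz = 16;	/* acks_winsz: power-of-two window */
	unsigned int head = 9, tail = 2;

	/* In the kernel, tail would be read via ACCESS_ONCE()/READ_ONCE()
	 * because the ACK-processing side may advance it concurrently. */
	printf("queued=%u space=%u\n",
	       CIRC_CNT(head, tail, winsz), CIRC_SPACE(head, tail, winsz));
	return 0;
}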
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index bebaa43484bc..dc089b1976aa 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -92,7 +92,7 @@ static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
92 BUG(); 92 BUG();
93 } 93 }
94 94
95 peer->hdrsize += sizeof(struct rxrpc_header); 95 peer->hdrsize += sizeof(struct rxrpc_wire_header);
96 peer->maxdata = peer->mtu - peer->hdrsize; 96 peer->maxdata = peer->mtu - peer->hdrsize;
97 } 97 }
98 98
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
index 38047f713f2c..525b2ba5a8f4 100644
--- a/net/rxrpc/ar-proc.c
+++ b/net/rxrpc/ar-proc.c
@@ -74,9 +74,9 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
74 " %-8.8s %08x %lx\n", 74 " %-8.8s %08x %lx\n",
75 lbuff, 75 lbuff,
76 rbuff, 76 rbuff,
77 ntohs(call->conn->service_id), 77 call->conn->service_id,
78 ntohl(call->conn->cid), 78 call->cid,
79 ntohl(call->call_id), 79 call->call_id,
80 call->conn->in_clientflag ? "Svc" : "Clt", 80 call->conn->in_clientflag ? "Svc" : "Clt",
81 atomic_read(&call->usage), 81 atomic_read(&call->usage),
82 rxrpc_call_states[call->state], 82 rxrpc_call_states[call->state],
@@ -157,8 +157,8 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
157 " %s %08x %08x %08x\n", 157 " %s %08x %08x %08x\n",
158 lbuff, 158 lbuff,
159 rbuff, 159 rbuff,
160 ntohs(conn->service_id), 160 conn->service_id,
161 ntohl(conn->cid), 161 conn->cid,
162 conn->call_counter, 162 conn->call_counter,
163 conn->in_clientflag ? "Svc" : "Clt", 163 conn->in_clientflag ? "Svc" : "Clt",
164 atomic_read(&conn->usage), 164 atomic_read(&conn->usage),
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index b92beded7459..64facba24a45 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -33,7 +33,7 @@ void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
33 33
34 read_lock_bh(&call->state_lock); 34 read_lock_bh(&call->state_lock);
35 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 35 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
36 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 36 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
37 rxrpc_queue_call(call); 37 rxrpc_queue_call(call);
38 read_unlock_bh(&call->state_lock); 38 read_unlock_bh(&call->state_lock);
39} 39}
@@ -158,7 +158,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
158 goto receive_non_data_message; 158 goto receive_non_data_message;
159 159
160 _debug("recvmsg DATA #%u { %d, %d }", 160 _debug("recvmsg DATA #%u { %d, %d }",
161 ntohl(sp->hdr.seq), skb->len, sp->offset); 161 sp->hdr.seq, skb->len, sp->offset);
162 162
163 if (!continue_call) { 163 if (!continue_call) {
164 /* only set the control data once per recvmsg() */ 164 /* only set the control data once per recvmsg() */
@@ -169,11 +169,11 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
169 ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); 169 ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
170 } 170 }
171 171
172 ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv); 172 ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
173 ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1); 173 ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
174 call->rx_data_recv = ntohl(sp->hdr.seq); 174 call->rx_data_recv = sp->hdr.seq;
175 175
176 ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten); 176 ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
177 177
178 offset = sp->offset; 178 offset = sp->offset;
179 copy = skb->len - offset; 179 copy = skb->len - offset;
@@ -364,11 +364,11 @@ void rxrpc_kernel_data_delivered(struct sk_buff *skb)
364 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 364 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
365 struct rxrpc_call *call = sp->call; 365 struct rxrpc_call *call = sp->call;
366 366
367 ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv); 367 ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
368 ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1); 368 ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
369 call->rx_data_recv = ntohl(sp->hdr.seq); 369 call->rx_data_recv = sp->hdr.seq;
370 370
371 ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten); 371 ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
372 rxrpc_free_skb(skb); 372 rxrpc_free_skb(skb);
373} 373}
374 374
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c
index 8334474eb26c..ceff6394a65f 100644
--- a/net/rxrpc/ar-security.c
+++ b/net/rxrpc/ar-security.c
@@ -167,11 +167,11 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
167 struct rxrpc_sock *rx; 167 struct rxrpc_sock *rx;
168 struct key *key; 168 struct key *key;
169 key_ref_t kref; 169 key_ref_t kref;
170 char kdesc[5+1+3+1]; 170 char kdesc[5 + 1 + 3 + 1];
171 171
172 _enter(""); 172 _enter("");
173 173
174 sprintf(kdesc, "%u:%u", ntohs(conn->service_id), conn->security_ix); 174 sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix);
175 175
176 sec = rxrpc_security_lookup(conn->security_ix); 176 sec = rxrpc_security_lookup(conn->security_ix);
177 if (!sec) { 177 if (!sec) {
@@ -182,7 +182,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
182 /* find the service */ 182 /* find the service */
183 read_lock_bh(&local->services_lock); 183 read_lock_bh(&local->services_lock);
184 list_for_each_entry(rx, &local->services, listen_link) { 184 list_for_each_entry(rx, &local->services, listen_link) {
185 if (rx->service_id == conn->service_id) 185 if (rx->srx.srx_service == conn->service_id)
186 goto found_service; 186 goto found_service;
187 } 187 }
188 188
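[editor's note] With conn->service_id now held in host order, the key description is built with a plain sprintf("%u:%u", ...). The kdesc[5 + 1 + 3 + 1] sizing spells out the worst case: a u16 service ID is at most five digits ("65535"), the separator takes one byte, a u8 security index is at most three digits ("255"), and one byte remains for the terminating NUL, ten bytes in all. A quick runnable check of that arithmetic:

/* Verify the kdesc[5 + 1 + 3 + 1] worst-case sizing used in
 * rxrpc_init_server_conn_security(). */
#include <stdio.h>

int main(void)
{
	char kdesc[5 + 1 + 3 + 1];
	unsigned int service_id = 65535;	/* u16 maximum */
	unsigned int security_ix = 255;		/* u8 maximum */
	int n = snprintf(kdesc, sizeof(kdesc), "%u:%u", service_id, security_ix);

	/* n excludes the NUL; it must fit within sizeof(kdesc) - 1 == 9. */
	printf("\"%s\" needs %d chars (+1 NUL), buffer is %zu bytes\n",
	       kdesc, n, sizeof(kdesc));
	return 0;
}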
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
index 4cfab49e329d..62a267472fce 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/ar-skbuff.c
@@ -34,7 +34,7 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call)
34 /* get an extra ref on the call for the final-ACK generator to 34 /* get an extra ref on the call for the final-ACK generator to
35 * release */ 35 * release */
36 rxrpc_get_call(call); 36 rxrpc_get_call(call);
37 set_bit(RXRPC_CALL_ACK_FINAL, &call->events); 37 set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
38 if (try_to_del_timer_sync(&call->ack_timer) >= 0) 38 if (try_to_del_timer_sync(&call->ack_timer) >= 0)
39 rxrpc_queue_call(call); 39 rxrpc_queue_call(call);
40 break; 40 break;
@@ -59,7 +59,7 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
59 59
60 spin_lock_bh(&call->lock); 60 spin_lock_bh(&call->lock);
61 61
62 _debug("hard ACK #%u", ntohl(sp->hdr.seq)); 62 _debug("hard ACK #%u", sp->hdr.seq);
63 63
64 for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { 64 for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
65 call->ackr_window[loop] >>= 1; 65 call->ackr_window[loop] >>= 1;
@@ -67,7 +67,7 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
67 call->ackr_window[loop + 1] << (BITS_PER_LONG - 1); 67 call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
68 } 68 }
69 69
70 seq = ntohl(sp->hdr.seq); 70 seq = sp->hdr.seq;
71 ASSERTCMP(seq, ==, call->rx_data_eaten + 1); 71 ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
72 call->rx_data_eaten = seq; 72 call->rx_data_eaten = seq;
73 73
@@ -133,5 +133,4 @@ void rxrpc_kernel_free_skb(struct sk_buff *skb)
133{ 133{
134 rxrpc_free_skb(skb); 134 rxrpc_free_skb(skb);
135} 135}
136
137EXPORT_SYMBOL(rxrpc_kernel_free_skb); 136EXPORT_SYMBOL(rxrpc_kernel_free_skb);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 9946467f16b4..66a1a5676446 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -20,7 +20,7 @@
20/* 20/*
21 * Time after last use at which transport record is cleaned up. 21 * Time after last use at which transport record is cleaned up.
22 */ 22 */
23unsigned rxrpc_transport_expiry = 3600 * 24; 23unsigned int rxrpc_transport_expiry = 3600 * 24;
24 24
25static void rxrpc_transport_reaper(struct work_struct *work); 25static void rxrpc_transport_reaper(struct work_struct *work);
26 26
@@ -51,6 +51,7 @@ static struct rxrpc_transport *rxrpc_alloc_transport(struct rxrpc_local *local,
51 spin_lock_init(&trans->client_lock); 51 spin_lock_init(&trans->client_lock);
52 rwlock_init(&trans->conn_lock); 52 rwlock_init(&trans->conn_lock);
53 atomic_set(&trans->usage, 1); 53 atomic_set(&trans->usage, 1);
54 trans->conn_idcounter = peer->srx.srx_service << 16;
54 trans->debug_id = atomic_inc_return(&rxrpc_debug_id); 55 trans->debug_id = atomic_inc_return(&rxrpc_debug_id);
55 56
56 if (peer->srx.transport.family == AF_INET) { 57 if (peer->srx.transport.family == AF_INET) {
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index d7a9ab5a9d9c..f0aeb8163688 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -9,11 +9,11 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <crypto/skcipher.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/net.h> 14#include <linux/net.h>
14#include <linux/skbuff.h> 15#include <linux/skbuff.h>
15#include <linux/udp.h> 16#include <linux/udp.h>
16#include <linux/crypto.h>
17#include <linux/scatterlist.h> 17#include <linux/scatterlist.h>
18#include <linux/ctype.h> 18#include <linux/ctype.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
@@ -53,7 +53,7 @@ MODULE_LICENSE("GPL");
53 * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE 53 * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
54 * packets 54 * packets
55 */ 55 */
56static struct crypto_blkcipher *rxkad_ci; 56static struct crypto_skcipher *rxkad_ci;
57static DEFINE_MUTEX(rxkad_ci_mutex); 57static DEFINE_MUTEX(rxkad_ci_mutex);
58 58
59/* 59/*
@@ -61,7 +61,7 @@ static DEFINE_MUTEX(rxkad_ci_mutex);
61 */ 61 */
62static int rxkad_init_connection_security(struct rxrpc_connection *conn) 62static int rxkad_init_connection_security(struct rxrpc_connection *conn)
63{ 63{
64 struct crypto_blkcipher *ci; 64 struct crypto_skcipher *ci;
65 struct rxrpc_key_token *token; 65 struct rxrpc_key_token *token;
66 int ret; 66 int ret;
67 67
@@ -70,15 +70,15 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
70 token = conn->key->payload.data[0]; 70 token = conn->key->payload.data[0];
71 conn->security_ix = token->security_index; 71 conn->security_ix = token->security_index;
72 72
73 ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); 73 ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
74 if (IS_ERR(ci)) { 74 if (IS_ERR(ci)) {
75 _debug("no cipher"); 75 _debug("no cipher");
76 ret = PTR_ERR(ci); 76 ret = PTR_ERR(ci);
77 goto error; 77 goto error;
78 } 78 }
79 79
80 if (crypto_blkcipher_setkey(ci, token->kad->session_key, 80 if (crypto_skcipher_setkey(ci, token->kad->session_key,
81 sizeof(token->kad->session_key)) < 0) 81 sizeof(token->kad->session_key)) < 0)
82 BUG(); 82 BUG();
83 83
84 switch (conn->security_level) { 84 switch (conn->security_level) {
@@ -113,7 +113,7 @@ error:
113static void rxkad_prime_packet_security(struct rxrpc_connection *conn) 113static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
114{ 114{
115 struct rxrpc_key_token *token; 115 struct rxrpc_key_token *token;
116 struct blkcipher_desc desc; 116 SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
117 struct scatterlist sg[2]; 117 struct scatterlist sg[2];
118 struct rxrpc_crypt iv; 118 struct rxrpc_crypt iv;
119 struct { 119 struct {
@@ -128,21 +128,23 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
128 token = conn->key->payload.data[0]; 128 token = conn->key->payload.data[0];
129 memcpy(&iv, token->kad->session_key, sizeof(iv)); 129 memcpy(&iv, token->kad->session_key, sizeof(iv));
130 130
131 desc.tfm = conn->cipher; 131 tmpbuf.x[0] = htonl(conn->epoch);
132 desc.info = iv.x; 132 tmpbuf.x[1] = htonl(conn->cid);
133 desc.flags = 0;
134
135 tmpbuf.x[0] = conn->epoch;
136 tmpbuf.x[1] = conn->cid;
137 tmpbuf.x[2] = 0; 133 tmpbuf.x[2] = 0;
138 tmpbuf.x[3] = htonl(conn->security_ix); 134 tmpbuf.x[3] = htonl(conn->security_ix);
139 135
140 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); 136 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
141 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); 137 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
142 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); 138
139 skcipher_request_set_tfm(req, conn->cipher);
140 skcipher_request_set_callback(req, 0, NULL, NULL);
141 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
142
143 crypto_skcipher_encrypt(req);
144 skcipher_request_zero(req);
143 145
144 memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv)); 146 memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
145 ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]); 147 ASSERTCMP((u32 __force)conn->csum_iv.n[0], ==, (u32 __force)tmpbuf.x[2]);
146 148
147 _leave(""); 149 _leave("");
148} 150}
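The conversion pattern in this hunk repeats through the rest of rxkad.c: the on-stack blkcipher_desc is replaced with an on-stack skcipher request, the tfm, completion callback and src/dst/IV are attached through the request setters, and skcipher_request_zero() scrubs the request afterwards since it may hold key-derived state. Condensed before/after of the idiom, with tfm, sg, len and iv standing in for the per-call values above (sketch only):

    /* before: synchronous blkcipher interface */
    struct blkcipher_desc desc = { .tfm = tfm, .info = iv.x, .flags = 0 };
    crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], len);       /* dst, src, nbytes */

    /* after: skcipher request bound to the same synchronous-only tfm */
    SKCIPHER_REQUEST_ON_STACK(req, tfm);
    skcipher_request_set_tfm(req, tfm);
    skcipher_request_set_callback(req, 0, NULL, NULL);             /* no async completion */
    skcipher_request_set_crypt(req, &sg[1], &sg[0], len, iv.x);    /* src, dst, nbytes, IV */
    crypto_skcipher_encrypt(req);
    skcipher_request_zero(req);                                    /* wipe the stack copy */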
@@ -156,7 +158,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
156 void *sechdr) 158 void *sechdr)
157{ 159{
158 struct rxrpc_skb_priv *sp; 160 struct rxrpc_skb_priv *sp;
159 struct blkcipher_desc desc; 161 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
160 struct rxrpc_crypt iv; 162 struct rxrpc_crypt iv;
161 struct scatterlist sg[2]; 163 struct scatterlist sg[2];
162 struct { 164 struct {
@@ -169,21 +171,24 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
169 171
170 _enter(""); 172 _enter("");
171 173
172 check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber); 174 check = sp->hdr.seq ^ sp->hdr.callNumber;
173 data_size |= (u32) check << 16; 175 data_size |= (u32)check << 16;
174 176
175 tmpbuf.hdr.data_size = htonl(data_size); 177 tmpbuf.hdr.data_size = htonl(data_size);
176 memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first)); 178 memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
177 179
178 /* start the encryption afresh */ 180 /* start the encryption afresh */
179 memset(&iv, 0, sizeof(iv)); 181 memset(&iv, 0, sizeof(iv));
180 desc.tfm = call->conn->cipher;
181 desc.info = iv.x;
182 desc.flags = 0;
183 182
184 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); 183 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
185 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); 184 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
186 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); 185
186 skcipher_request_set_tfm(req, call->conn->cipher);
187 skcipher_request_set_callback(req, 0, NULL, NULL);
188 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
189
190 crypto_skcipher_encrypt(req);
191 skcipher_request_zero(req);
187 192
188 memcpy(sechdr, &tmpbuf, sizeof(tmpbuf)); 193 memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
189 194
@@ -195,81 +200,91 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
195 * wholly encrypt a packet (level 2 security) 200 * wholly encrypt a packet (level 2 security)
196 */ 201 */
197static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, 202static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
198 struct sk_buff *skb, 203 struct sk_buff *skb,
199 u32 data_size, 204 u32 data_size,
200 void *sechdr) 205 void *sechdr)
201{ 206{
202 const struct rxrpc_key_token *token; 207 const struct rxrpc_key_token *token;
203 struct rxkad_level2_hdr rxkhdr 208 struct rxkad_level2_hdr rxkhdr
204 __attribute__((aligned(8))); /* must be all on one page */ 209 __attribute__((aligned(8))); /* must be all on one page */
205 struct rxrpc_skb_priv *sp; 210 struct rxrpc_skb_priv *sp;
206 struct blkcipher_desc desc; 211 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
207 struct rxrpc_crypt iv; 212 struct rxrpc_crypt iv;
208 struct scatterlist sg[16]; 213 struct scatterlist sg[16];
209 struct sk_buff *trailer; 214 struct sk_buff *trailer;
210 unsigned int len; 215 unsigned int len;
211 u16 check; 216 u16 check;
212 int nsg; 217 int nsg;
218 int err;
213 219
214 sp = rxrpc_skb(skb); 220 sp = rxrpc_skb(skb);
215 221
216 _enter(""); 222 _enter("");
217 223
218 check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber); 224 check = sp->hdr.seq ^ sp->hdr.callNumber;
219 225
220 rxkhdr.data_size = htonl(data_size | (u32) check << 16); 226 rxkhdr.data_size = htonl(data_size | (u32)check << 16);
221 rxkhdr.checksum = 0; 227 rxkhdr.checksum = 0;
222 228
223 /* encrypt from the session key */ 229 /* encrypt from the session key */
224 token = call->conn->key->payload.data[0]; 230 token = call->conn->key->payload.data[0];
225 memcpy(&iv, token->kad->session_key, sizeof(iv)); 231 memcpy(&iv, token->kad->session_key, sizeof(iv));
226 desc.tfm = call->conn->cipher;
227 desc.info = iv.x;
228 desc.flags = 0;
229 232
230 sg_init_one(&sg[0], sechdr, sizeof(rxkhdr)); 233 sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
231 sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr)); 234 sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr));
232 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr)); 235
236 skcipher_request_set_tfm(req, call->conn->cipher);
237 skcipher_request_set_callback(req, 0, NULL, NULL);
238 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(rxkhdr), iv.x);
239
240 crypto_skcipher_encrypt(req);
233 241
234 /* we want to encrypt the skbuff in-place */ 242 /* we want to encrypt the skbuff in-place */
235 nsg = skb_cow_data(skb, 0, &trailer); 243 nsg = skb_cow_data(skb, 0, &trailer);
244 err = -ENOMEM;
236 if (nsg < 0 || nsg > 16) 245 if (nsg < 0 || nsg > 16)
237 return -ENOMEM; 246 goto out;
238 247
239 len = data_size + call->conn->size_align - 1; 248 len = data_size + call->conn->size_align - 1;
240 len &= ~(call->conn->size_align - 1); 249 len &= ~(call->conn->size_align - 1);
241 250
242 sg_init_table(sg, nsg); 251 sg_init_table(sg, nsg);
243 skb_to_sgvec(skb, sg, 0, len); 252 skb_to_sgvec(skb, sg, 0, len);
244 crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); 253
254 skcipher_request_set_crypt(req, sg, sg, len, iv.x);
255
256 crypto_skcipher_encrypt(req);
245 257
246 _leave(" = 0"); 258 _leave(" = 0");
247 return 0; 259 err = 0;
260
261out:
262 skcipher_request_zero(req);
263 return err;
248} 264}
249 265
250/* 266/*
251 * checksum an RxRPC packet header 267 * checksum an RxRPC packet header
252 */ 268 */
253static int rxkad_secure_packet(const struct rxrpc_call *call, 269static int rxkad_secure_packet(const struct rxrpc_call *call,
254 struct sk_buff *skb, 270 struct sk_buff *skb,
255 size_t data_size, 271 size_t data_size,
256 void *sechdr) 272 void *sechdr)
257{ 273{
258 struct rxrpc_skb_priv *sp; 274 struct rxrpc_skb_priv *sp;
259 struct blkcipher_desc desc; 275 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
260 struct rxrpc_crypt iv; 276 struct rxrpc_crypt iv;
261 struct scatterlist sg[2]; 277 struct scatterlist sg[2];
262 struct { 278 struct {
263 __be32 x[2]; 279 __be32 x[2];
264 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */ 280 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
265 __be32 x; 281 u32 x, y;
266 u32 y;
267 int ret; 282 int ret;
268 283
269 sp = rxrpc_skb(skb); 284 sp = rxrpc_skb(skb);
270 285
271 _enter("{%d{%x}},{#%u},%zu,", 286 _enter("{%d{%x}},{#%u},%zu,",
272 call->debug_id, key_serial(call->conn->key), ntohl(sp->hdr.seq), 287 call->debug_id, key_serial(call->conn->key), sp->hdr.seq,
273 data_size); 288 data_size);
274 289
275 if (!call->conn->cipher) 290 if (!call->conn->cipher)
@@ -281,25 +296,28 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
281 296
282 /* continue encrypting from where we left off */ 297 /* continue encrypting from where we left off */
283 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); 298 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
284 desc.tfm = call->conn->cipher;
285 desc.info = iv.x;
286 desc.flags = 0;
287 299
288 /* calculate the security checksum */ 300 /* calculate the security checksum */
289 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT)); 301 x = call->channel << (32 - RXRPC_CIDSHIFT);
290 x |= sp->hdr.seq & cpu_to_be32(0x3fffffff); 302 x |= sp->hdr.seq & 0x3fffffff;
291 tmpbuf.x[0] = sp->hdr.callNumber; 303 tmpbuf.x[0] = htonl(sp->hdr.callNumber);
292 tmpbuf.x[1] = x; 304 tmpbuf.x[1] = htonl(x);
293 305
294 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); 306 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
295 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); 307 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
296 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); 308
309 skcipher_request_set_tfm(req, call->conn->cipher);
310 skcipher_request_set_callback(req, 0, NULL, NULL);
311 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
312
313 crypto_skcipher_encrypt(req);
314 skcipher_request_zero(req);
297 315
298 y = ntohl(tmpbuf.x[1]); 316 y = ntohl(tmpbuf.x[1]);
299 y = (y >> 16) & 0xffff; 317 y = (y >> 16) & 0xffff;
300 if (y == 0) 318 if (y == 0)
301 y = 1; /* zero checksums are not permitted */ 319 y = 1; /* zero checksums are not permitted */
302 sp->hdr.cksum = htons(y); 320 sp->hdr.cksum = y;
303 321
304 switch (call->conn->security_level) { 322 switch (call->conn->security_level) {
305 case RXRPC_SECURITY_PLAIN: 323 case RXRPC_SECURITY_PLAIN:
@@ -330,7 +348,7 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
330{ 348{
331 struct rxkad_level1_hdr sechdr; 349 struct rxkad_level1_hdr sechdr;
332 struct rxrpc_skb_priv *sp; 350 struct rxrpc_skb_priv *sp;
333 struct blkcipher_desc desc; 351 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
334 struct rxrpc_crypt iv; 352 struct rxrpc_crypt iv;
335 struct scatterlist sg[16]; 353 struct scatterlist sg[16];
336 struct sk_buff *trailer; 354 struct sk_buff *trailer;
@@ -352,11 +370,13 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
352 370
353 /* start the decryption afresh */ 371 /* start the decryption afresh */
354 memset(&iv, 0, sizeof(iv)); 372 memset(&iv, 0, sizeof(iv));
355 desc.tfm = call->conn->cipher;
356 desc.info = iv.x;
357 desc.flags = 0;
358 373
359 crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8); 374 skcipher_request_set_tfm(req, call->conn->cipher);
375 skcipher_request_set_callback(req, 0, NULL, NULL);
376 skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
377
378 crypto_skcipher_decrypt(req);
379 skcipher_request_zero(req);
360 380
361 /* remove the decrypted packet length */ 381 /* remove the decrypted packet length */
362 if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) 382 if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
@@ -368,7 +388,7 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
368 data_size = buf & 0xffff; 388 data_size = buf & 0xffff;
369 389
370 check = buf >> 16; 390 check = buf >> 16;
371 check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber); 391 check ^= sp->hdr.seq ^ sp->hdr.callNumber;
372 check &= 0xffff; 392 check &= 0xffff;
373 if (check != 0) { 393 if (check != 0) {
374 *_abort_code = RXKADSEALEDINCON; 394 *_abort_code = RXKADSEALEDINCON;
@@ -405,7 +425,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
405 const struct rxrpc_key_token *token; 425 const struct rxrpc_key_token *token;
406 struct rxkad_level2_hdr sechdr; 426 struct rxkad_level2_hdr sechdr;
407 struct rxrpc_skb_priv *sp; 427 struct rxrpc_skb_priv *sp;
408 struct blkcipher_desc desc; 428 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
409 struct rxrpc_crypt iv; 429 struct rxrpc_crypt iv;
410 struct scatterlist _sg[4], *sg; 430 struct scatterlist _sg[4], *sg;
411 struct sk_buff *trailer; 431 struct sk_buff *trailer;
@@ -435,11 +455,13 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
435 /* decrypt from the session key */ 455 /* decrypt from the session key */
436 token = call->conn->key->payload.data[0]; 456 token = call->conn->key->payload.data[0];
437 memcpy(&iv, token->kad->session_key, sizeof(iv)); 457 memcpy(&iv, token->kad->session_key, sizeof(iv));
438 desc.tfm = call->conn->cipher;
439 desc.info = iv.x;
440 desc.flags = 0;
441 458
442 crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len); 459 skcipher_request_set_tfm(req, call->conn->cipher);
460 skcipher_request_set_callback(req, 0, NULL, NULL);
461 skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
462
463 crypto_skcipher_decrypt(req);
464 skcipher_request_zero(req);
443 if (sg != _sg) 465 if (sg != _sg)
444 kfree(sg); 466 kfree(sg);
445 467
@@ -453,7 +475,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
453 data_size = buf & 0xffff; 475 data_size = buf & 0xffff;
454 476
455 check = buf >> 16; 477 check = buf >> 16;
456 check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber); 478 check ^= sp->hdr.seq ^ sp->hdr.callNumber;
457 check &= 0xffff; 479 check &= 0xffff;
458 if (check != 0) { 480 if (check != 0) {
459 *_abort_code = RXKADSEALEDINCON; 481 *_abort_code = RXKADSEALEDINCON;
@@ -487,23 +509,21 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
487 struct sk_buff *skb, 509 struct sk_buff *skb,
488 u32 *_abort_code) 510 u32 *_abort_code)
489{ 511{
490 struct blkcipher_desc desc; 512 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
491 struct rxrpc_skb_priv *sp; 513 struct rxrpc_skb_priv *sp;
492 struct rxrpc_crypt iv; 514 struct rxrpc_crypt iv;
493 struct scatterlist sg[2]; 515 struct scatterlist sg[2];
494 struct { 516 struct {
495 __be32 x[2]; 517 __be32 x[2];
496 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */ 518 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
497 __be32 x; 519 u16 cksum;
498 __be16 cksum; 520 u32 x, y;
499 u32 y;
500 int ret; 521 int ret;
501 522
502 sp = rxrpc_skb(skb); 523 sp = rxrpc_skb(skb);
503 524
504 _enter("{%d{%x}},{#%u}", 525 _enter("{%d{%x}},{#%u}",
505 call->debug_id, key_serial(call->conn->key), 526 call->debug_id, key_serial(call->conn->key), sp->hdr.seq);
506 ntohl(sp->hdr.seq));
507 527
508 if (!call->conn->cipher) 528 if (!call->conn->cipher)
509 return 0; 529 return 0;
@@ -516,26 +536,28 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
516 536
517 /* continue encrypting from where we left off */ 537 /* continue encrypting from where we left off */
518 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); 538 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
519 desc.tfm = call->conn->cipher;
520 desc.info = iv.x;
521 desc.flags = 0;
522 539
523 /* validate the security checksum */ 540 /* validate the security checksum */
524 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT)); 541 x = call->channel << (32 - RXRPC_CIDSHIFT);
525 x |= sp->hdr.seq & cpu_to_be32(0x3fffffff); 542 x |= sp->hdr.seq & 0x3fffffff;
526 tmpbuf.x[0] = call->call_id; 543 tmpbuf.x[0] = htonl(call->call_id);
527 tmpbuf.x[1] = x; 544 tmpbuf.x[1] = htonl(x);
528 545
529 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); 546 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
530 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); 547 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
531 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); 548
549 skcipher_request_set_tfm(req, call->conn->cipher);
550 skcipher_request_set_callback(req, 0, NULL, NULL);
551 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
552
553 crypto_skcipher_encrypt(req);
554 skcipher_request_zero(req);
532 555
533 y = ntohl(tmpbuf.x[1]); 556 y = ntohl(tmpbuf.x[1]);
534 y = (y >> 16) & 0xffff; 557 cksum = (y >> 16) & 0xffff;
535 if (y == 0) 558 if (cksum == 0)
536 y = 1; /* zero checksums are not permitted */ 559 cksum = 1; /* zero checksums are not permitted */
537 560
538 cksum = htons(y);
539 if (sp->hdr.cksum != cksum) { 561 if (sp->hdr.cksum != cksum) {
540 *_abort_code = RXKADSEALEDINCON; 562 *_abort_code = RXKADSEALEDINCON;
541 _leave(" = -EPROTO [csum failed]"); 563 _leave(" = -EPROTO [csum failed]");
@@ -567,10 +589,11 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
567static int rxkad_issue_challenge(struct rxrpc_connection *conn) 589static int rxkad_issue_challenge(struct rxrpc_connection *conn)
568{ 590{
569 struct rxkad_challenge challenge; 591 struct rxkad_challenge challenge;
570 struct rxrpc_header hdr; 592 struct rxrpc_wire_header whdr;
571 struct msghdr msg; 593 struct msghdr msg;
572 struct kvec iov[2]; 594 struct kvec iov[2];
573 size_t len; 595 size_t len;
596 u32 serial;
574 int ret; 597 int ret;
575 598
576 _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); 599 _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
@@ -592,26 +615,27 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
592 msg.msg_controllen = 0; 615 msg.msg_controllen = 0;
593 msg.msg_flags = 0; 616 msg.msg_flags = 0;
594 617
595 hdr.epoch = conn->epoch; 618 whdr.epoch = htonl(conn->epoch);
596 hdr.cid = conn->cid; 619 whdr.cid = htonl(conn->cid);
597 hdr.callNumber = 0; 620 whdr.callNumber = 0;
598 hdr.seq = 0; 621 whdr.seq = 0;
599 hdr.type = RXRPC_PACKET_TYPE_CHALLENGE; 622 whdr.type = RXRPC_PACKET_TYPE_CHALLENGE;
600 hdr.flags = conn->out_clientflag; 623 whdr.flags = conn->out_clientflag;
601 hdr.userStatus = 0; 624 whdr.userStatus = 0;
602 hdr.securityIndex = conn->security_ix; 625 whdr.securityIndex = conn->security_ix;
603 hdr._rsvd = 0; 626 whdr._rsvd = 0;
604 hdr.serviceId = conn->service_id; 627 whdr.serviceId = htons(conn->service_id);
605 628
606 iov[0].iov_base = &hdr; 629 iov[0].iov_base = &whdr;
607 iov[0].iov_len = sizeof(hdr); 630 iov[0].iov_len = sizeof(whdr);
608 iov[1].iov_base = &challenge; 631 iov[1].iov_base = &challenge;
609 iov[1].iov_len = sizeof(challenge); 632 iov[1].iov_len = sizeof(challenge);
610 633
611 len = iov[0].iov_len + iov[1].iov_len; 634 len = iov[0].iov_len + iov[1].iov_len;
612 635
613 hdr.serial = htonl(atomic_inc_return(&conn->serial)); 636 serial = atomic_inc_return(&conn->serial);
614 _proto("Tx CHALLENGE %%%u", ntohl(hdr.serial)); 637 whdr.serial = htonl(serial);
638 _proto("Tx CHALLENGE %%%u", serial);
615 639
616 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len); 640 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
617 if (ret < 0) { 641 if (ret < 0) {
@@ -627,13 +651,15 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
627 * send a Kerberos security response 651 * send a Kerberos security response
628 */ 652 */
629static int rxkad_send_response(struct rxrpc_connection *conn, 653static int rxkad_send_response(struct rxrpc_connection *conn,
630 struct rxrpc_header *hdr, 654 struct rxrpc_host_header *hdr,
631 struct rxkad_response *resp, 655 struct rxkad_response *resp,
632 const struct rxkad_key *s2) 656 const struct rxkad_key *s2)
633{ 657{
658 struct rxrpc_wire_header whdr;
634 struct msghdr msg; 659 struct msghdr msg;
635 struct kvec iov[3]; 660 struct kvec iov[3];
636 size_t len; 661 size_t len;
662 u32 serial;
637 int ret; 663 int ret;
638 664
639 _enter(""); 665 _enter("");
@@ -644,24 +670,26 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
644 msg.msg_controllen = 0; 670 msg.msg_controllen = 0;
645 msg.msg_flags = 0; 671 msg.msg_flags = 0;
646 672
647 hdr->epoch = conn->epoch; 673 memset(&whdr, 0, sizeof(whdr));
648 hdr->seq = 0; 674 whdr.epoch = htonl(hdr->epoch);
649 hdr->type = RXRPC_PACKET_TYPE_RESPONSE; 675 whdr.cid = htonl(hdr->cid);
650 hdr->flags = conn->out_clientflag; 676 whdr.type = RXRPC_PACKET_TYPE_RESPONSE;
651 hdr->userStatus = 0; 677 whdr.flags = conn->out_clientflag;
652 hdr->_rsvd = 0; 678 whdr.securityIndex = hdr->securityIndex;
679 whdr.serviceId = htons(hdr->serviceId);
653 680
654 iov[0].iov_base = hdr; 681 iov[0].iov_base = &whdr;
655 iov[0].iov_len = sizeof(*hdr); 682 iov[0].iov_len = sizeof(whdr);
656 iov[1].iov_base = resp; 683 iov[1].iov_base = resp;
657 iov[1].iov_len = sizeof(*resp); 684 iov[1].iov_len = sizeof(*resp);
658 iov[2].iov_base = (void *) s2->ticket; 685 iov[2].iov_base = (void *)s2->ticket;
659 iov[2].iov_len = s2->ticket_len; 686 iov[2].iov_len = s2->ticket_len;
660 687
661 len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len; 688 len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
662 689
663 hdr->serial = htonl(atomic_inc_return(&conn->serial)); 690 serial = atomic_inc_return(&conn->serial);
664 _proto("Tx RESPONSE %%%u", ntohl(hdr->serial)); 691 whdr.serial = htonl(serial);
692 _proto("Tx RESPONSE %%%u", serial);
665 693
666 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len); 694 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len);
667 if (ret < 0) { 695 if (ret < 0) {
@@ -718,18 +746,21 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
718 struct rxkad_response *resp, 746 struct rxkad_response *resp,
719 const struct rxkad_key *s2) 747 const struct rxkad_key *s2)
720{ 748{
721 struct blkcipher_desc desc; 749 SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
722 struct rxrpc_crypt iv; 750 struct rxrpc_crypt iv;
723 struct scatterlist sg[2]; 751 struct scatterlist sg[2];
724 752
725 /* continue encrypting from where we left off */ 753 /* continue encrypting from where we left off */
726 memcpy(&iv, s2->session_key, sizeof(iv)); 754 memcpy(&iv, s2->session_key, sizeof(iv));
727 desc.tfm = conn->cipher;
728 desc.info = iv.x;
729 desc.flags = 0;
730 755
731 rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted)); 756 rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
732 crypto_blkcipher_encrypt_iv(&desc, sg, sg, sizeof(resp->encrypted)); 757
758 skcipher_request_set_tfm(req, conn->cipher);
759 skcipher_request_set_callback(req, 0, NULL, NULL);
760 skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
761
762 crypto_skcipher_encrypt(req);
763 skcipher_request_zero(req);
733} 764}
734 765
735/* 766/*
@@ -770,7 +801,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
770 min_level = ntohl(challenge.min_level); 801 min_level = ntohl(challenge.min_level);
771 802
772 _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }", 803 _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
773 ntohl(sp->hdr.serial), version, nonce, min_level); 804 sp->hdr.serial, version, nonce, min_level);
774 805
775 abort_code = RXKADINCONSISTENCY; 806 abort_code = RXKADINCONSISTENCY;
776 if (version != RXKAD_VERSION) 807 if (version != RXKAD_VERSION)
@@ -785,22 +816,23 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
785 /* build the response packet */ 816 /* build the response packet */
786 memset(&resp, 0, sizeof(resp)); 817 memset(&resp, 0, sizeof(resp));
787 818
788 resp.version = RXKAD_VERSION; 819 resp.version = htonl(RXKAD_VERSION);
789 resp.encrypted.epoch = conn->epoch; 820 resp.encrypted.epoch = htonl(conn->epoch);
790 resp.encrypted.cid = conn->cid; 821 resp.encrypted.cid = htonl(conn->cid);
791 resp.encrypted.securityIndex = htonl(conn->security_ix); 822 resp.encrypted.securityIndex = htonl(conn->security_ix);
823 resp.encrypted.inc_nonce = htonl(nonce + 1);
824 resp.encrypted.level = htonl(conn->security_level);
825 resp.kvno = htonl(token->kad->kvno);
826 resp.ticket_len = htonl(token->kad->ticket_len);
827
792 resp.encrypted.call_id[0] = 828 resp.encrypted.call_id[0] =
793 (conn->channels[0] ? conn->channels[0]->call_id : 0); 829 htonl(conn->channels[0] ? conn->channels[0]->call_id : 0);
794 resp.encrypted.call_id[1] = 830 resp.encrypted.call_id[1] =
795 (conn->channels[1] ? conn->channels[1]->call_id : 0); 831 htonl(conn->channels[1] ? conn->channels[1]->call_id : 0);
796 resp.encrypted.call_id[2] = 832 resp.encrypted.call_id[2] =
797 (conn->channels[2] ? conn->channels[2]->call_id : 0); 833 htonl(conn->channels[2] ? conn->channels[2]->call_id : 0);
798 resp.encrypted.call_id[3] = 834 resp.encrypted.call_id[3] =
799 (conn->channels[3] ? conn->channels[3]->call_id : 0); 835 htonl(conn->channels[3] ? conn->channels[3]->call_id : 0);
800 resp.encrypted.inc_nonce = htonl(nonce + 1);
801 resp.encrypted.level = htonl(conn->security_level);
802 resp.kvno = htonl(token->kad->kvno);
803 resp.ticket_len = htonl(token->kad->ticket_len);
804 836
805 /* calculate the response checksum and then do the encryption */ 837 /* calculate the response checksum and then do the encryption */
806 rxkad_calc_response_checksum(&resp); 838 rxkad_calc_response_checksum(&resp);
@@ -822,7 +854,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
822 time_t *_expiry, 854 time_t *_expiry,
823 u32 *_abort_code) 855 u32 *_abort_code)
824{ 856{
825 struct blkcipher_desc desc; 857 struct skcipher_request *req;
826 struct rxrpc_crypt iv, key; 858 struct rxrpc_crypt iv, key;
827 struct scatterlist sg[1]; 859 struct scatterlist sg[1];
828 struct in_addr addr; 860 struct in_addr addr;
@@ -853,12 +885,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
853 885
854 memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); 886 memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv));
855 887
856 desc.tfm = conn->server_key->payload.data[0]; 888 req = skcipher_request_alloc(conn->server_key->payload.data[0],
857 desc.info = iv.x; 889 GFP_NOFS);
858 desc.flags = 0; 890 if (!req) {
891 *_abort_code = RXKADNOAUTH;
892 ret = -ENOMEM;
893 goto error;
894 }
859 895
860 sg_init_one(&sg[0], ticket, ticket_len); 896 sg_init_one(&sg[0], ticket, ticket_len);
861 crypto_blkcipher_decrypt_iv(&desc, sg, sg, ticket_len); 897
898 skcipher_request_set_callback(req, 0, NULL, NULL);
899 skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
900
901 crypto_skcipher_decrypt(req);
902 skcipher_request_free(req);
862 903
863 p = ticket; 904 p = ticket;
864 end = p + ticket_len; 905 end = p + ticket_len;
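rxkad_decrypt_ticket() is the one site that switches to a heap-allocated request rather than SKCIPHER_REQUEST_ON_STACK(): skcipher_request_alloc() binds the request to the server key's tfm at allocation time (so no separate set_tfm call appears above) and introduces a new -ENOMEM failure path, reported as RXKADNOAUTH. The shape of that variant in isolation (sketch of the pattern used above):

    struct skcipher_request *req;

    req = skcipher_request_alloc(tfm, GFP_NOFS);    /* tfm is bound here */
    if (!req)
            return -ENOMEM;                         /* the function above also sets *_abort_code = RXKADNOAUTH */

    skcipher_request_set_callback(req, 0, NULL, NULL);
    skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
    crypto_skcipher_decrypt(req);
    skcipher_request_free(req);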
@@ -966,7 +1007,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
966 struct rxkad_response *resp, 1007 struct rxkad_response *resp,
967 const struct rxrpc_crypt *session_key) 1008 const struct rxrpc_crypt *session_key)
968{ 1009{
969 struct blkcipher_desc desc; 1010 SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
970 struct scatterlist sg[2]; 1011 struct scatterlist sg[2];
971 struct rxrpc_crypt iv; 1012 struct rxrpc_crypt iv;
972 1013
@@ -976,17 +1017,21 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
976 ASSERT(rxkad_ci != NULL); 1017 ASSERT(rxkad_ci != NULL);
977 1018
978 mutex_lock(&rxkad_ci_mutex); 1019 mutex_lock(&rxkad_ci_mutex);
979 if (crypto_blkcipher_setkey(rxkad_ci, session_key->x, 1020 if (crypto_skcipher_setkey(rxkad_ci, session_key->x,
980 sizeof(*session_key)) < 0) 1021 sizeof(*session_key)) < 0)
981 BUG(); 1022 BUG();
982 1023
983 memcpy(&iv, session_key, sizeof(iv)); 1024 memcpy(&iv, session_key, sizeof(iv));
984 desc.tfm = rxkad_ci;
985 desc.info = iv.x;
986 desc.flags = 0;
987 1025
988 rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted)); 1026 rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
989 crypto_blkcipher_decrypt_iv(&desc, sg, sg, sizeof(resp->encrypted)); 1027
1028 skcipher_request_set_tfm(req, rxkad_ci);
1029 skcipher_request_set_callback(req, 0, NULL, NULL);
1030 skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
1031
1032 crypto_skcipher_decrypt(req);
1033 skcipher_request_zero(req);
1034
990 mutex_unlock(&rxkad_ci_mutex); 1035 mutex_unlock(&rxkad_ci_mutex);
991 1036
992 _leave(""); 1037 _leave("");
@@ -1022,7 +1067,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
1022 kvno = ntohl(response.kvno); 1067 kvno = ntohl(response.kvno);
1023 sp = rxrpc_skb(skb); 1068 sp = rxrpc_skb(skb);
1024 _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", 1069 _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
1025 ntohl(sp->hdr.serial), version, kvno, ticket_len); 1070 sp->hdr.serial, version, kvno, ticket_len);
1026 1071
1027 abort_code = RXKADINCONSISTENCY; 1072 abort_code = RXKADINCONSISTENCY;
1028 if (version != RXKAD_VERSION) 1073 if (version != RXKAD_VERSION)
@@ -1058,9 +1103,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
1058 rxkad_decrypt_response(conn, &response, &session_key); 1103 rxkad_decrypt_response(conn, &response, &session_key);
1059 1104
1060 abort_code = RXKADSEALEDINCON; 1105 abort_code = RXKADSEALEDINCON;
1061 if (response.encrypted.epoch != conn->epoch) 1106 if (ntohl(response.encrypted.epoch) != conn->epoch)
1062 goto protocol_error_free; 1107 goto protocol_error_free;
1063 if (response.encrypted.cid != conn->cid) 1108 if (ntohl(response.encrypted.cid) != conn->cid)
1064 goto protocol_error_free; 1109 goto protocol_error_free;
1065 if (ntohl(response.encrypted.securityIndex) != conn->security_ix) 1110 if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
1066 goto protocol_error_free; 1111 goto protocol_error_free;
@@ -1077,7 +1122,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
1077 goto protocol_error_free; 1122 goto protocol_error_free;
1078 1123
1079 abort_code = RXKADOUTOFSEQUENCE; 1124 abort_code = RXKADOUTOFSEQUENCE;
1080 if (response.encrypted.inc_nonce != htonl(conn->security_nonce + 1)) 1125 if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
1081 goto protocol_error_free; 1126 goto protocol_error_free;
1082 1127
1083 abort_code = RXKADLEVELFAIL; 1128 abort_code = RXKADLEVELFAIL;
@@ -1115,7 +1160,7 @@ static void rxkad_clear(struct rxrpc_connection *conn)
1115 _enter(""); 1160 _enter("");
1116 1161
1117 if (conn->cipher) 1162 if (conn->cipher)
1118 crypto_free_blkcipher(conn->cipher); 1163 crypto_free_skcipher(conn->cipher);
1119} 1164}
1120 1165
1121/* 1166/*
@@ -1141,7 +1186,7 @@ static __init int rxkad_init(void)
1141 1186
1142 /* pin the cipher we need so that the crypto layer doesn't invoke 1187 /* pin the cipher we need so that the crypto layer doesn't invoke
1143 * keventd to go get it */ 1188 * keventd to go get it */
1144 rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); 1189 rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
1145 if (IS_ERR(rxkad_ci)) 1190 if (IS_ERR(rxkad_ci))
1146 return PTR_ERR(rxkad_ci); 1191 return PTR_ERR(rxkad_ci);
1147 1192
@@ -1155,7 +1200,7 @@ static __exit void rxkad_exit(void)
1155 _enter(""); 1200 _enter("");
1156 1201
1157 rxrpc_unregister_security(&rxkad); 1202 rxrpc_unregister_security(&rxkad);
1158 crypto_free_blkcipher(rxkad_ci); 1203 crypto_free_skcipher(rxkad_ci);
1159} 1204}
1160 1205
1161module_exit(rxkad_exit); 1206module_exit(rxkad_exit);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 50a98a910eb1..d20ed575acf4 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -15,11 +15,11 @@
15#include "ar-internal.h" 15#include "ar-internal.h"
16 16
17static struct ctl_table_header *rxrpc_sysctl_reg_table; 17static struct ctl_table_header *rxrpc_sysctl_reg_table;
18static const unsigned zero = 0; 18static const unsigned int zero = 0;
19static const unsigned one = 1; 19static const unsigned int one = 1;
20static const unsigned four = 4; 20static const unsigned int four = 4;
21static const unsigned n_65535 = 65535; 21static const unsigned int n_65535 = 65535;
22static const unsigned n_max_acks = RXRPC_MAXACKS; 22static const unsigned int n_max_acks = RXRPC_MAXACKS;
23 23
24/* 24/*
25 * RxRPC operating parameters. 25 * RxRPC operating parameters.
@@ -32,7 +32,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
32 { 32 {
33 .procname = "req_ack_delay", 33 .procname = "req_ack_delay",
34 .data = &rxrpc_requested_ack_delay, 34 .data = &rxrpc_requested_ack_delay,
35 .maxlen = sizeof(unsigned), 35 .maxlen = sizeof(unsigned int),
36 .mode = 0644, 36 .mode = 0644,
37 .proc_handler = proc_dointvec_ms_jiffies, 37 .proc_handler = proc_dointvec_ms_jiffies,
38 .extra1 = (void *)&zero, 38 .extra1 = (void *)&zero,
@@ -40,7 +40,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
40 { 40 {
41 .procname = "soft_ack_delay", 41 .procname = "soft_ack_delay",
42 .data = &rxrpc_soft_ack_delay, 42 .data = &rxrpc_soft_ack_delay,
43 .maxlen = sizeof(unsigned), 43 .maxlen = sizeof(unsigned int),
44 .mode = 0644, 44 .mode = 0644,
45 .proc_handler = proc_dointvec_ms_jiffies, 45 .proc_handler = proc_dointvec_ms_jiffies,
46 .extra1 = (void *)&one, 46 .extra1 = (void *)&one,
@@ -48,7 +48,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
48 { 48 {
49 .procname = "idle_ack_delay", 49 .procname = "idle_ack_delay",
50 .data = &rxrpc_idle_ack_delay, 50 .data = &rxrpc_idle_ack_delay,
51 .maxlen = sizeof(unsigned), 51 .maxlen = sizeof(unsigned int),
52 .mode = 0644, 52 .mode = 0644,
53 .proc_handler = proc_dointvec_ms_jiffies, 53 .proc_handler = proc_dointvec_ms_jiffies,
54 .extra1 = (void *)&one, 54 .extra1 = (void *)&one,
@@ -56,7 +56,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
56 { 56 {
57 .procname = "resend_timeout", 57 .procname = "resend_timeout",
58 .data = &rxrpc_resend_timeout, 58 .data = &rxrpc_resend_timeout,
59 .maxlen = sizeof(unsigned), 59 .maxlen = sizeof(unsigned int),
60 .mode = 0644, 60 .mode = 0644,
61 .proc_handler = proc_dointvec_ms_jiffies, 61 .proc_handler = proc_dointvec_ms_jiffies,
62 .extra1 = (void *)&one, 62 .extra1 = (void *)&one,
@@ -66,7 +66,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
66 { 66 {
67 .procname = "max_call_lifetime", 67 .procname = "max_call_lifetime",
68 .data = &rxrpc_max_call_lifetime, 68 .data = &rxrpc_max_call_lifetime,
69 .maxlen = sizeof(unsigned), 69 .maxlen = sizeof(unsigned int),
70 .mode = 0644, 70 .mode = 0644,
71 .proc_handler = proc_dointvec_jiffies, 71 .proc_handler = proc_dointvec_jiffies,
72 .extra1 = (void *)&one, 72 .extra1 = (void *)&one,
@@ -74,7 +74,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
74 { 74 {
75 .procname = "dead_call_expiry", 75 .procname = "dead_call_expiry",
76 .data = &rxrpc_dead_call_expiry, 76 .data = &rxrpc_dead_call_expiry,
77 .maxlen = sizeof(unsigned), 77 .maxlen = sizeof(unsigned int),
78 .mode = 0644, 78 .mode = 0644,
79 .proc_handler = proc_dointvec_jiffies, 79 .proc_handler = proc_dointvec_jiffies,
80 .extra1 = (void *)&one, 80 .extra1 = (void *)&one,
@@ -84,7 +84,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
84 { 84 {
85 .procname = "connection_expiry", 85 .procname = "connection_expiry",
86 .data = &rxrpc_connection_expiry, 86 .data = &rxrpc_connection_expiry,
87 .maxlen = sizeof(unsigned), 87 .maxlen = sizeof(unsigned int),
88 .mode = 0644, 88 .mode = 0644,
89 .proc_handler = proc_dointvec_minmax, 89 .proc_handler = proc_dointvec_minmax,
90 .extra1 = (void *)&one, 90 .extra1 = (void *)&one,
@@ -92,7 +92,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
92 { 92 {
93 .procname = "transport_expiry", 93 .procname = "transport_expiry",
94 .data = &rxrpc_transport_expiry, 94 .data = &rxrpc_transport_expiry,
95 .maxlen = sizeof(unsigned), 95 .maxlen = sizeof(unsigned int),
96 .mode = 0644, 96 .mode = 0644,
97 .proc_handler = proc_dointvec_minmax, 97 .proc_handler = proc_dointvec_minmax,
98 .extra1 = (void *)&one, 98 .extra1 = (void *)&one,
@@ -102,7 +102,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
102 { 102 {
103 .procname = "rx_window_size", 103 .procname = "rx_window_size",
104 .data = &rxrpc_rx_window_size, 104 .data = &rxrpc_rx_window_size,
105 .maxlen = sizeof(unsigned), 105 .maxlen = sizeof(unsigned int),
106 .mode = 0644, 106 .mode = 0644,
107 .proc_handler = proc_dointvec_minmax, 107 .proc_handler = proc_dointvec_minmax,
108 .extra1 = (void *)&one, 108 .extra1 = (void *)&one,
@@ -111,16 +111,16 @@ static struct ctl_table rxrpc_sysctl_table[] = {
111 { 111 {
112 .procname = "rx_mtu", 112 .procname = "rx_mtu",
113 .data = &rxrpc_rx_mtu, 113 .data = &rxrpc_rx_mtu,
114 .maxlen = sizeof(unsigned), 114 .maxlen = sizeof(unsigned int),
115 .mode = 0644, 115 .mode = 0644,
116 .proc_handler = proc_dointvec_minmax, 116 .proc_handler = proc_dointvec_minmax,
117 .extra1 = (void *)&one, 117 .extra1 = (void *)&one,
118 .extra1 = (void *)&n_65535, 118 .extra2 = (void *)&n_65535,
119 }, 119 },
120 { 120 {
121 .procname = "rx_jumbo_max", 121 .procname = "rx_jumbo_max",
122 .data = &rxrpc_rx_jumbo_max, 122 .data = &rxrpc_rx_jumbo_max,
123 .maxlen = sizeof(unsigned), 123 .maxlen = sizeof(unsigned int),
124 .mode = 0644, 124 .mode = 0644,
125 .proc_handler = proc_dointvec_minmax, 125 .proc_handler = proc_dointvec_minmax,
126 .extra1 = (void *)&one, 126 .extra1 = (void *)&one,
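The sysctl hunk also fixes a long-standing copy-and-paste bug in the rx_mtu entry: both bounds were assigned to .extra1 (the later designator wins, so the minimum ended up as 65535 and no maximum was enforced at all), whereas proc_dointvec_minmax() reads the lower bound from .extra1 and the upper bound from .extra2. For reference, a correctly bounded entry looks like this (sketch, reusing the table's existing min/max constants):

    {
            .procname     = "rx_mtu",
            .data         = &rxrpc_rx_mtu,
            .maxlen       = sizeof(unsigned int),
            .mode         = 0644,
            .proc_handler = proc_dointvec_minmax,
            .extra1       = (void *)&one,       /* lower bound */
            .extra2       = (void *)&n_65535,   /* upper bound, previously shadowed by the duplicate .extra1 */
    },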
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 82830824fb1f..b148302bbaf2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -739,6 +739,28 @@ config NET_ACT_CONNMARK
739 To compile this code as a module, choose M here: the 739 To compile this code as a module, choose M here: the
740 module will be called act_connmark. 740 module will be called act_connmark.
741 741
742config NET_ACT_IFE
743 tristate "Inter-FE action based on IETF ForCES InterFE LFB"
744 depends on NET_CLS_ACT
745 ---help---
746 Say Y here to allow for sourcing and terminating metadata
747 For details refer to netdev01 paper:
748 "Distributing Linux Traffic Control Classifier-Action Subsystem"
749 Authors: Jamal Hadi Salim and Damascene M. Joachimpillai
750
751 To compile this code as a module, choose M here: the
752 module will be called act_ife.
753
754config NET_IFE_SKBMARK
755 tristate "Support to encoding decoding skb mark on IFE action"
756 depends on NET_ACT_IFE
757 ---help---
758
759config NET_IFE_SKBPRIO
760 tristate "Support to encoding decoding skb prio on IFE action"
761 depends on NET_ACT_IFE
762 ---help---
763
742config NET_CLS_IND 764config NET_CLS_IND
743 bool "Incoming device classification" 765 bool "Incoming device classification"
744 depends on NET_CLS_U32 || NET_CLS_FW 766 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 690c1689e090..84bddb373517 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -19,6 +19,9 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
19obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o 19obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
20obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o 20obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o
21obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o 21obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
22obj-$(CONFIG_NET_ACT_IFE) += act_ife.o
23obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o
24obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o
22obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 25obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
23obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 26obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
24obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 27obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 06e7c4a37245..96066665e376 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -36,10 +36,9 @@ static void free_tcf(struct rcu_head *head)
36 kfree(p); 36 kfree(p);
37} 37}
38 38
39static void tcf_hash_destroy(struct tc_action *a) 39static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *a)
40{ 40{
41 struct tcf_common *p = a->priv; 41 struct tcf_common *p = a->priv;
42 struct tcf_hashinfo *hinfo = a->ops->hinfo;
43 42
44 spin_lock_bh(&hinfo->lock); 43 spin_lock_bh(&hinfo->lock);
45 hlist_del(&p->tcfc_head); 44 hlist_del(&p->tcfc_head);
@@ -68,8 +67,8 @@ int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
68 if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) { 67 if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
69 if (a->ops->cleanup) 68 if (a->ops->cleanup)
70 a->ops->cleanup(a, bind); 69 a->ops->cleanup(a, bind);
71 tcf_hash_destroy(a); 70 tcf_hash_destroy(a->hinfo, a);
72 ret = 1; 71 ret = ACT_P_DELETED;
73 } 72 }
74 } 73 }
75 74
@@ -77,10 +76,9 @@ int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
77} 76}
78EXPORT_SYMBOL(__tcf_hash_release); 77EXPORT_SYMBOL(__tcf_hash_release);
79 78
80static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, 79static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
81 struct tc_action *a) 80 struct netlink_callback *cb, struct tc_action *a)
82{ 81{
83 struct tcf_hashinfo *hinfo = a->ops->hinfo;
84 struct hlist_head *head; 82 struct hlist_head *head;
85 struct tcf_common *p; 83 struct tcf_common *p;
86 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 84 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
@@ -126,9 +124,9 @@ nla_put_failure:
126 goto done; 124 goto done;
127} 125}
128 126
129static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) 127static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
128 struct tc_action *a)
130{ 129{
131 struct tcf_hashinfo *hinfo = a->ops->hinfo;
132 struct hlist_head *head; 130 struct hlist_head *head;
133 struct hlist_node *n; 131 struct hlist_node *n;
134 struct tcf_common *p; 132 struct tcf_common *p;
@@ -163,18 +161,24 @@ nla_put_failure:
163 return ret; 161 return ret;
164} 162}
165 163
166static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, 164int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
167 int type, struct tc_action *a) 165 struct netlink_callback *cb, int type,
166 struct tc_action *a)
168{ 167{
168 struct tcf_hashinfo *hinfo = tn->hinfo;
169
170 a->hinfo = hinfo;
171
169 if (type == RTM_DELACTION) { 172 if (type == RTM_DELACTION) {
170 return tcf_del_walker(skb, a); 173 return tcf_del_walker(hinfo, skb, a);
171 } else if (type == RTM_GETACTION) { 174 } else if (type == RTM_GETACTION) {
172 return tcf_dump_walker(skb, cb, a); 175 return tcf_dump_walker(hinfo, skb, cb, a);
173 } else { 176 } else {
174 WARN(1, "tcf_generic_walker: unknown action %d\n", type); 177 WARN(1, "tcf_generic_walker: unknown action %d\n", type);
175 return -EINVAL; 178 return -EINVAL;
176 } 179 }
177} 180}
181EXPORT_SYMBOL(tcf_generic_walker);
178 182
179static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) 183static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
180{ 184{
@@ -191,8 +195,9 @@ static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
191 return p; 195 return p;
192} 196}
193 197
194u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo) 198u32 tcf_hash_new_index(struct tc_action_net *tn)
195{ 199{
200 struct tcf_hashinfo *hinfo = tn->hinfo;
196 u32 val = hinfo->index; 201 u32 val = hinfo->index;
197 202
198 do { 203 do {
@@ -205,28 +210,31 @@ u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo)
205} 210}
206EXPORT_SYMBOL(tcf_hash_new_index); 211EXPORT_SYMBOL(tcf_hash_new_index);
207 212
208int tcf_hash_search(struct tc_action *a, u32 index) 213int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index)
209{ 214{
210 struct tcf_hashinfo *hinfo = a->ops->hinfo; 215 struct tcf_hashinfo *hinfo = tn->hinfo;
211 struct tcf_common *p = tcf_hash_lookup(index, hinfo); 216 struct tcf_common *p = tcf_hash_lookup(index, hinfo);
212 217
213 if (p) { 218 if (p) {
214 a->priv = p; 219 a->priv = p;
220 a->hinfo = hinfo;
215 return 1; 221 return 1;
216 } 222 }
217 return 0; 223 return 0;
218} 224}
219EXPORT_SYMBOL(tcf_hash_search); 225EXPORT_SYMBOL(tcf_hash_search);
220 226
221int tcf_hash_check(u32 index, struct tc_action *a, int bind) 227int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
228 int bind)
222{ 229{
223 struct tcf_hashinfo *hinfo = a->ops->hinfo; 230 struct tcf_hashinfo *hinfo = tn->hinfo;
224 struct tcf_common *p = NULL; 231 struct tcf_common *p = NULL;
225 if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { 232 if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
226 if (bind) 233 if (bind)
227 p->tcfc_bindcnt++; 234 p->tcfc_bindcnt++;
228 p->tcfc_refcnt++; 235 p->tcfc_refcnt++;
229 a->priv = p; 236 a->priv = p;
237 a->hinfo = hinfo;
230 return 1; 238 return 1;
231 } 239 }
232 return 0; 240 return 0;
@@ -243,11 +251,11 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
243} 251}
244EXPORT_SYMBOL(tcf_hash_cleanup); 252EXPORT_SYMBOL(tcf_hash_cleanup);
245 253
246int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, 254int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
247 int size, int bind, bool cpustats) 255 struct tc_action *a, int size, int bind, bool cpustats)
248{ 256{
249 struct tcf_hashinfo *hinfo = a->ops->hinfo;
250 struct tcf_common *p = kzalloc(size, GFP_KERNEL); 257 struct tcf_common *p = kzalloc(size, GFP_KERNEL);
258 struct tcf_hashinfo *hinfo = tn->hinfo;
251 int err = -ENOMEM; 259 int err = -ENOMEM;
252 260
253 if (unlikely(!p)) 261 if (unlikely(!p))
@@ -272,7 +280,7 @@ err2:
272 } 280 }
273 spin_lock_init(&p->tcfc_lock); 281 spin_lock_init(&p->tcfc_lock);
274 INIT_HLIST_NODE(&p->tcfc_head); 282 INIT_HLIST_NODE(&p->tcfc_head);
275 p->tcfc_index = index ? index : tcf_hash_new_index(hinfo); 283 p->tcfc_index = index ? index : tcf_hash_new_index(tn);
276 p->tcfc_tm.install = jiffies; 284 p->tcfc_tm.install = jiffies;
277 p->tcfc_tm.lastuse = jiffies; 285 p->tcfc_tm.lastuse = jiffies;
278 if (est) { 286 if (est) {
@@ -286,14 +294,15 @@ err2:
286 } 294 }
287 295
288 a->priv = (void *) p; 296 a->priv = (void *) p;
297 a->hinfo = hinfo;
289 return 0; 298 return 0;
290} 299}
291EXPORT_SYMBOL(tcf_hash_create); 300EXPORT_SYMBOL(tcf_hash_create);
292 301
293void tcf_hash_insert(struct tc_action *a) 302void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
294{ 303{
295 struct tcf_common *p = a->priv; 304 struct tcf_common *p = a->priv;
296 struct tcf_hashinfo *hinfo = a->ops->hinfo; 305 struct tcf_hashinfo *hinfo = tn->hinfo;
297 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); 306 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
298 307
299 spin_lock_bh(&hinfo->lock); 308 spin_lock_bh(&hinfo->lock);
@@ -302,59 +311,78 @@ void tcf_hash_insert(struct tc_action *a)
302} 311}
303EXPORT_SYMBOL(tcf_hash_insert); 312EXPORT_SYMBOL(tcf_hash_insert);
304 313
314void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
315 struct tcf_hashinfo *hinfo)
316{
317 struct tc_action a = {
318 .ops = ops,
319 .hinfo = hinfo,
320 };
321 int i;
322
323 for (i = 0; i < hinfo->hmask + 1; i++) {
324 struct tcf_common *p;
325 struct hlist_node *n;
326
327 hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfc_head) {
328 int ret;
329
330 a.priv = p;
331 ret = __tcf_hash_release(&a, false, true);
332 if (ret == ACT_P_DELETED)
333 module_put(ops->owner);
334 else if (ret < 0)
335 return;
336 }
337 }
338 kfree(hinfo->htab);
339}
340EXPORT_SYMBOL(tcf_hashinfo_destroy);
341
305static LIST_HEAD(act_base); 342static LIST_HEAD(act_base);
306static DEFINE_RWLOCK(act_mod_lock); 343static DEFINE_RWLOCK(act_mod_lock);
307 344
308int tcf_register_action(struct tc_action_ops *act, unsigned int mask) 345int tcf_register_action(struct tc_action_ops *act,
346 struct pernet_operations *ops)
309{ 347{
310 struct tc_action_ops *a; 348 struct tc_action_ops *a;
311 int err; 349 int ret;
312 350
313 /* Must supply act, dump and init */ 351 if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
314 if (!act->act || !act->dump || !act->init)
315 return -EINVAL; 352 return -EINVAL;
316 353
317 /* Supply defaults */
318 if (!act->lookup)
319 act->lookup = tcf_hash_search;
320 if (!act->walk)
321 act->walk = tcf_generic_walker;
322
323 act->hinfo = kmalloc(sizeof(struct tcf_hashinfo), GFP_KERNEL);
324 if (!act->hinfo)
325 return -ENOMEM;
326 err = tcf_hashinfo_init(act->hinfo, mask);
327 if (err) {
328 kfree(act->hinfo);
329 return err;
330 }
331
332 write_lock(&act_mod_lock); 354 write_lock(&act_mod_lock);
333 list_for_each_entry(a, &act_base, head) { 355 list_for_each_entry(a, &act_base, head) {
334 if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { 356 if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
335 write_unlock(&act_mod_lock); 357 write_unlock(&act_mod_lock);
336 tcf_hashinfo_destroy(act->hinfo);
337 kfree(act->hinfo);
338 return -EEXIST; 358 return -EEXIST;
339 } 359 }
340 } 360 }
341 list_add_tail(&act->head, &act_base); 361 list_add_tail(&act->head, &act_base);
342 write_unlock(&act_mod_lock); 362 write_unlock(&act_mod_lock);
363
364 ret = register_pernet_subsys(ops);
365 if (ret) {
366 tcf_unregister_action(act, ops);
367 return ret;
368 }
369
343 return 0; 370 return 0;
344} 371}
345EXPORT_SYMBOL(tcf_register_action); 372EXPORT_SYMBOL(tcf_register_action);
346 373
347int tcf_unregister_action(struct tc_action_ops *act) 374int tcf_unregister_action(struct tc_action_ops *act,
375 struct pernet_operations *ops)
348{ 376{
349 struct tc_action_ops *a; 377 struct tc_action_ops *a;
350 int err = -ENOENT; 378 int err = -ENOENT;
351 379
380 unregister_pernet_subsys(ops);
381
352 write_lock(&act_mod_lock); 382 write_lock(&act_mod_lock);
353 list_for_each_entry(a, &act_base, head) { 383 list_for_each_entry(a, &act_base, head) {
354 if (a == act) { 384 if (a == act) {
355 list_del(&act->head); 385 list_del(&act->head);
356 tcf_hashinfo_destroy(act->hinfo);
357 kfree(act->hinfo);
358 err = 0; 386 err = 0;
359 break; 387 break;
360 } 388 }
@@ -721,8 +749,8 @@ static struct tc_action *create_a(int i)
721 return act; 749 return act;
722} 750}
723 751
724static struct tc_action * 752static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
725tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) 753 struct nlmsghdr *n, u32 portid)
726{ 754{
727 struct nlattr *tb[TCA_ACT_MAX + 1]; 755 struct nlattr *tb[TCA_ACT_MAX + 1];
728 struct tc_action *a; 756 struct tc_action *a;
@@ -749,7 +777,7 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
749 if (a->ops == NULL) /* could happen in batch of actions */ 777 if (a->ops == NULL) /* could happen in batch of actions */
750 goto err_free; 778 goto err_free;
751 err = -ENOENT; 779 err = -ENOENT;
752 if (a->ops->lookup(a, index) == 0) 780 if (a->ops->lookup(net, a, index) == 0)
753 goto err_mod; 781 goto err_mod;
754 782
755 module_put(a->ops->owner); 783 module_put(a->ops->owner);
@@ -819,7 +847,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
819 if (nest == NULL) 847 if (nest == NULL)
820 goto out_module_put; 848 goto out_module_put;
821 849
822 err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a); 850 err = a.ops->walk(net, skb, &dcb, RTM_DELACTION, &a);
823 if (err < 0) 851 if (err < 0)
824 goto out_module_put; 852 goto out_module_put;
825 if (err == 0) 853 if (err == 0)
@@ -897,7 +925,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
897 } 925 }
898 926
899 for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { 927 for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
900 act = tcf_action_get_1(tb[i], n, portid); 928 act = tcf_action_get_1(net, tb[i], n, portid);
901 if (IS_ERR(act)) { 929 if (IS_ERR(act)) {
902 ret = PTR_ERR(act); 930 ret = PTR_ERR(act);
903 goto err; 931 goto err;
@@ -1044,6 +1072,7 @@ find_dump_kind(const struct nlmsghdr *n)
1044static int 1072static int
1045tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) 1073tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1046{ 1074{
1075 struct net *net = sock_net(skb->sk);
1047 struct nlmsghdr *nlh; 1076 struct nlmsghdr *nlh;
1048 unsigned char *b = skb_tail_pointer(skb); 1077 unsigned char *b = skb_tail_pointer(skb);
1049 struct nlattr *nest; 1078 struct nlattr *nest;
@@ -1078,7 +1107,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1078 if (nest == NULL) 1107 if (nest == NULL)
1079 goto out_module_put; 1108 goto out_module_put;
1080 1109
1081 ret = a_o->walk(skb, cb, RTM_GETACTION, &a); 1110 ret = a_o->walk(net, skb, cb, RTM_GETACTION, &a);
1082 if (ret < 0) 1111 if (ret < 0)
1083 goto out_module_put; 1112 goto out_module_put;
1084 1113
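The per-module hunks that follow all repeat one pattern: each action keeps its hash table in per-netns storage reached through net_generic(), registers a struct pernet_operations next to its tc_action_ops, and forwards the new .walk/.lookup callbacks through that per-netns table. As an illustration only — a hypothetical "foo" action, not part of this patch — the boilerplate looks roughly like this:

/* Sketch only: a made-up "foo" action adopting the per-netns pattern shown
 * in the hunks below. Everything except the tc_action/netns API calls
 * (net_generic, tc_action_net_init/exit, tcf_register_action) is invented
 * for illustration.
 */
#define FOO_TAB_MASK 15

static int foo_net_id;

static int tcf_foo_walker(struct net *net, struct sk_buff *skb,
			  struct netlink_callback *cb, int type,
			  struct tc_action *a)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tcf_generic_walker(tn, skb, cb, type, a);
}

static int tcf_foo_search(struct net *net, struct tc_action *a, u32 index)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tcf_hash_search(tn, a, index);
}

static struct tc_action_ops act_foo_ops = {
	.kind	= "foo",
	.owner	= THIS_MODULE,
	/* .act, .dump, .init as before */
	.walk	= tcf_foo_walker,
	.lookup	= tcf_foo_search,
};

static __net_init int foo_init_net(struct net *net)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tc_action_net_init(tn, &act_foo_ops, FOO_TAB_MASK);
}

static void __net_exit foo_exit_net(struct net *net)
{
	tc_action_net_exit(net_generic(net, foo_net_id));
}

static struct pernet_operations foo_net_ops = {
	.init = foo_init_net,
	.exit = foo_exit_net,
	.id   = &foo_net_id,
	.size = sizeof(struct tc_action_net),
};

/* module init/exit now pass the pernet_operations instead of a table mask */
static int __init foo_init_module(void)
{
	return tcf_register_action(&act_foo_ops, &foo_net_ops);
}

static void __exit foo_cleanup_module(void)
{
	tcf_unregister_action(&act_foo_ops, &foo_net_ops);
}

Judging from the error path shown at the top of this hunk, tcf_register_action() now takes care of register_pernet_subsys()/unregister_pernet_subsys() itself, so the modules never touch the pernet registration directly.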
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0bc6f912f870..8c9f1f0459ab 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -33,6 +33,8 @@ struct tcf_bpf_cfg {
33 bool is_ebpf; 33 bool is_ebpf;
34}; 34};
35 35
36static int bpf_net_id;
37
36static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, 38static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
37 struct tcf_result *res) 39 struct tcf_result *res)
38{ 40{
@@ -275,6 +277,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
275 struct nlattr *est, struct tc_action *act, 277 struct nlattr *est, struct tc_action *act,
276 int replace, int bind) 278 int replace, int bind)
277{ 279{
280 struct tc_action_net *tn = net_generic(net, bpf_net_id);
278 struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; 281 struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
279 struct tcf_bpf_cfg cfg, old; 282 struct tcf_bpf_cfg cfg, old;
280 struct tc_act_bpf *parm; 283 struct tc_act_bpf *parm;
@@ -294,8 +297,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
294 297
295 parm = nla_data(tb[TCA_ACT_BPF_PARMS]); 298 parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
296 299
297 if (!tcf_hash_check(parm->index, act, bind)) { 300 if (!tcf_hash_check(tn, parm->index, act, bind)) {
298 ret = tcf_hash_create(parm->index, est, act, 301 ret = tcf_hash_create(tn, parm->index, est, act,
299 sizeof(*prog), bind, true); 302 sizeof(*prog), bind, true);
300 if (ret < 0) 303 if (ret < 0)
301 return ret; 304 return ret;
@@ -344,7 +347,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
344 rcu_assign_pointer(prog->filter, cfg.filter); 347 rcu_assign_pointer(prog->filter, cfg.filter);
345 348
346 if (res == ACT_P_CREATED) { 349 if (res == ACT_P_CREATED) {
347 tcf_hash_insert(act); 350 tcf_hash_insert(tn, act);
348 } else { 351 } else {
349 /* make sure the program being replaced is no longer executing */ 352 /* make sure the program being replaced is no longer executing */
350 synchronize_rcu(); 353 synchronize_rcu();
@@ -367,6 +370,22 @@ static void tcf_bpf_cleanup(struct tc_action *act, int bind)
367 tcf_bpf_cfg_cleanup(&tmp); 370 tcf_bpf_cfg_cleanup(&tmp);
368} 371}
369 372
373static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
374 struct netlink_callback *cb, int type,
375 struct tc_action *a)
376{
377 struct tc_action_net *tn = net_generic(net, bpf_net_id);
378
379 return tcf_generic_walker(tn, skb, cb, type, a);
380}
381
382static int tcf_bpf_search(struct net *net, struct tc_action *a, u32 index)
383{
384 struct tc_action_net *tn = net_generic(net, bpf_net_id);
385
386 return tcf_hash_search(tn, a, index);
387}
388
370static struct tc_action_ops act_bpf_ops __read_mostly = { 389static struct tc_action_ops act_bpf_ops __read_mostly = {
371 .kind = "bpf", 390 .kind = "bpf",
372 .type = TCA_ACT_BPF, 391 .type = TCA_ACT_BPF,
@@ -375,16 +394,39 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
375 .dump = tcf_bpf_dump, 394 .dump = tcf_bpf_dump,
376 .cleanup = tcf_bpf_cleanup, 395 .cleanup = tcf_bpf_cleanup,
377 .init = tcf_bpf_init, 396 .init = tcf_bpf_init,
397 .walk = tcf_bpf_walker,
398 .lookup = tcf_bpf_search,
399};
400
401static __net_init int bpf_init_net(struct net *net)
402{
403 struct tc_action_net *tn = net_generic(net, bpf_net_id);
404
405 return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK);
406}
407
408static void __net_exit bpf_exit_net(struct net *net)
409{
410 struct tc_action_net *tn = net_generic(net, bpf_net_id);
411
412 tc_action_net_exit(tn);
413}
414
415static struct pernet_operations bpf_net_ops = {
416 .init = bpf_init_net,
417 .exit = bpf_exit_net,
418 .id = &bpf_net_id,
419 .size = sizeof(struct tc_action_net),
378}; 420};
379 421
380static int __init bpf_init_module(void) 422static int __init bpf_init_module(void)
381{ 423{
382 return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); 424 return tcf_register_action(&act_bpf_ops, &bpf_net_ops);
383} 425}
384 426
385static void __exit bpf_cleanup_module(void) 427static void __exit bpf_cleanup_module(void)
386{ 428{
387 tcf_unregister_action(&act_bpf_ops); 429 tcf_unregister_action(&act_bpf_ops, &bpf_net_ops);
388} 430}
389 431
390module_init(bpf_init_module); 432module_init(bpf_init_module);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index bb41699c6c49..c0ed93ce2391 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -30,6 +30,8 @@
30 30
31#define CONNMARK_TAB_MASK 3 31#define CONNMARK_TAB_MASK 3
32 32
33static int connmark_net_id;
34
33static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, 35static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
34 struct tcf_result *res) 36 struct tcf_result *res)
35{ 37{
@@ -97,6 +99,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
97 struct nlattr *est, struct tc_action *a, 99 struct nlattr *est, struct tc_action *a,
98 int ovr, int bind) 100 int ovr, int bind)
99{ 101{
102 struct tc_action_net *tn = net_generic(net, connmark_net_id);
100 struct nlattr *tb[TCA_CONNMARK_MAX + 1]; 103 struct nlattr *tb[TCA_CONNMARK_MAX + 1];
101 struct tcf_connmark_info *ci; 104 struct tcf_connmark_info *ci;
102 struct tc_connmark *parm; 105 struct tc_connmark *parm;
@@ -111,9 +114,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
111 114
112 parm = nla_data(tb[TCA_CONNMARK_PARMS]); 115 parm = nla_data(tb[TCA_CONNMARK_PARMS]);
113 116
114 if (!tcf_hash_check(parm->index, a, bind)) { 117 if (!tcf_hash_check(tn, parm->index, a, bind)) {
115 ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), 118 ret = tcf_hash_create(tn, parm->index, est, a,
116 bind, false); 119 sizeof(*ci), bind, false);
117 if (ret) 120 if (ret)
118 return ret; 121 return ret;
119 122
@@ -122,7 +125,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
122 ci->net = net; 125 ci->net = net;
123 ci->zone = parm->zone; 126 ci->zone = parm->zone;
124 127
125 tcf_hash_insert(a); 128 tcf_hash_insert(tn, a);
126 ret = ACT_P_CREATED; 129 ret = ACT_P_CREATED;
127 } else { 130 } else {
128 ci = to_connmark(a); 131 ci = to_connmark(a);
@@ -169,6 +172,22 @@ nla_put_failure:
169 return -1; 172 return -1;
170} 173}
171 174
175static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
176 struct netlink_callback *cb, int type,
177 struct tc_action *a)
178{
179 struct tc_action_net *tn = net_generic(net, connmark_net_id);
180
181 return tcf_generic_walker(tn, skb, cb, type, a);
182}
183
184static int tcf_connmark_search(struct net *net, struct tc_action *a, u32 index)
185{
186 struct tc_action_net *tn = net_generic(net, connmark_net_id);
187
188 return tcf_hash_search(tn, a, index);
189}
190
172static struct tc_action_ops act_connmark_ops = { 191static struct tc_action_ops act_connmark_ops = {
173 .kind = "connmark", 192 .kind = "connmark",
174 .type = TCA_ACT_CONNMARK, 193 .type = TCA_ACT_CONNMARK,
@@ -176,16 +195,39 @@ static struct tc_action_ops act_connmark_ops = {
176 .act = tcf_connmark, 195 .act = tcf_connmark,
177 .dump = tcf_connmark_dump, 196 .dump = tcf_connmark_dump,
178 .init = tcf_connmark_init, 197 .init = tcf_connmark_init,
198 .walk = tcf_connmark_walker,
199 .lookup = tcf_connmark_search,
200};
201
202static __net_init int connmark_init_net(struct net *net)
203{
204 struct tc_action_net *tn = net_generic(net, connmark_net_id);
205
206 return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK);
207}
208
209static void __net_exit connmark_exit_net(struct net *net)
210{
211 struct tc_action_net *tn = net_generic(net, connmark_net_id);
212
213 tc_action_net_exit(tn);
214}
215
216static struct pernet_operations connmark_net_ops = {
217 .init = connmark_init_net,
218 .exit = connmark_exit_net,
219 .id = &connmark_net_id,
220 .size = sizeof(struct tc_action_net),
179}; 221};
180 222
181static int __init connmark_init_module(void) 223static int __init connmark_init_module(void)
182{ 224{
183 return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK); 225 return tcf_register_action(&act_connmark_ops, &connmark_net_ops);
184} 226}
185 227
186static void __exit connmark_cleanup_module(void) 228static void __exit connmark_cleanup_module(void)
187{ 229{
188 tcf_unregister_action(&act_connmark_ops); 230 tcf_unregister_action(&act_connmark_ops, &connmark_net_ops);
189} 231}
190 232
191module_init(connmark_init_module); 233module_init(connmark_init_module);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b07c535ba8e7..d22426cdebc0 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -42,9 +42,13 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
42 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, 42 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
43}; 43};
44 44
45static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, 45static int csum_net_id;
46 struct tc_action *a, int ovr, int bind) 46
47static int tcf_csum_init(struct net *net, struct nlattr *nla,
48 struct nlattr *est, struct tc_action *a, int ovr,
49 int bind)
47{ 50{
51 struct tc_action_net *tn = net_generic(net, csum_net_id);
48 struct nlattr *tb[TCA_CSUM_MAX + 1]; 52 struct nlattr *tb[TCA_CSUM_MAX + 1];
49 struct tc_csum *parm; 53 struct tc_csum *parm;
50 struct tcf_csum *p; 54 struct tcf_csum *p;
@@ -61,9 +65,9 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
61 return -EINVAL; 65 return -EINVAL;
62 parm = nla_data(tb[TCA_CSUM_PARMS]); 66 parm = nla_data(tb[TCA_CSUM_PARMS]);
63 67
64 if (!tcf_hash_check(parm->index, a, bind)) { 68 if (!tcf_hash_check(tn, parm->index, a, bind)) {
65 ret = tcf_hash_create(parm->index, est, a, sizeof(*p), 69 ret = tcf_hash_create(tn, parm->index, est, a,
66 bind, false); 70 sizeof(*p), bind, false);
67 if (ret) 71 if (ret)
68 return ret; 72 return ret;
69 ret = ACT_P_CREATED; 73 ret = ACT_P_CREATED;
@@ -82,7 +86,7 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
82 spin_unlock_bh(&p->tcf_lock); 86 spin_unlock_bh(&p->tcf_lock);
83 87
84 if (ret == ACT_P_CREATED) 88 if (ret == ACT_P_CREATED)
85 tcf_hash_insert(a); 89 tcf_hash_insert(tn, a);
86 90
87 return ret; 91 return ret;
88} 92}
@@ -105,9 +109,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
105 int hl = ihl + jhl; 109 int hl = ihl + jhl;
106 110
107 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || 111 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
108 (skb_cloned(skb) && 112 skb_try_make_writable(skb, hl + ntkoff))
109 !skb_clone_writable(skb, hl + ntkoff) &&
110 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
111 return NULL; 113 return NULL;
112 else 114 else
113 return (void *)(skb_network_header(skb) + ihl); 115 return (void *)(skb_network_header(skb) + ihl);
@@ -365,9 +367,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
365 } 367 }
366 368
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { 369 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
368 if (skb_cloned(skb) && 370 if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff))
369 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
370 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
371 goto fail; 371 goto fail;
372 372
373 ip_send_check(ip_hdr(skb)); 373 ip_send_check(ip_hdr(skb));
@@ -559,6 +559,22 @@ nla_put_failure:
559 return -1; 559 return -1;
560} 560}
561 561
562static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
563 struct netlink_callback *cb, int type,
564 struct tc_action *a)
565{
566 struct tc_action_net *tn = net_generic(net, csum_net_id);
567
568 return tcf_generic_walker(tn, skb, cb, type, a);
569}
570
571static int tcf_csum_search(struct net *net, struct tc_action *a, u32 index)
572{
573 struct tc_action_net *tn = net_generic(net, csum_net_id);
574
575 return tcf_hash_search(tn, a, index);
576}
577
562static struct tc_action_ops act_csum_ops = { 578static struct tc_action_ops act_csum_ops = {
563 .kind = "csum", 579 .kind = "csum",
564 .type = TCA_ACT_CSUM, 580 .type = TCA_ACT_CSUM,
@@ -566,6 +582,29 @@ static struct tc_action_ops act_csum_ops = {
566 .act = tcf_csum, 582 .act = tcf_csum,
567 .dump = tcf_csum_dump, 583 .dump = tcf_csum_dump,
568 .init = tcf_csum_init, 584 .init = tcf_csum_init,
585 .walk = tcf_csum_walker,
586 .lookup = tcf_csum_search,
587};
588
589static __net_init int csum_init_net(struct net *net)
590{
591 struct tc_action_net *tn = net_generic(net, csum_net_id);
592
593 return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK);
594}
595
596static void __net_exit csum_exit_net(struct net *net)
597{
598 struct tc_action_net *tn = net_generic(net, csum_net_id);
599
600 tc_action_net_exit(tn);
601}
602
603static struct pernet_operations csum_net_ops = {
604 .init = csum_init_net,
605 .exit = csum_exit_net,
606 .id = &csum_net_id,
607 .size = sizeof(struct tc_action_net),
569}; 608};
570 609
571MODULE_DESCRIPTION("Checksum updating actions"); 610MODULE_DESCRIPTION("Checksum updating actions");
@@ -573,12 +612,12 @@ MODULE_LICENSE("GPL");
573 612
574static int __init csum_init_module(void) 613static int __init csum_init_module(void)
575{ 614{
576 return tcf_register_action(&act_csum_ops, CSUM_TAB_MASK); 615 return tcf_register_action(&act_csum_ops, &csum_net_ops);
577} 616}
578 617
579static void __exit csum_cleanup_module(void) 618static void __exit csum_cleanup_module(void)
580{ 619{
581 tcf_unregister_action(&act_csum_ops); 620 tcf_unregister_action(&act_csum_ops, &csum_net_ops);
582} 621}
583 622
584module_init(csum_init_module); 623module_init(csum_init_module);
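Besides the per-netns conversion, act_csum also replaces its open-coded clone/writable dance with skb_try_make_writable(). Going by the lines removed above, the helper is simply that same three-step check wrapped up; a sketch of the assumed equivalent (not copied from skbuff.h, so treat the exact definition as an assumption) would be:

/* Assumed equivalent of skb_try_make_writable(): returns 0 when the first
 * write_len bytes of the skb may be modified in place, non-zero on failure.
 * Mirrors the open-coded check removed from tcf_csum_skb_nextlayer() and
 * tcf_csum_ipv4() above.
 */
static inline int skb_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	return skb_cloned(skb) && !skb_clone_writable(skb, write_len) &&
	       pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}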
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 5c1b05170736..887fc1f209ff 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -25,6 +25,8 @@
25 25
26#define GACT_TAB_MASK 15 26#define GACT_TAB_MASK 15
27 27
28static int gact_net_id;
29
28#ifdef CONFIG_GACT_PROB 30#ifdef CONFIG_GACT_PROB
29static int gact_net_rand(struct tcf_gact *gact) 31static int gact_net_rand(struct tcf_gact *gact)
30{ 32{
@@ -57,6 +59,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
57 struct nlattr *est, struct tc_action *a, 59 struct nlattr *est, struct tc_action *a,
58 int ovr, int bind) 60 int ovr, int bind)
59{ 61{
62 struct tc_action_net *tn = net_generic(net, gact_net_id);
60 struct nlattr *tb[TCA_GACT_MAX + 1]; 63 struct nlattr *tb[TCA_GACT_MAX + 1];
61 struct tc_gact *parm; 64 struct tc_gact *parm;
62 struct tcf_gact *gact; 65 struct tcf_gact *gact;
@@ -88,9 +91,9 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
88 } 91 }
89#endif 92#endif
90 93
91 if (!tcf_hash_check(parm->index, a, bind)) { 94 if (!tcf_hash_check(tn, parm->index, a, bind)) {
92 ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), 95 ret = tcf_hash_create(tn, parm->index, est, a,
93 bind, true); 96 sizeof(*gact), bind, true);
94 if (ret) 97 if (ret)
95 return ret; 98 return ret;
96 ret = ACT_P_CREATED; 99 ret = ACT_P_CREATED;
@@ -118,7 +121,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
118 } 121 }
119#endif 122#endif
120 if (ret == ACT_P_CREATED) 123 if (ret == ACT_P_CREATED)
121 tcf_hash_insert(a); 124 tcf_hash_insert(tn, a);
122 return ret; 125 return ret;
123} 126}
124 127
@@ -183,6 +186,22 @@ nla_put_failure:
183 return -1; 186 return -1;
184} 187}
185 188
189static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
190 struct netlink_callback *cb, int type,
191 struct tc_action *a)
192{
193 struct tc_action_net *tn = net_generic(net, gact_net_id);
194
195 return tcf_generic_walker(tn, skb, cb, type, a);
196}
197
198static int tcf_gact_search(struct net *net, struct tc_action *a, u32 index)
199{
200 struct tc_action_net *tn = net_generic(net, gact_net_id);
201
202 return tcf_hash_search(tn, a, index);
203}
204
186static struct tc_action_ops act_gact_ops = { 205static struct tc_action_ops act_gact_ops = {
187 .kind = "gact", 206 .kind = "gact",
188 .type = TCA_ACT_GACT, 207 .type = TCA_ACT_GACT,
@@ -190,6 +209,29 @@ static struct tc_action_ops act_gact_ops = {
190 .act = tcf_gact, 209 .act = tcf_gact,
191 .dump = tcf_gact_dump, 210 .dump = tcf_gact_dump,
192 .init = tcf_gact_init, 211 .init = tcf_gact_init,
212 .walk = tcf_gact_walker,
213 .lookup = tcf_gact_search,
214};
215
216static __net_init int gact_init_net(struct net *net)
217{
218 struct tc_action_net *tn = net_generic(net, gact_net_id);
219
220 return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK);
221}
222
223static void __net_exit gact_exit_net(struct net *net)
224{
225 struct tc_action_net *tn = net_generic(net, gact_net_id);
226
227 tc_action_net_exit(tn);
228}
229
230static struct pernet_operations gact_net_ops = {
231 .init = gact_init_net,
232 .exit = gact_exit_net,
233 .id = &gact_net_id,
234 .size = sizeof(struct tc_action_net),
193}; 235};
194 236
195MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); 237MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
@@ -203,12 +245,13 @@ static int __init gact_init_module(void)
203#else 245#else
204 pr_info("GACT probability NOT on\n"); 246 pr_info("GACT probability NOT on\n");
205#endif 247#endif
206 return tcf_register_action(&act_gact_ops, GACT_TAB_MASK); 248
249 return tcf_register_action(&act_gact_ops, &gact_net_ops);
207} 250}
208 251
209static void __exit gact_cleanup_module(void) 252static void __exit gact_cleanup_module(void)
210{ 253{
211 tcf_unregister_action(&act_gact_ops); 254 tcf_unregister_action(&act_gact_ops, &gact_net_ops);
212} 255}
213 256
214module_init(gact_init_module); 257module_init(gact_init_module);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
new file mode 100644
index 000000000000..343d011aa818
--- /dev/null
+++ b/net/sched/act_ife.c
@@ -0,0 +1,876 @@
1/*
2 * net/sched/ife.c Inter-FE action based on ForCES WG InterFE LFB
3 *
4 * Refer to:
5 * draft-ietf-forces-interfelfb-03
6 * and
7 * netdev01 paper:
8 * "Distributing Linux Traffic Control Classifier-Action
9 * Subsystem"
10 * Authors: Jamal Hadi Salim and Damascene M. Joachimpillai
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * copyright Jamal Hadi Salim (2015)
18 *
19*/
20
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/string.h>
24#include <linux/errno.h>
25#include <linux/skbuff.h>
26#include <linux/rtnetlink.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <net/net_namespace.h>
30#include <net/netlink.h>
31#include <net/pkt_sched.h>
32#include <uapi/linux/tc_act/tc_ife.h>
33#include <net/tc_act/tc_ife.h>
34#include <linux/etherdevice.h>
35
36#define IFE_TAB_MASK 15
37
38static int ife_net_id;
39static int max_metacnt = IFE_META_MAX + 1;
40
41static const struct nla_policy ife_policy[TCA_IFE_MAX + 1] = {
42 [TCA_IFE_PARMS] = { .len = sizeof(struct tc_ife)},
43 [TCA_IFE_DMAC] = { .len = ETH_ALEN},
44 [TCA_IFE_SMAC] = { .len = ETH_ALEN},
45 [TCA_IFE_TYPE] = { .type = NLA_U16},
46};
47
48/* Caller takes care of presenting data in network order
49*/
50int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
51{
52 u32 *tlv = (u32 *)(skbdata);
53 u16 totlen = nla_total_size(dlen); /*alignment + hdr */
54 char *dptr = (char *)tlv + NLA_HDRLEN;
55 u32 htlv = attrtype << 16 | totlen;
56
57 *tlv = htonl(htlv);
58 memset(dptr, 0, totlen - NLA_HDRLEN);
59 memcpy(dptr, dval, dlen);
60
61 return totlen;
62}
63EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
64
65int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi)
66{
67 if (mi->metaval)
68 return nla_put_u32(skb, mi->metaid, *(u32 *)mi->metaval);
69 else
70 return nla_put(skb, mi->metaid, 0, NULL);
71}
72EXPORT_SYMBOL_GPL(ife_get_meta_u32);
73
74int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi)
75{
76 if (metaval || mi->metaval)
77 return 8; /* T+L+V == 2+2+4 */
78
79 return 0;
80}
81EXPORT_SYMBOL_GPL(ife_check_meta_u32);
82
83int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi)
84{
85 u32 edata = metaval;
86
87 if (mi->metaval)
88 edata = *(u32 *)mi->metaval;
89 else if (metaval)
90 edata = metaval;
91
92 if (!edata) /* will not encode */
93 return 0;
94
95 edata = htonl(edata);
96 return ife_tlv_meta_encode(skbdata, mi->metaid, 4, &edata);
97}
98EXPORT_SYMBOL_GPL(ife_encode_meta_u32);
99
100int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi)
101{
102 if (mi->metaval)
103 return nla_put_u16(skb, mi->metaid, *(u16 *)mi->metaval);
104 else
105 return nla_put(skb, mi->metaid, 0, NULL);
106}
107EXPORT_SYMBOL_GPL(ife_get_meta_u16);
108
109int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval)
110{
111 mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL);
112 if (!mi->metaval)
113 return -ENOMEM;
114
115 return 0;
116}
117EXPORT_SYMBOL_GPL(ife_alloc_meta_u32);
118
119int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval)
120{
121 mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL);
122 if (!mi->metaval)
123 return -ENOMEM;
124
125 return 0;
126}
127EXPORT_SYMBOL_GPL(ife_alloc_meta_u16);
128
129void ife_release_meta_gen(struct tcf_meta_info *mi)
130{
131 kfree(mi->metaval);
132}
133EXPORT_SYMBOL_GPL(ife_release_meta_gen);
134
135int ife_validate_meta_u32(void *val, int len)
136{
137 if (len == 4)
138 return 0;
139
140 return -EINVAL;
141}
142EXPORT_SYMBOL_GPL(ife_validate_meta_u32);
143
144int ife_validate_meta_u16(void *val, int len)
145{
146 /* length will include padding */
147 if (len == NLA_ALIGN(2))
148 return 0;
149
150 return -EINVAL;
151}
152EXPORT_SYMBOL_GPL(ife_validate_meta_u16);
153
154static LIST_HEAD(ifeoplist);
155static DEFINE_RWLOCK(ife_mod_lock);
156
157static struct tcf_meta_ops *find_ife_oplist(u16 metaid)
158{
159 struct tcf_meta_ops *o;
160
161 read_lock(&ife_mod_lock);
162 list_for_each_entry(o, &ifeoplist, list) {
163 if (o->metaid == metaid) {
164 if (!try_module_get(o->owner))
165 o = NULL;
166 read_unlock(&ife_mod_lock);
167 return o;
168 }
169 }
170 read_unlock(&ife_mod_lock);
171
172 return NULL;
173}
174
175int register_ife_op(struct tcf_meta_ops *mops)
176{
177 struct tcf_meta_ops *m;
178
179 if (!mops->metaid || !mops->metatype || !mops->name ||
180 !mops->check_presence || !mops->encode || !mops->decode ||
181 !mops->get || !mops->alloc)
182 return -EINVAL;
183
184 write_lock(&ife_mod_lock);
185
186 list_for_each_entry(m, &ifeoplist, list) {
187 if (m->metaid == mops->metaid ||
188 (strcmp(mops->name, m->name) == 0)) {
189 write_unlock(&ife_mod_lock);
190 return -EEXIST;
191 }
192 }
193
194 if (!mops->release)
195 mops->release = ife_release_meta_gen;
196
197 list_add_tail(&mops->list, &ifeoplist);
198 write_unlock(&ife_mod_lock);
199 return 0;
200}
201EXPORT_SYMBOL_GPL(register_ife_op);
202
203int unregister_ife_op(struct tcf_meta_ops *mops)
204{
205 struct tcf_meta_ops *m;
206 int err = -ENOENT;
207
208 write_lock(&ife_mod_lock);
209 list_for_each_entry(m, &ifeoplist, list) {
210 if (m->metaid == mops->metaid) {
211 list_del(&mops->list);
212 err = 0;
213 break;
214 }
215 }
216 write_unlock(&ife_mod_lock);
217
218 return err;
219}
220EXPORT_SYMBOL_GPL(unregister_ife_op);
221
222static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
223{
224 int ret = 0;
225 /* XXX: unfortunately we can't use nla_policy at this point
226 * because a length of 0 is valid in the case of
227 * "allow". "use" semantics do enforce a proper
228 * length and we could have used nla_policy, but it makes it
229 * hard to use it just for that..
230 */
231 if (ops->validate)
232 return ops->validate(val, len);
233
234 if (ops->metatype == NLA_U32)
235 ret = ife_validate_meta_u32(val, len);
236 else if (ops->metatype == NLA_U16)
237 ret = ife_validate_meta_u16(val, len);
238
239 return ret;
240}
241
242/* called when adding new meta information
243 * under ife->tcf_lock
244*/
245static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
246 void *val, int len)
247{
248 struct tcf_meta_ops *ops = find_ife_oplist(metaid);
249 int ret = 0;
250
251 if (!ops) {
252 ret = -ENOENT;
253#ifdef CONFIG_MODULES
254 spin_unlock_bh(&ife->tcf_lock);
255 rtnl_unlock();
256 request_module("ifemeta%u", metaid);
257 rtnl_lock();
258 spin_lock_bh(&ife->tcf_lock);
259 ops = find_ife_oplist(metaid);
260#endif
261 }
262
263 if (ops) {
264 ret = 0;
265 if (len)
266 ret = ife_validate_metatype(ops, val, len);
267
268 module_put(ops->owner);
269 }
270
271 return ret;
272}
273
274/* called when adding new meta information
275 * under ife->tcf_lock
276*/
277static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
278 int len)
279{
280 struct tcf_meta_info *mi = NULL;
281 struct tcf_meta_ops *ops = find_ife_oplist(metaid);
282 int ret = 0;
283
284 if (!ops)
285 return -ENOENT;
286
287 mi = kzalloc(sizeof(*mi), GFP_KERNEL);
288 if (!mi) {
289 /*put back what find_ife_oplist took */
290 module_put(ops->owner);
291 return -ENOMEM;
292 }
293
294 mi->metaid = metaid;
295 mi->ops = ops;
296 if (len > 0) {
297 ret = ops->alloc(mi, metaval);
298 if (ret != 0) {
299 kfree(mi);
300 module_put(ops->owner);
301 return ret;
302 }
303 }
304
305 list_add_tail(&mi->metalist, &ife->metalist);
306
307 return ret;
308}
309
310static int use_all_metadata(struct tcf_ife_info *ife)
311{
312 struct tcf_meta_ops *o;
313 int rc = 0;
314 int installed = 0;
315
316 list_for_each_entry(o, &ifeoplist, list) {
317 rc = add_metainfo(ife, o->metaid, NULL, 0);
318 if (rc == 0)
319 installed += 1;
320 }
321
322 if (installed)
323 return 0;
324 else
325 return -EINVAL;
326}
327
328static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
329{
330 struct tcf_meta_info *e;
331 struct nlattr *nest;
332 unsigned char *b = skb_tail_pointer(skb);
333 int total_encoded = 0;
334
335 /*can only happen on decode */
336 if (list_empty(&ife->metalist))
337 return 0;
338
339 nest = nla_nest_start(skb, TCA_IFE_METALST);
340 if (!nest)
341 goto out_nlmsg_trim;
342
343 list_for_each_entry(e, &ife->metalist, metalist) {
344 if (!e->ops->get(skb, e))
345 total_encoded += 1;
346 }
347
348 if (!total_encoded)
349 goto out_nlmsg_trim;
350
351 nla_nest_end(skb, nest);
352
353 return 0;
354
355out_nlmsg_trim:
356 nlmsg_trim(skb, b);
357 return -1;
358}
359
360/* under ife->tcf_lock */
361static void _tcf_ife_cleanup(struct tc_action *a, int bind)
362{
363 struct tcf_ife_info *ife = a->priv;
364 struct tcf_meta_info *e, *n;
365
366 list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
367 module_put(e->ops->owner);
368 list_del(&e->metalist);
369 if (e->metaval) {
370 if (e->ops->release)
371 e->ops->release(e);
372 else
373 kfree(e->metaval);
374 }
375 kfree(e);
376 }
377}
378
379static void tcf_ife_cleanup(struct tc_action *a, int bind)
380{
381 struct tcf_ife_info *ife = a->priv;
382
383 spin_lock_bh(&ife->tcf_lock);
384 _tcf_ife_cleanup(a, bind);
385 spin_unlock_bh(&ife->tcf_lock);
386}
387
388/* under ife->tcf_lock */
389static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
390{
391 int len = 0;
392 int rc = 0;
393 int i = 0;
394 void *val;
395
396 for (i = 1; i < max_metacnt; i++) {
397 if (tb[i]) {
398 val = nla_data(tb[i]);
399 len = nla_len(tb[i]);
400
401 rc = load_metaops_and_vet(ife, i, val, len);
402 if (rc != 0)
403 return rc;
404
405 rc = add_metainfo(ife, i, val, len);
406 if (rc)
407 return rc;
408 }
409 }
410
411 return rc;
412}
413
414static int tcf_ife_init(struct net *net, struct nlattr *nla,
415 struct nlattr *est, struct tc_action *a,
416 int ovr, int bind)
417{
418 struct tc_action_net *tn = net_generic(net, ife_net_id);
419 struct nlattr *tb[TCA_IFE_MAX + 1];
420 struct nlattr *tb2[IFE_META_MAX + 1];
421 struct tcf_ife_info *ife;
422 struct tc_ife *parm;
423 u16 ife_type = 0;
424 u8 *daddr = NULL;
425 u8 *saddr = NULL;
426 int ret = 0, exists = 0;
427 int err;
428
429 err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy);
430 if (err < 0)
431 return err;
432
433 if (!tb[TCA_IFE_PARMS])
434 return -EINVAL;
435
436 parm = nla_data(tb[TCA_IFE_PARMS]);
437
438 exists = tcf_hash_check(tn, parm->index, a, bind);
439 if (exists && bind)
440 return 0;
441
442 if (parm->flags & IFE_ENCODE) {
443 /* Until we get issued the ethertype, we can't have
444 * a default.
445 **/
446 if (!tb[TCA_IFE_TYPE]) {
447 if (exists)
448 tcf_hash_release(a, bind);
449 pr_info("You MUST pass ethertype for encoding\n");
450 return -EINVAL;
451 }
452 }
453
454 if (!exists) {
455 ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*ife),
456 bind, false);
457 if (ret)
458 return ret;
459 ret = ACT_P_CREATED;
460 } else {
461 tcf_hash_release(a, bind);
462 if (!ovr)
463 return -EEXIST;
464 }
465
466 ife = to_ife(a);
467 ife->flags = parm->flags;
468
469 if (parm->flags & IFE_ENCODE) {
470 ife_type = nla_get_u16(tb[TCA_IFE_TYPE]);
471 if (tb[TCA_IFE_DMAC])
472 daddr = nla_data(tb[TCA_IFE_DMAC]);
473 if (tb[TCA_IFE_SMAC])
474 saddr = nla_data(tb[TCA_IFE_SMAC]);
475 }
476
477 spin_lock_bh(&ife->tcf_lock);
478 ife->tcf_action = parm->action;
479
480 if (parm->flags & IFE_ENCODE) {
481 if (daddr)
482 ether_addr_copy(ife->eth_dst, daddr);
483 else
484 eth_zero_addr(ife->eth_dst);
485
486 if (saddr)
487 ether_addr_copy(ife->eth_src, saddr);
488 else
489 eth_zero_addr(ife->eth_src);
490
491 ife->eth_type = ife_type;
492 }
493
494 if (ret == ACT_P_CREATED)
495 INIT_LIST_HEAD(&ife->metalist);
496
497 if (tb[TCA_IFE_METALST]) {
498 err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST],
499 NULL);
500 if (err) {
501metadata_parse_err:
502 if (exists)
503 tcf_hash_release(a, bind);
504 if (ret == ACT_P_CREATED)
505 _tcf_ife_cleanup(a, bind);
506
507 spin_unlock_bh(&ife->tcf_lock);
508 return err;
509 }
510
511 err = populate_metalist(ife, tb2);
512 if (err)
513 goto metadata_parse_err;
514
515 } else {
516 /* if no metadata allow list was passed, or allow-all was
517 * passed, then here we process by adding as many supported
518 * metadata as we can. You had better have at least one,
519 * else we are going to bail out
520 */
521 err = use_all_metadata(ife);
522 if (err) {
523 if (ret == ACT_P_CREATED)
524 _tcf_ife_cleanup(a, bind);
525
526 spin_unlock_bh(&ife->tcf_lock);
527 return err;
528 }
529 }
530
531 spin_unlock_bh(&ife->tcf_lock);
532
533 if (ret == ACT_P_CREATED)
534 tcf_hash_insert(tn, a);
535
536 return ret;
537}
538
539static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
540 int ref)
541{
542 unsigned char *b = skb_tail_pointer(skb);
543 struct tcf_ife_info *ife = a->priv;
544 struct tc_ife opt = {
545 .index = ife->tcf_index,
546 .refcnt = ife->tcf_refcnt - ref,
547 .bindcnt = ife->tcf_bindcnt - bind,
548 .action = ife->tcf_action,
549 .flags = ife->flags,
550 };
551 struct tcf_t t;
552
553 if (nla_put(skb, TCA_IFE_PARMS, sizeof(opt), &opt))
554 goto nla_put_failure;
555
556 t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install);
557 t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse);
558 t.expires = jiffies_to_clock_t(ife->tcf_tm.expires);
559 if (nla_put(skb, TCA_IFE_TM, sizeof(t), &t))
560 goto nla_put_failure;
561
562 if (!is_zero_ether_addr(ife->eth_dst)) {
563 if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst))
564 goto nla_put_failure;
565 }
566
567 if (!is_zero_ether_addr(ife->eth_src)) {
568 if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src))
569 goto nla_put_failure;
570 }
571
572 if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type))
573 goto nla_put_failure;
574
575 if (dump_metalist(skb, ife)) {
576 /*ignore failure to dump metalist */
577 pr_info("Failed to dump metalist\n");
578 }
579
580 return skb->len;
581
582nla_put_failure:
583 nlmsg_trim(skb, b);
584 return -1;
585}
586
587int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
588 u16 metaid, u16 mlen, void *mdata)
589{
590 struct tcf_meta_info *e;
591
592 /* XXX: use hash to speed up */
593 list_for_each_entry(e, &ife->metalist, metalist) {
594 if (metaid == e->metaid) {
595 if (e->ops) {
596 /* We check for decode presence already */
597 return e->ops->decode(skb, mdata, mlen);
598 }
599 }
600 }
601
602 return 0;
603}
604
605struct ifeheadr {
606 __be16 metalen;
607 u8 tlv_data[];
608};
609
610struct meta_tlvhdr {
611 __be16 type;
612 __be16 len;
613};
614
615static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
616 struct tcf_result *res)
617{
618 struct tcf_ife_info *ife = a->priv;
619 int action = ife->tcf_action;
620 struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
621 u16 ifehdrln = ifehdr->metalen;
622 struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
623
624 spin_lock(&ife->tcf_lock);
625 bstats_update(&ife->tcf_bstats, skb);
626 ife->tcf_tm.lastuse = jiffies;
627 spin_unlock(&ife->tcf_lock);
628
629 ifehdrln = ntohs(ifehdrln);
630 if (unlikely(!pskb_may_pull(skb, ifehdrln))) {
631 spin_lock(&ife->tcf_lock);
632 ife->tcf_qstats.drops++;
633 spin_unlock(&ife->tcf_lock);
634 return TC_ACT_SHOT;
635 }
636
637 skb_set_mac_header(skb, ifehdrln);
638 __skb_pull(skb, ifehdrln);
639 skb->protocol = eth_type_trans(skb, skb->dev);
640 ifehdrln -= IFE_METAHDRLEN;
641
642 while (ifehdrln > 0) {
643 u8 *tlvdata = (u8 *)tlv;
644 u16 mtype = tlv->type;
645 u16 mlen = tlv->len;
646
647 mtype = ntohs(mtype);
648 mlen = ntohs(mlen);
649
650 if (find_decode_metaid(skb, ife, mtype, (mlen - 4),
651 (void *)(tlvdata + 4))) {
652 /* abuse overlimits to count when we receive metadata
653 * but don't have an ops for it
654 */
655 pr_info_ratelimited("Unknown metaid %d alnlen %d\n",
656 mtype, mlen);
657 ife->tcf_qstats.overlimits++;
658 }
659
660 tlvdata += mlen;
661 ifehdrln -= mlen;
662 tlv = (struct meta_tlvhdr *)tlvdata;
663 }
664
665 skb_reset_network_header(skb);
666 return action;
667}
668
669/*XXX: check if we can do this at install time instead of current
670 * send data path
671**/
672static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
673{
674 struct tcf_meta_info *e, *n;
675 int tot_run_sz = 0, run_sz = 0;
676
677 list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
678 if (e->ops->check_presence) {
679 run_sz = e->ops->check_presence(skb, e);
680 tot_run_sz += run_sz;
681 }
682 }
683
684 return tot_run_sz;
685}
686
687static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
688 struct tcf_result *res)
689{
690 struct tcf_ife_info *ife = a->priv;
691 int action = ife->tcf_action;
692 struct ethhdr *oethh; /* outer ether header */
693 struct ethhdr *iethh; /* inner eth header */
694 struct tcf_meta_info *e;
695 /*
696 OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA
697 where ORIGDATA = original ethernet header ...
698 */
699 u16 metalen = ife_get_sz(skb, ife);
700 int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
701 unsigned int skboff = skb->dev->hard_header_len;
702 u32 at = G_TC_AT(skb->tc_verd);
703 int new_len = skb->len + hdrm;
704 bool exceed_mtu = false;
705 int err;
706
707 if (at & AT_EGRESS) {
708 if (new_len > skb->dev->mtu)
709 exceed_mtu = true;
710 }
711
712 spin_lock(&ife->tcf_lock);
713 bstats_update(&ife->tcf_bstats, skb);
714 ife->tcf_tm.lastuse = jiffies;
715
716 if (!metalen) { /* no metadata to send */
717 /* abuse overlimits to count when we allow packet
718 * with no metadata
719 */
720 ife->tcf_qstats.overlimits++;
721 spin_unlock(&ife->tcf_lock);
722 return action;
723 }
724 /* could be stupid policy setup or mtu config
725 * so let's be conservative.. */
726 if ((action == TC_ACT_SHOT) || exceed_mtu) {
727 ife->tcf_qstats.drops++;
728 spin_unlock(&ife->tcf_lock);
729 return TC_ACT_SHOT;
730 }
731
732 iethh = eth_hdr(skb);
733
734 err = skb_cow_head(skb, hdrm);
735 if (unlikely(err)) {
736 ife->tcf_qstats.drops++;
737 spin_unlock(&ife->tcf_lock);
738 return TC_ACT_SHOT;
739 }
740
741 if (!(at & AT_EGRESS))
742 skb_push(skb, skb->dev->hard_header_len);
743
744 __skb_push(skb, hdrm);
745 memcpy(skb->data, iethh, skb->mac_len);
746 skb_reset_mac_header(skb);
747 oethh = eth_hdr(skb);
748
749 /*total metadata length */
750 metalen += IFE_METAHDRLEN;
751 metalen = htons(metalen);
752 memcpy((skb->data + skboff), &metalen, IFE_METAHDRLEN);
753 skboff += IFE_METAHDRLEN;
754
755 /* XXX: we don't have a clever way of telling encode to
756 * not repeat some of the computations that are done by
757 * ops->check_presence...
758 */
759 list_for_each_entry(e, &ife->metalist, metalist) {
760 if (e->ops->encode) {
761 err = e->ops->encode(skb, (void *)(skb->data + skboff),
762 e);
763 }
764 if (err < 0) {
765 /* too corrupt to keep around if overwritten */
766 ife->tcf_qstats.drops++;
767 spin_unlock(&ife->tcf_lock);
768 return TC_ACT_SHOT;
769 }
770 skboff += err;
771 }
772
773 if (!is_zero_ether_addr(ife->eth_src))
774 ether_addr_copy(oethh->h_source, ife->eth_src);
775 else
776 ether_addr_copy(oethh->h_source, iethh->h_source);
777 if (!is_zero_ether_addr(ife->eth_dst))
778 ether_addr_copy(oethh->h_dest, ife->eth_dst);
779 else
780 ether_addr_copy(oethh->h_dest, iethh->h_dest);
781 oethh->h_proto = htons(ife->eth_type);
782
783 if (!(at & AT_EGRESS))
784 skb_pull(skb, skb->dev->hard_header_len);
785
786 spin_unlock(&ife->tcf_lock);
787
788 return action;
789}
790
791static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
792 struct tcf_result *res)
793{
794 struct tcf_ife_info *ife = a->priv;
795
796 if (ife->flags & IFE_ENCODE)
797 return tcf_ife_encode(skb, a, res);
798
799 if (!(ife->flags & IFE_ENCODE))
800 return tcf_ife_decode(skb, a, res);
801
802 pr_info_ratelimited("unknown failure (policy neither de/encode)\n");
803 spin_lock(&ife->tcf_lock);
804 bstats_update(&ife->tcf_bstats, skb);
805 ife->tcf_tm.lastuse = jiffies;
806 ife->tcf_qstats.drops++;
807 spin_unlock(&ife->tcf_lock);
808
809 return TC_ACT_SHOT;
810}
811
812static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
813 struct netlink_callback *cb, int type,
814 struct tc_action *a)
815{
816 struct tc_action_net *tn = net_generic(net, ife_net_id);
817
818 return tcf_generic_walker(tn, skb, cb, type, a);
819}
820
821static int tcf_ife_search(struct net *net, struct tc_action *a, u32 index)
822{
823 struct tc_action_net *tn = net_generic(net, ife_net_id);
824
825 return tcf_hash_search(tn, a, index);
826}
827
828static struct tc_action_ops act_ife_ops = {
829 .kind = "ife",
830 .type = TCA_ACT_IFE,
831 .owner = THIS_MODULE,
832 .act = tcf_ife_act,
833 .dump = tcf_ife_dump,
834 .cleanup = tcf_ife_cleanup,
835 .init = tcf_ife_init,
836 .walk = tcf_ife_walker,
837 .lookup = tcf_ife_search,
838};
839
840static __net_init int ife_init_net(struct net *net)
841{
842 struct tc_action_net *tn = net_generic(net, ife_net_id);
843
844 return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK);
845}
846
847static void __net_exit ife_exit_net(struct net *net)
848{
849 struct tc_action_net *tn = net_generic(net, ife_net_id);
850
851 tc_action_net_exit(tn);
852}
853
854static struct pernet_operations ife_net_ops = {
855 .init = ife_init_net,
856 .exit = ife_exit_net,
857 .id = &ife_net_id,
858 .size = sizeof(struct tc_action_net),
859};
860
861static int __init ife_init_module(void)
862{
863 return tcf_register_action(&act_ife_ops, &ife_net_ops);
864}
865
866static void __exit ife_cleanup_module(void)
867{
868 tcf_unregister_action(&act_ife_ops, &ife_net_ops);
869}
870
871module_init(ife_init_module);
872module_exit(ife_cleanup_module);
873
874MODULE_AUTHOR("Jamal Hadi Salim(2015)");
875MODULE_DESCRIPTION("Inter-FE LFB action");
876MODULE_LICENSE("GPL");
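The on-wire format produced by ife_tlv_meta_encode() and consumed by tcf_ife_decode() above is a 16-bit metadata id followed by a 16-bit total length (4-byte header included, padded to a 4-byte boundary), both big-endian, then the value. The stand-alone user-space sketch below (illustration only, not kernel code; the IFE_META_SKBMARK value is an assumption taken from the uapi header) round-trips a u32 skb mark through that layout:

/* Illustration of the IFE TLV layout: 2-byte metaid, 2-byte total length
 * (header + value, NLA-aligned), then the value, all big-endian.
 * Builds stand-alone: cc -o ife_tlv ife_tlv.c && ./ife_tlv
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define IFE_META_SKBMARK 3   /* assumed id for the example; see uapi tc_ife.h */

static size_t tlv_encode_u32(uint8_t *buf, uint16_t metaid, uint32_t val)
{
	uint16_t totlen = 4 + sizeof(val);    /* header + value, already aligned */
	uint16_t type = htons(metaid);
	uint16_t len = htons(totlen);
	uint32_t v = htonl(val);

	memcpy(buf, &type, 2);
	memcpy(buf + 2, &len, 2);
	memcpy(buf + 4, &v, sizeof(v));
	return totlen;
}

static void tlv_decode(const uint8_t *buf, size_t buflen)
{
	size_t off = 0;

	while (off + 4 <= buflen) {
		uint16_t type, len;
		uint32_t val;

		memcpy(&type, buf + off, 2);
		memcpy(&len, buf + off + 2, 2);
		type = ntohs(type);
		len = ntohs(len);
		if (len < 8 || off + len > buflen)
			break;
		memcpy(&val, buf + off + 4, 4);
		printf("metaid %u len %u value 0x%08x\n",
		       (unsigned)type, (unsigned)len, (unsigned)ntohl(val));
		off += len;
	}
}

int main(void)
{
	uint8_t buf[64];
	size_t used = tlv_encode_u32(buf, IFE_META_SKBMARK, 0xdeadbeef);

	tlv_decode(buf, used);
	return 0;
}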
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 6b70399ab781..8b5270008a6e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,6 +30,10 @@
30 30
31#define IPT_TAB_MASK 15 31#define IPT_TAB_MASK 15
32 32
33static int ipt_net_id;
34
35static int xt_net_id;
36
33static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook) 37static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
34{ 38{
35 struct xt_tgchk_param par; 39 struct xt_tgchk_param par;
@@ -84,14 +88,15 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
84 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, 88 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
85}; 89};
86 90
87static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, 91static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
88 struct tc_action *a, int ovr, int bind) 92 struct nlattr *est, struct tc_action *a, int ovr,
93 int bind)
89{ 94{
90 struct nlattr *tb[TCA_IPT_MAX + 1]; 95 struct nlattr *tb[TCA_IPT_MAX + 1];
91 struct tcf_ipt *ipt; 96 struct tcf_ipt *ipt;
92 struct xt_entry_target *td, *t; 97 struct xt_entry_target *td, *t;
93 char *tname; 98 char *tname;
94 int ret = 0, err; 99 int ret = 0, err, exists = 0;
95 u32 hook = 0; 100 u32 hook = 0;
96 u32 index = 0; 101 u32 index = 0;
97 102
@@ -102,20 +107,26 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
102 if (err < 0) 107 if (err < 0)
103 return err; 108 return err;
104 109
105 if (tb[TCA_IPT_HOOK] == NULL) 110 if (tb[TCA_IPT_INDEX] != NULL)
106 return -EINVAL; 111 index = nla_get_u32(tb[TCA_IPT_INDEX]);
107 if (tb[TCA_IPT_TARG] == NULL) 112
113 exists = tcf_hash_check(tn, index, a, bind);
114 if (exists && bind)
115 return 0;
116
117 if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
118 if (exists)
119 tcf_hash_release(a, bind);
108 return -EINVAL; 120 return -EINVAL;
121 }
109 122
110 td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); 123 td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
111 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) 124 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
112 return -EINVAL; 125 return -EINVAL;
113 126
114 if (tb[TCA_IPT_INDEX] != NULL) 127 if (!tcf_hash_check(tn, index, a, bind)) {
115 index = nla_get_u32(tb[TCA_IPT_INDEX]); 128 ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind,
116 129 false);
117 if (!tcf_hash_check(index, a, bind) ) {
118 ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false);
119 if (ret) 130 if (ret)
120 return ret; 131 return ret;
121 ret = ACT_P_CREATED; 132 ret = ACT_P_CREATED;
@@ -158,7 +169,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
158 ipt->tcfi_hook = hook; 169 ipt->tcfi_hook = hook;
159 spin_unlock_bh(&ipt->tcf_lock); 170 spin_unlock_bh(&ipt->tcf_lock);
160 if (ret == ACT_P_CREATED) 171 if (ret == ACT_P_CREATED)
161 tcf_hash_insert(a); 172 tcf_hash_insert(tn, a);
162 return ret; 173 return ret;
163 174
164err3: 175err3:
@@ -171,6 +182,24 @@ err1:
171 return err; 182 return err;
172} 183}
173 184
185static int tcf_ipt_init(struct net *net, struct nlattr *nla,
186 struct nlattr *est, struct tc_action *a, int ovr,
187 int bind)
188{
189 struct tc_action_net *tn = net_generic(net, ipt_net_id);
190
191 return __tcf_ipt_init(tn, nla, est, a, ovr, bind);
192}
193
194static int tcf_xt_init(struct net *net, struct nlattr *nla,
195 struct nlattr *est, struct tc_action *a, int ovr,
196 int bind)
197{
198 struct tc_action_net *tn = net_generic(net, xt_net_id);
199
200 return __tcf_ipt_init(tn, nla, est, a, ovr, bind);
201}
202
174static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, 203static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
175 struct tcf_result *res) 204 struct tcf_result *res)
176{ 205{
@@ -262,6 +291,22 @@ nla_put_failure:
262 return -1; 291 return -1;
263} 292}
264 293
294static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
295 struct netlink_callback *cb, int type,
296 struct tc_action *a)
297{
298 struct tc_action_net *tn = net_generic(net, ipt_net_id);
299
300 return tcf_generic_walker(tn, skb, cb, type, a);
301}
302
303static int tcf_ipt_search(struct net *net, struct tc_action *a, u32 index)
304{
305 struct tc_action_net *tn = net_generic(net, ipt_net_id);
306
307 return tcf_hash_search(tn, a, index);
308}
309
265static struct tc_action_ops act_ipt_ops = { 310static struct tc_action_ops act_ipt_ops = {
266 .kind = "ipt", 311 .kind = "ipt",
267 .type = TCA_ACT_IPT, 312 .type = TCA_ACT_IPT,
@@ -270,8 +315,47 @@ static struct tc_action_ops act_ipt_ops = {
270 .dump = tcf_ipt_dump, 315 .dump = tcf_ipt_dump,
271 .cleanup = tcf_ipt_release, 316 .cleanup = tcf_ipt_release,
272 .init = tcf_ipt_init, 317 .init = tcf_ipt_init,
318 .walk = tcf_ipt_walker,
319 .lookup = tcf_ipt_search,
320};
321
322static __net_init int ipt_init_net(struct net *net)
323{
324 struct tc_action_net *tn = net_generic(net, ipt_net_id);
325
326 return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK);
327}
328
329static void __net_exit ipt_exit_net(struct net *net)
330{
331 struct tc_action_net *tn = net_generic(net, ipt_net_id);
332
333 tc_action_net_exit(tn);
334}
335
336static struct pernet_operations ipt_net_ops = {
337 .init = ipt_init_net,
338 .exit = ipt_exit_net,
339 .id = &ipt_net_id,
340 .size = sizeof(struct tc_action_net),
273}; 341};
274 342
343static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
344 struct netlink_callback *cb, int type,
345 struct tc_action *a)
346{
347 struct tc_action_net *tn = net_generic(net, xt_net_id);
348
349 return tcf_generic_walker(tn, skb, cb, type, a);
350}
351
352static int tcf_xt_search(struct net *net, struct tc_action *a, u32 index)
353{
354 struct tc_action_net *tn = net_generic(net, xt_net_id);
355
356 return tcf_hash_search(tn, a, index);
357}
358
275static struct tc_action_ops act_xt_ops = { 359static struct tc_action_ops act_xt_ops = {
276 .kind = "xt", 360 .kind = "xt",
277 .type = TCA_ACT_XT, 361 .type = TCA_ACT_XT,
@@ -279,7 +363,30 @@ static struct tc_action_ops act_xt_ops = {
279 .act = tcf_ipt, 363 .act = tcf_ipt,
280 .dump = tcf_ipt_dump, 364 .dump = tcf_ipt_dump,
281 .cleanup = tcf_ipt_release, 365 .cleanup = tcf_ipt_release,
282 .init = tcf_ipt_init, 366 .init = tcf_xt_init,
367 .walk = tcf_xt_walker,
368 .lookup = tcf_xt_search,
369};
370
371static __net_init int xt_init_net(struct net *net)
372{
373 struct tc_action_net *tn = net_generic(net, xt_net_id);
374
375 return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK);
376}
377
378static void __net_exit xt_exit_net(struct net *net)
379{
380 struct tc_action_net *tn = net_generic(net, xt_net_id);
381
382 tc_action_net_exit(tn);
383}
384
385static struct pernet_operations xt_net_ops = {
386 .init = xt_init_net,
387 .exit = xt_exit_net,
388 .id = &xt_net_id,
389 .size = sizeof(struct tc_action_net),
283}; 390};
284 391
285MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); 392MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
@@ -291,12 +398,13 @@ static int __init ipt_init_module(void)
291{ 398{
292 int ret1, ret2; 399 int ret1, ret2;
293 400
294 ret1 = tcf_register_action(&act_xt_ops, IPT_TAB_MASK); 401 ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops);
295 if (ret1 < 0) 402 if (ret1 < 0)
296 printk("Failed to load xt action\n"); 403 pr_err("Failed to load xt action\n");
297 ret2 = tcf_register_action(&act_ipt_ops, IPT_TAB_MASK); 404
405 ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops);
298 if (ret2 < 0) 406 if (ret2 < 0)
299 printk("Failed to load ipt action\n"); 407 pr_err("Failed to load ipt action\n");
300 408
301 if (ret1 < 0 && ret2 < 0) { 409 if (ret1 < 0 && ret2 < 0) {
302 return ret1; 410 return ret1;
@@ -306,8 +414,8 @@ static int __init ipt_init_module(void)
306 414
307static void __exit ipt_cleanup_module(void) 415static void __exit ipt_cleanup_module(void)
308{ 416{
309 tcf_unregister_action(&act_xt_ops); 417 tcf_unregister_action(&act_ipt_ops, &ipt_net_ops);
310 tcf_unregister_action(&act_ipt_ops); 418 tcf_unregister_action(&act_xt_ops, &xt_net_ops);
311} 419}
312 420
313module_init(ipt_init_module); 421module_init(ipt_init_module);
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
new file mode 100644
index 000000000000..82892170ce4f
--- /dev/null
+++ b/net/sched/act_meta_mark.c
@@ -0,0 +1,79 @@
1/*
2 * net/sched/act_meta_mark.c IFE skb->mark metadata module
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * copyright Jamal Hadi Salim (2015)
10 *
11*/
12
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/errno.h>
17#include <linux/skbuff.h>
18#include <linux/rtnetlink.h>
19#include <linux/module.h>
20#include <linux/init.h>
21#include <net/netlink.h>
22#include <net/pkt_sched.h>
23#include <uapi/linux/tc_act/tc_ife.h>
24#include <net/tc_act/tc_ife.h>
25#include <linux/rtnetlink.h>
26
27static int skbmark_encode(struct sk_buff *skb, void *skbdata,
28 struct tcf_meta_info *e)
29{
30 u32 ifemark = skb->mark;
31
32 return ife_encode_meta_u32(ifemark, skbdata, e);
33}
34
35static int skbmark_decode(struct sk_buff *skb, void *data, u16 len)
36{
37 u32 ifemark = *(u32 *)data;
38
39 skb->mark = ntohl(ifemark);
40 return 0;
41}
42
43static int skbmark_check(struct sk_buff *skb, struct tcf_meta_info *e)
44{
45 return ife_check_meta_u32(skb->mark, e);
46}
47
48static struct tcf_meta_ops ife_skbmark_ops = {
49 .metaid = IFE_META_SKBMARK,
50 .metatype = NLA_U32,
51 .name = "skbmark",
52 .synopsis = "skb mark 32 bit metadata",
53 .check_presence = skbmark_check,
54 .encode = skbmark_encode,
55 .decode = skbmark_decode,
56 .get = ife_get_meta_u32,
57 .alloc = ife_alloc_meta_u32,
58 .release = ife_release_meta_gen,
59 .validate = ife_validate_meta_u32,
60 .owner = THIS_MODULE,
61};
62
63static int __init ifemark_init_module(void)
64{
65 return register_ife_op(&ife_skbmark_ops);
66}
67
68static void __exit ifemark_cleanup_module(void)
69{
70 unregister_ife_op(&ife_skbmark_ops);
71}
72
73module_init(ifemark_init_module);
74module_exit(ifemark_cleanup_module);
75
76MODULE_AUTHOR("Jamal Hadi Salim(2015)");
77MODULE_DESCRIPTION("Inter-FE skb mark metadata module");
78MODULE_LICENSE("GPL");
79MODULE_ALIAS_IFE_META(IFE_META_SKBMARK);
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
new file mode 100644
index 000000000000..26bf4d86030b
--- /dev/null
+++ b/net/sched/act_meta_skbprio.c
@@ -0,0 +1,76 @@
1/*
2 * net/sched/act_meta_prio.c IFE skb->priority metadata module
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * copyright Jamal Hadi Salim (2015)
10 *
11*/
12
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/errno.h>
17#include <linux/skbuff.h>
18#include <linux/rtnetlink.h>
19#include <linux/module.h>
20#include <linux/init.h>
21#include <net/netlink.h>
22#include <net/pkt_sched.h>
23#include <uapi/linux/tc_act/tc_ife.h>
24#include <net/tc_act/tc_ife.h>
25
26static int skbprio_check(struct sk_buff *skb, struct tcf_meta_info *e)
27{
28 return ife_check_meta_u32(skb->priority, e);
29}
30
31static int skbprio_encode(struct sk_buff *skb, void *skbdata,
32 struct tcf_meta_info *e)
33{
34 u32 ifeprio = skb->priority; /* avoid having to cast skb->priority*/
35
36 return ife_encode_meta_u32(ifeprio, skbdata, e);
37}
38
39static int skbprio_decode(struct sk_buff *skb, void *data, u16 len)
40{
41 u32 ifeprio = *(u32 *)data;
42
43 skb->priority = ntohl(ifeprio);
44 return 0;
45}
46
47static struct tcf_meta_ops ife_prio_ops = {
48 .metaid = IFE_META_PRIO,
49 .metatype = NLA_U32,
50 .name = "skbprio",
51 .synopsis = "skb prio metadata",
52 .check_presence = skbprio_check,
53 .encode = skbprio_encode,
54 .decode = skbprio_decode,
55 .get = ife_get_meta_u32,
56 .alloc = ife_alloc_meta_u32,
57 .owner = THIS_MODULE,
58};
59
60static int __init ifeprio_init_module(void)
61{
62 return register_ife_op(&ife_prio_ops);
63}
64
65static void __exit ifeprio_cleanup_module(void)
66{
67 unregister_ife_op(&ife_prio_ops);
68}
69
70module_init(ifeprio_init_module);
71module_exit(ifeprio_cleanup_module);
72
73MODULE_AUTHOR("Jamal Hadi Salim(2015)");
74MODULE_DESCRIPTION("Inter-FE skb prio metadata action");
75MODULE_LICENSE("GPL");
76MODULE_ALIAS_IFE_META(IFE_META_PRIO);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 32fcdecdb9e2..8f3948dd38b8 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -50,15 +50,18 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
50 [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, 50 [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
51}; 51};
52 52
53static int mirred_net_id;
54
53static int tcf_mirred_init(struct net *net, struct nlattr *nla, 55static int tcf_mirred_init(struct net *net, struct nlattr *nla,
54 struct nlattr *est, struct tc_action *a, int ovr, 56 struct nlattr *est, struct tc_action *a, int ovr,
55 int bind) 57 int bind)
56{ 58{
59 struct tc_action_net *tn = net_generic(net, mirred_net_id);
57 struct nlattr *tb[TCA_MIRRED_MAX + 1]; 60 struct nlattr *tb[TCA_MIRRED_MAX + 1];
58 struct tc_mirred *parm; 61 struct tc_mirred *parm;
59 struct tcf_mirred *m; 62 struct tcf_mirred *m;
60 struct net_device *dev; 63 struct net_device *dev;
61 int ret, ok_push = 0; 64 int ret, ok_push = 0, exists = 0;
62 65
63 if (nla == NULL) 66 if (nla == NULL)
64 return -EINVAL; 67 return -EINVAL;
@@ -68,17 +71,27 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
68 if (tb[TCA_MIRRED_PARMS] == NULL) 71 if (tb[TCA_MIRRED_PARMS] == NULL)
69 return -EINVAL; 72 return -EINVAL;
70 parm = nla_data(tb[TCA_MIRRED_PARMS]); 73 parm = nla_data(tb[TCA_MIRRED_PARMS]);
74
75 exists = tcf_hash_check(tn, parm->index, a, bind);
76 if (exists && bind)
77 return 0;
78
71 switch (parm->eaction) { 79 switch (parm->eaction) {
72 case TCA_EGRESS_MIRROR: 80 case TCA_EGRESS_MIRROR:
73 case TCA_EGRESS_REDIR: 81 case TCA_EGRESS_REDIR:
74 break; 82 break;
75 default: 83 default:
84 if (exists)
85 tcf_hash_release(a, bind);
76 return -EINVAL; 86 return -EINVAL;
77 } 87 }
78 if (parm->ifindex) { 88 if (parm->ifindex) {
79 dev = __dev_get_by_index(net, parm->ifindex); 89 dev = __dev_get_by_index(net, parm->ifindex);
80 if (dev == NULL) 90 if (dev == NULL) {
91 if (exists)
92 tcf_hash_release(a, bind);
81 return -ENODEV; 93 return -ENODEV;
94 }
82 switch (dev->type) { 95 switch (dev->type) {
83 case ARPHRD_TUNNEL: 96 case ARPHRD_TUNNEL:
84 case ARPHRD_TUNNEL6: 97 case ARPHRD_TUNNEL6:
@@ -96,18 +109,15 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
96 dev = NULL; 109 dev = NULL;
97 } 110 }
98 111
99 if (!tcf_hash_check(parm->index, a, bind)) { 112 if (!exists) {
100 if (dev == NULL) 113 if (dev == NULL)
101 return -EINVAL; 114 return -EINVAL;
102 ret = tcf_hash_create(parm->index, est, a, sizeof(*m), 115 ret = tcf_hash_create(tn, parm->index, est, a,
103 bind, true); 116 sizeof(*m), bind, true);
104 if (ret) 117 if (ret)
105 return ret; 118 return ret;
106 ret = ACT_P_CREATED; 119 ret = ACT_P_CREATED;
107 } else { 120 } else {
108 if (bind)
109 return 0;
110
111 tcf_hash_release(a, bind); 121 tcf_hash_release(a, bind);
112 if (!ovr) 122 if (!ovr)
113 return -EEXIST; 123 return -EEXIST;
@@ -130,7 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
130 spin_lock_bh(&mirred_list_lock); 140 spin_lock_bh(&mirred_list_lock);
131 list_add(&m->tcfm_list, &mirred_list); 141 list_add(&m->tcfm_list, &mirred_list);
132 spin_unlock_bh(&mirred_list_lock); 142 spin_unlock_bh(&mirred_list_lock);
133 tcf_hash_insert(a); 143 tcf_hash_insert(tn, a);
134 } 144 }
135 145
136 return ret; 146 return ret;
@@ -179,7 +189,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
179 189
180 skb2->skb_iif = skb->dev->ifindex; 190 skb2->skb_iif = skb->dev->ifindex;
181 skb2->dev = dev; 191 skb2->dev = dev;
182 skb_sender_cpu_clear(skb2);
183 err = dev_queue_xmit(skb2); 192 err = dev_queue_xmit(skb2);
184 193
185 if (err) { 194 if (err) {
@@ -221,6 +230,22 @@ nla_put_failure:
221 return -1; 230 return -1;
222} 231}
223 232
233static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
234 struct netlink_callback *cb, int type,
235 struct tc_action *a)
236{
237 struct tc_action_net *tn = net_generic(net, mirred_net_id);
238
239 return tcf_generic_walker(tn, skb, cb, type, a);
240}
241
242static int tcf_mirred_search(struct net *net, struct tc_action *a, u32 index)
243{
244 struct tc_action_net *tn = net_generic(net, mirred_net_id);
245
246 return tcf_hash_search(tn, a, index);
247}
248
224static int mirred_device_event(struct notifier_block *unused, 249static int mirred_device_event(struct notifier_block *unused,
225 unsigned long event, void *ptr) 250 unsigned long event, void *ptr)
226{ 251{
@@ -257,6 +282,29 @@ static struct tc_action_ops act_mirred_ops = {
257 .dump = tcf_mirred_dump, 282 .dump = tcf_mirred_dump,
258 .cleanup = tcf_mirred_release, 283 .cleanup = tcf_mirred_release,
259 .init = tcf_mirred_init, 284 .init = tcf_mirred_init,
285 .walk = tcf_mirred_walker,
286 .lookup = tcf_mirred_search,
287};
288
289static __net_init int mirred_init_net(struct net *net)
290{
291 struct tc_action_net *tn = net_generic(net, mirred_net_id);
292
293 return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK);
294}
295
296static void __net_exit mirred_exit_net(struct net *net)
297{
298 struct tc_action_net *tn = net_generic(net, mirred_net_id);
299
300 tc_action_net_exit(tn);
301}
302
303static struct pernet_operations mirred_net_ops = {
304 .init = mirred_init_net,
305 .exit = mirred_exit_net,
306 .id = &mirred_net_id,
307 .size = sizeof(struct tc_action_net),
260}; 308};
261 309
262MODULE_AUTHOR("Jamal Hadi Salim(2002)"); 310MODULE_AUTHOR("Jamal Hadi Salim(2002)");
@@ -270,12 +318,12 @@ static int __init mirred_init_module(void)
270 return err; 318 return err;
271 319
272 pr_info("Mirror/redirect action on\n"); 320 pr_info("Mirror/redirect action on\n");
273 return tcf_register_action(&act_mirred_ops, MIRRED_TAB_MASK); 321 return tcf_register_action(&act_mirred_ops, &mirred_net_ops);
274} 322}
275 323
276static void __exit mirred_cleanup_module(void) 324static void __exit mirred_cleanup_module(void)
277{ 325{
278 tcf_unregister_action(&act_mirred_ops); 326 tcf_unregister_action(&act_mirred_ops, &mirred_net_ops);
279 unregister_netdevice_notifier(&mirred_device_notifier); 327 unregister_netdevice_notifier(&mirred_device_notifier);
280} 328}
281 329
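The act_mirred conversion above is the template repeated for the remaining act_*.c files in this diff: the module-wide action hash becomes per-network-namespace state (struct tc_action_net) set up through pernet_operations, the lookup/create/insert helpers gain a tn argument, and new .walk/.lookup callbacks route dump and index-lookup requests to the right namespace. The init path also caches the result of tcf_hash_check() in an exists flag before the attribute validation, so a bind against an existing index can return early and every later error path can drop the reference the lookup took via tcf_hash_release(). A condensed sketch of the per-netns boilerplate follows; "foo" is a placeholder action name, not code from this series, and only helpers already visible in the hunks above are used.

#define FOO_TAB_MASK 15

static int foo_net_id;

static int tcf_foo_walker(struct net *net, struct sk_buff *skb,
			  struct netlink_callback *cb, int type,
			  struct tc_action *a)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tcf_generic_walker(tn, skb, cb, type, a);
}

static int tcf_foo_search(struct net *net, struct tc_action *a, u32 index)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tcf_hash_search(tn, a, index);
}

static struct tc_action_ops act_foo_ops = {
	.kind	= "foo",
	/* .type/.act/.dump/.init unchanged from the pre-conversion module */
	.walk	= tcf_foo_walker,
	.lookup	= tcf_foo_search,
};

static __net_init int foo_init_net(struct net *net)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tc_action_net_init(tn, &act_foo_ops, FOO_TAB_MASK);
}

static void __net_exit foo_exit_net(struct net *net)
{
	tc_action_net_exit(net_generic(net, foo_net_id));
}

static struct pernet_operations foo_net_ops = {
	.init	= foo_init_net,
	.exit	= foo_exit_net,
	.id	= &foo_net_id,
	.size	= sizeof(struct tc_action_net),
};

static int __init foo_init_module(void)
{
	/* module init/exit now hand the pernet ops to the action core */
	return tcf_register_action(&act_foo_ops, &foo_net_ops);
}

static void __exit foo_cleanup_module(void)
{
	tcf_unregister_action(&act_foo_ops, &foo_net_ops);
}

With this in place each network namespace gets, and tears down, its own action table instead of sharing one for the lifetime of the module.
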
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index b7c4ead8b5a8..0f65cdfbfb1d 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -31,6 +31,8 @@
31 31
32#define NAT_TAB_MASK 15 32#define NAT_TAB_MASK 15
33 33
34static int nat_net_id;
35
34static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { 36static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
35 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, 37 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
36}; 38};
@@ -38,6 +40,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
38static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, 40static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
39 struct tc_action *a, int ovr, int bind) 41 struct tc_action *a, int ovr, int bind)
40{ 42{
43 struct tc_action_net *tn = net_generic(net, nat_net_id);
41 struct nlattr *tb[TCA_NAT_MAX + 1]; 44 struct nlattr *tb[TCA_NAT_MAX + 1];
42 struct tc_nat *parm; 45 struct tc_nat *parm;
43 int ret = 0, err; 46 int ret = 0, err;
@@ -54,9 +57,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
54 return -EINVAL; 57 return -EINVAL;
55 parm = nla_data(tb[TCA_NAT_PARMS]); 58 parm = nla_data(tb[TCA_NAT_PARMS]);
56 59
57 if (!tcf_hash_check(parm->index, a, bind)) { 60 if (!tcf_hash_check(tn, parm->index, a, bind)) {
58 ret = tcf_hash_create(parm->index, est, a, sizeof(*p), 61 ret = tcf_hash_create(tn, parm->index, est, a,
59 bind, false); 62 sizeof(*p), bind, false);
60 if (ret) 63 if (ret)
61 return ret; 64 return ret;
62 ret = ACT_P_CREATED; 65 ret = ACT_P_CREATED;
@@ -79,7 +82,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
79 spin_unlock_bh(&p->tcf_lock); 82 spin_unlock_bh(&p->tcf_lock);
80 83
81 if (ret == ACT_P_CREATED) 84 if (ret == ACT_P_CREATED)
82 tcf_hash_insert(a); 85 tcf_hash_insert(tn, a);
83 86
84 return ret; 87 return ret;
85} 88}
@@ -126,9 +129,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
126 addr = iph->daddr; 129 addr = iph->daddr;
127 130
128 if (!((old_addr ^ addr) & mask)) { 131 if (!((old_addr ^ addr) & mask)) {
129 if (skb_cloned(skb) && 132 if (skb_try_make_writable(skb, sizeof(*iph) + noff))
130 !skb_clone_writable(skb, sizeof(*iph) + noff) &&
131 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
132 goto drop; 133 goto drop;
133 134
134 new_addr &= mask; 135 new_addr &= mask;
@@ -156,9 +157,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
156 struct tcphdr *tcph; 157 struct tcphdr *tcph;
157 158
158 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || 159 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
159 (skb_cloned(skb) && 160 skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
160 !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) &&
161 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
162 goto drop; 161 goto drop;
163 162
164 tcph = (void *)(skb_network_header(skb) + ihl); 163 tcph = (void *)(skb_network_header(skb) + ihl);
@@ -171,9 +170,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
171 struct udphdr *udph; 170 struct udphdr *udph;
172 171
173 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || 172 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
174 (skb_cloned(skb) && 173 skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
175 !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) &&
176 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
177 goto drop; 174 goto drop;
178 175
179 udph = (void *)(skb_network_header(skb) + ihl); 176 udph = (void *)(skb_network_header(skb) + ihl);
@@ -213,10 +210,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
213 if ((old_addr ^ addr) & mask) 210 if ((old_addr ^ addr) & mask)
214 break; 211 break;
215 212
216 if (skb_cloned(skb) && 213 if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
217 !skb_clone_writable(skb, ihl + sizeof(*icmph) + 214 sizeof(*iph) + noff))
218 sizeof(*iph) + noff) &&
219 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
220 goto drop; 215 goto drop;
221 216
222 icmph = (void *)(skb_network_header(skb) + ihl); 217 icmph = (void *)(skb_network_header(skb) + ihl);
@@ -282,6 +277,22 @@ nla_put_failure:
282 return -1; 277 return -1;
283} 278}
284 279
280static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
281 struct netlink_callback *cb, int type,
282 struct tc_action *a)
283{
284 struct tc_action_net *tn = net_generic(net, nat_net_id);
285
286 return tcf_generic_walker(tn, skb, cb, type, a);
287}
288
289static int tcf_nat_search(struct net *net, struct tc_action *a, u32 index)
290{
291 struct tc_action_net *tn = net_generic(net, nat_net_id);
292
293 return tcf_hash_search(tn, a, index);
294}
295
285static struct tc_action_ops act_nat_ops = { 296static struct tc_action_ops act_nat_ops = {
286 .kind = "nat", 297 .kind = "nat",
287 .type = TCA_ACT_NAT, 298 .type = TCA_ACT_NAT,
@@ -289,6 +300,29 @@ static struct tc_action_ops act_nat_ops = {
289 .act = tcf_nat, 300 .act = tcf_nat,
290 .dump = tcf_nat_dump, 301 .dump = tcf_nat_dump,
291 .init = tcf_nat_init, 302 .init = tcf_nat_init,
303 .walk = tcf_nat_walker,
304 .lookup = tcf_nat_search,
305};
306
307static __net_init int nat_init_net(struct net *net)
308{
309 struct tc_action_net *tn = net_generic(net, nat_net_id);
310
311 return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK);
312}
313
314static void __net_exit nat_exit_net(struct net *net)
315{
316 struct tc_action_net *tn = net_generic(net, nat_net_id);
317
318 tc_action_net_exit(tn);
319}
320
321static struct pernet_operations nat_net_ops = {
322 .init = nat_init_net,
323 .exit = nat_exit_net,
324 .id = &nat_net_id,
325 .size = sizeof(struct tc_action_net),
292}; 326};
293 327
294MODULE_DESCRIPTION("Stateless NAT actions"); 328MODULE_DESCRIPTION("Stateless NAT actions");
@@ -296,12 +330,12 @@ MODULE_LICENSE("GPL");
296 330
297static int __init nat_init_module(void) 331static int __init nat_init_module(void)
298{ 332{
299 return tcf_register_action(&act_nat_ops, NAT_TAB_MASK); 333 return tcf_register_action(&act_nat_ops, &nat_net_ops);
300} 334}
301 335
302static void __exit nat_cleanup_module(void) 336static void __exit nat_cleanup_module(void)
303{ 337{
304 tcf_unregister_action(&act_nat_ops); 338 tcf_unregister_action(&act_nat_ops, &nat_net_ops);
305} 339}
306 340
307module_init(nat_init_module); 341module_init(nat_init_module);
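Besides the per-netns conversion, act_nat.c drops the open-coded clone/writable/expand sequence in favour of skb_try_make_writable(). The helper's behaviour can be read directly off the lines it replaces: it returns true (failure) only when the skb is cloned, the requested range is not writable in the clone, and pskb_expand_head() cannot allocate a private copy, so "if (skb_try_make_writable(...)) goto drop;" is a behaviour-preserving shorthand for the old three-line condition. For reference, a sketch of the helper as implied by the removed code:

static inline bool skb_try_make_writable(struct sk_buff *skb,
					 unsigned int write_len)
{
	/* true only if a private, writable copy of the first write_len
	 * bytes could not be obtained */
	return skb_cloned(skb) && !skb_clone_writable(skb, write_len) &&
	       pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
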
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index e38a7701f154..429c3ab65142 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -25,6 +25,8 @@
25 25
26#define PEDIT_TAB_MASK 15 26#define PEDIT_TAB_MASK 15
27 27
28static int pedit_net_id;
29
28static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { 30static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
29 [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, 31 [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
30}; 32};
@@ -33,6 +35,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
33 struct nlattr *est, struct tc_action *a, 35 struct nlattr *est, struct tc_action *a,
34 int ovr, int bind) 36 int ovr, int bind)
35{ 37{
38 struct tc_action_net *tn = net_generic(net, pedit_net_id);
36 struct nlattr *tb[TCA_PEDIT_MAX + 1]; 39 struct nlattr *tb[TCA_PEDIT_MAX + 1];
37 struct tc_pedit *parm; 40 struct tc_pedit *parm;
38 int ret = 0, err; 41 int ret = 0, err;
@@ -54,11 +57,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
54 if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize) 57 if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
55 return -EINVAL; 58 return -EINVAL;
56 59
57 if (!tcf_hash_check(parm->index, a, bind)) { 60 if (!tcf_hash_check(tn, parm->index, a, bind)) {
58 if (!parm->nkeys) 61 if (!parm->nkeys)
59 return -EINVAL; 62 return -EINVAL;
60 ret = tcf_hash_create(parm->index, est, a, sizeof(*p), 63 ret = tcf_hash_create(tn, parm->index, est, a,
61 bind, false); 64 sizeof(*p), bind, false);
62 if (ret) 65 if (ret)
63 return ret; 66 return ret;
64 p = to_pedit(a); 67 p = to_pedit(a);
@@ -93,7 +96,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
93 memcpy(p->tcfp_keys, parm->keys, ksize); 96 memcpy(p->tcfp_keys, parm->keys, ksize);
94 spin_unlock_bh(&p->tcf_lock); 97 spin_unlock_bh(&p->tcf_lock);
95 if (ret == ACT_P_CREATED) 98 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(a); 99 tcf_hash_insert(tn, a);
97 return ret; 100 return ret;
98} 101}
99 102
@@ -211,6 +214,22 @@ nla_put_failure:
211 return -1; 214 return -1;
212} 215}
213 216
217static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
218 struct netlink_callback *cb, int type,
219 struct tc_action *a)
220{
221 struct tc_action_net *tn = net_generic(net, pedit_net_id);
222
223 return tcf_generic_walker(tn, skb, cb, type, a);
224}
225
226static int tcf_pedit_search(struct net *net, struct tc_action *a, u32 index)
227{
228 struct tc_action_net *tn = net_generic(net, pedit_net_id);
229
230 return tcf_hash_search(tn, a, index);
231}
232
214static struct tc_action_ops act_pedit_ops = { 233static struct tc_action_ops act_pedit_ops = {
215 .kind = "pedit", 234 .kind = "pedit",
216 .type = TCA_ACT_PEDIT, 235 .type = TCA_ACT_PEDIT,
@@ -219,6 +238,29 @@ static struct tc_action_ops act_pedit_ops = {
219 .dump = tcf_pedit_dump, 238 .dump = tcf_pedit_dump,
220 .cleanup = tcf_pedit_cleanup, 239 .cleanup = tcf_pedit_cleanup,
221 .init = tcf_pedit_init, 240 .init = tcf_pedit_init,
241 .walk = tcf_pedit_walker,
242 .lookup = tcf_pedit_search,
243};
244
245static __net_init int pedit_init_net(struct net *net)
246{
247 struct tc_action_net *tn = net_generic(net, pedit_net_id);
248
249 return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK);
250}
251
252static void __net_exit pedit_exit_net(struct net *net)
253{
254 struct tc_action_net *tn = net_generic(net, pedit_net_id);
255
256 tc_action_net_exit(tn);
257}
258
259static struct pernet_operations pedit_net_ops = {
260 .init = pedit_init_net,
261 .exit = pedit_exit_net,
262 .id = &pedit_net_id,
263 .size = sizeof(struct tc_action_net),
222}; 264};
223 265
224MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); 266MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
@@ -227,12 +269,12 @@ MODULE_LICENSE("GPL");
227 269
228static int __init pedit_init_module(void) 270static int __init pedit_init_module(void)
229{ 271{
230 return tcf_register_action(&act_pedit_ops, PEDIT_TAB_MASK); 272 return tcf_register_action(&act_pedit_ops, &pedit_net_ops);
231} 273}
232 274
233static void __exit pedit_cleanup_module(void) 275static void __exit pedit_cleanup_module(void)
234{ 276{
235 tcf_unregister_action(&act_pedit_ops); 277 tcf_unregister_action(&act_pedit_ops, &pedit_net_ops);
236} 278}
237 279
238module_init(pedit_init_module); 280module_init(pedit_init_module);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 9a1c42a43f92..330f14e302e8 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -55,10 +55,14 @@ struct tc_police_compat {
55 55
56/* Each policer is serialized by its individual spinlock */ 56/* Each policer is serialized by its individual spinlock */
57 57
58static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 58static int police_net_id;
59 int type, struct tc_action *a) 59
60static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
61 struct netlink_callback *cb, int type,
62 struct tc_action *a)
60{ 63{
61 struct tcf_hashinfo *hinfo = a->ops->hinfo; 64 struct tc_action_net *tn = net_generic(net, police_net_id);
65 struct tcf_hashinfo *hinfo = tn->hinfo;
62 struct hlist_head *head; 66 struct hlist_head *head;
63 struct tcf_common *p; 67 struct tcf_common *p;
64 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 68 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
@@ -121,7 +125,8 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
121 struct tc_police *parm; 125 struct tc_police *parm;
122 struct tcf_police *police; 126 struct tcf_police *police;
123 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 127 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
124 struct tcf_hashinfo *hinfo = a->ops->hinfo; 128 struct tc_action_net *tn = net_generic(net, police_net_id);
129 struct tcf_hashinfo *hinfo = tn->hinfo;
125 int size; 130 int size;
126 131
127 if (nla == NULL) 132 if (nla == NULL)
@@ -139,7 +144,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
139 parm = nla_data(tb[TCA_POLICE_TBF]); 144 parm = nla_data(tb[TCA_POLICE_TBF]);
140 145
141 if (parm->index) { 146 if (parm->index) {
142 if (tcf_hash_search(a, parm->index)) { 147 if (tcf_hash_search(tn, a, parm->index)) {
143 police = to_police(a->priv); 148 police = to_police(a->priv);
144 if (bind) { 149 if (bind) {
145 police->tcf_bindcnt += 1; 150 police->tcf_bindcnt += 1;
@@ -233,7 +238,7 @@ override:
233 238
234 police->tcfp_t_c = ktime_get_ns(); 239 police->tcfp_t_c = ktime_get_ns();
235 police->tcf_index = parm->index ? parm->index : 240 police->tcf_index = parm->index ? parm->index :
236 tcf_hash_new_index(hinfo); 241 tcf_hash_new_index(tn);
237 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 242 h = tcf_hash(police->tcf_index, POL_TAB_MASK);
238 spin_lock_bh(&hinfo->lock); 243 spin_lock_bh(&hinfo->lock);
239 hlist_add_head(&police->tcf_head, &hinfo->htab[h]); 244 hlist_add_head(&police->tcf_head, &hinfo->htab[h]);
@@ -342,6 +347,13 @@ nla_put_failure:
342 return -1; 347 return -1;
343} 348}
344 349
350static int tcf_police_search(struct net *net, struct tc_action *a, u32 index)
351{
352 struct tc_action_net *tn = net_generic(net, police_net_id);
353
354 return tcf_hash_search(tn, a, index);
355}
356
345MODULE_AUTHOR("Alexey Kuznetsov"); 357MODULE_AUTHOR("Alexey Kuznetsov");
346MODULE_DESCRIPTION("Policing actions"); 358MODULE_DESCRIPTION("Policing actions");
347MODULE_LICENSE("GPL"); 359MODULE_LICENSE("GPL");
@@ -353,19 +365,41 @@ static struct tc_action_ops act_police_ops = {
353 .act = tcf_act_police, 365 .act = tcf_act_police,
354 .dump = tcf_act_police_dump, 366 .dump = tcf_act_police_dump,
355 .init = tcf_act_police_locate, 367 .init = tcf_act_police_locate,
356 .walk = tcf_act_police_walker 368 .walk = tcf_act_police_walker,
369 .lookup = tcf_police_search,
370};
371
372static __net_init int police_init_net(struct net *net)
373{
374 struct tc_action_net *tn = net_generic(net, police_net_id);
375
376 return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK);
377}
378
379static void __net_exit police_exit_net(struct net *net)
380{
381 struct tc_action_net *tn = net_generic(net, police_net_id);
382
383 tc_action_net_exit(tn);
384}
385
386static struct pernet_operations police_net_ops = {
387 .init = police_init_net,
388 .exit = police_exit_net,
389 .id = &police_net_id,
390 .size = sizeof(struct tc_action_net),
357}; 391};
358 392
359static int __init 393static int __init
360police_init_module(void) 394police_init_module(void)
361{ 395{
362 return tcf_register_action(&act_police_ops, POL_TAB_MASK); 396 return tcf_register_action(&act_police_ops, &police_net_ops);
363} 397}
364 398
365static void __exit 399static void __exit
366police_cleanup_module(void) 400police_cleanup_module(void)
367{ 401{
368 tcf_unregister_action(&act_police_ops); 402 tcf_unregister_action(&act_police_ops, &police_net_ops);
369} 403}
370 404
371module_init(police_init_module); 405module_init(police_init_module);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index d6b708d6afdf..3a33fb648a6d 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -26,6 +26,8 @@
26 26
27#define SIMP_TAB_MASK 7 27#define SIMP_TAB_MASK 7
28 28
29static int simp_net_id;
30
29#define SIMP_MAX_DATA 32 31#define SIMP_MAX_DATA 32
30static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, 32static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
31 struct tcf_result *res) 33 struct tcf_result *res)
@@ -80,11 +82,12 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
80 struct nlattr *est, struct tc_action *a, 82 struct nlattr *est, struct tc_action *a,
81 int ovr, int bind) 83 int ovr, int bind)
82{ 84{
85 struct tc_action_net *tn = net_generic(net, simp_net_id);
83 struct nlattr *tb[TCA_DEF_MAX + 1]; 86 struct nlattr *tb[TCA_DEF_MAX + 1];
84 struct tc_defact *parm; 87 struct tc_defact *parm;
85 struct tcf_defact *d; 88 struct tcf_defact *d;
86 char *defdata; 89 char *defdata;
87 int ret = 0, err; 90 int ret = 0, err, exists = 0;
88 91
89 if (nla == NULL) 92 if (nla == NULL)
90 return -EINVAL; 93 return -EINVAL;
@@ -96,15 +99,23 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
96 if (tb[TCA_DEF_PARMS] == NULL) 99 if (tb[TCA_DEF_PARMS] == NULL)
97 return -EINVAL; 100 return -EINVAL;
98 101
99 if (tb[TCA_DEF_DATA] == NULL)
100 return -EINVAL;
101 102
102 parm = nla_data(tb[TCA_DEF_PARMS]); 103 parm = nla_data(tb[TCA_DEF_PARMS]);
104 exists = tcf_hash_check(tn, parm->index, a, bind);
105 if (exists && bind)
106 return 0;
107
108 if (tb[TCA_DEF_DATA] == NULL) {
109 if (exists)
110 tcf_hash_release(a, bind);
111 return -EINVAL;
112 }
113
103 defdata = nla_data(tb[TCA_DEF_DATA]); 114 defdata = nla_data(tb[TCA_DEF_DATA]);
104 115
105 if (!tcf_hash_check(parm->index, a, bind)) { 116 if (!exists) {
106 ret = tcf_hash_create(parm->index, est, a, sizeof(*d), 117 ret = tcf_hash_create(tn, parm->index, est, a,
107 bind, false); 118 sizeof(*d), bind, false);
108 if (ret) 119 if (ret)
109 return ret; 120 return ret;
110 121
@@ -119,8 +130,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
119 } else { 130 } else {
120 d = to_defact(a); 131 d = to_defact(a);
121 132
122 if (bind)
123 return 0;
124 tcf_hash_release(a, bind); 133 tcf_hash_release(a, bind);
125 if (!ovr) 134 if (!ovr)
126 return -EEXIST; 135 return -EEXIST;
@@ -129,7 +138,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
129 } 138 }
130 139
131 if (ret == ACT_P_CREATED) 140 if (ret == ACT_P_CREATED)
132 tcf_hash_insert(a); 141 tcf_hash_insert(tn, a);
133 return ret; 142 return ret;
134} 143}
135 144
@@ -161,6 +170,22 @@ nla_put_failure:
161 return -1; 170 return -1;
162} 171}
163 172
173static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
174 struct netlink_callback *cb, int type,
175 struct tc_action *a)
176{
177 struct tc_action_net *tn = net_generic(net, simp_net_id);
178
179 return tcf_generic_walker(tn, skb, cb, type, a);
180}
181
182static int tcf_simp_search(struct net *net, struct tc_action *a, u32 index)
183{
184 struct tc_action_net *tn = net_generic(net, simp_net_id);
185
186 return tcf_hash_search(tn, a, index);
187}
188
164static struct tc_action_ops act_simp_ops = { 189static struct tc_action_ops act_simp_ops = {
165 .kind = "simple", 190 .kind = "simple",
166 .type = TCA_ACT_SIMP, 191 .type = TCA_ACT_SIMP,
@@ -169,6 +194,29 @@ static struct tc_action_ops act_simp_ops = {
169 .dump = tcf_simp_dump, 194 .dump = tcf_simp_dump,
170 .cleanup = tcf_simp_release, 195 .cleanup = tcf_simp_release,
171 .init = tcf_simp_init, 196 .init = tcf_simp_init,
197 .walk = tcf_simp_walker,
198 .lookup = tcf_simp_search,
199};
200
201static __net_init int simp_init_net(struct net *net)
202{
203 struct tc_action_net *tn = net_generic(net, simp_net_id);
204
205 return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK);
206}
207
208static void __net_exit simp_exit_net(struct net *net)
209{
210 struct tc_action_net *tn = net_generic(net, simp_net_id);
211
212 tc_action_net_exit(tn);
213}
214
215static struct pernet_operations simp_net_ops = {
216 .init = simp_init_net,
217 .exit = simp_exit_net,
218 .id = &simp_net_id,
219 .size = sizeof(struct tc_action_net),
172}; 220};
173 221
174MODULE_AUTHOR("Jamal Hadi Salim(2005)"); 222MODULE_AUTHOR("Jamal Hadi Salim(2005)");
@@ -177,8 +225,7 @@ MODULE_LICENSE("GPL");
177 225
178static int __init simp_init_module(void) 226static int __init simp_init_module(void)
179{ 227{
180 int ret; 228 int ret = tcf_register_action(&act_simp_ops, &simp_net_ops);
181 ret = tcf_register_action(&act_simp_ops, SIMP_TAB_MASK);
182 if (!ret) 229 if (!ret)
183 pr_info("Simple TC action Loaded\n"); 230 pr_info("Simple TC action Loaded\n");
184 return ret; 231 return ret;
@@ -186,7 +233,7 @@ static int __init simp_init_module(void)
186 233
187static void __exit simp_cleanup_module(void) 234static void __exit simp_cleanup_module(void)
188{ 235{
189 tcf_unregister_action(&act_simp_ops); 236 tcf_unregister_action(&act_simp_ops, &simp_net_ops);
190} 237}
191 238
192module_init(simp_init_module); 239module_init(simp_init_module);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6751b5f8c046..69da5a8f0034 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -29,6 +29,8 @@
29 29
30#define SKBEDIT_TAB_MASK 15 30#define SKBEDIT_TAB_MASK 15
31 31
32static int skbedit_net_id;
33
32static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, 34static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
33 struct tcf_result *res) 35 struct tcf_result *res)
34{ 36{
@@ -61,12 +63,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
61 struct nlattr *est, struct tc_action *a, 63 struct nlattr *est, struct tc_action *a,
62 int ovr, int bind) 64 int ovr, int bind)
63{ 65{
66 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
64 struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; 67 struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
65 struct tc_skbedit *parm; 68 struct tc_skbedit *parm;
66 struct tcf_skbedit *d; 69 struct tcf_skbedit *d;
67 u32 flags = 0, *priority = NULL, *mark = NULL; 70 u32 flags = 0, *priority = NULL, *mark = NULL;
68 u16 *queue_mapping = NULL; 71 u16 *queue_mapping = NULL;
69 int ret = 0, err; 72 int ret = 0, err, exists = 0;
70 73
71 if (nla == NULL) 74 if (nla == NULL)
72 return -EINVAL; 75 return -EINVAL;
@@ -93,14 +96,20 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
93 mark = nla_data(tb[TCA_SKBEDIT_MARK]); 96 mark = nla_data(tb[TCA_SKBEDIT_MARK]);
94 } 97 }
95 98
96 if (!flags)
97 return -EINVAL;
98
99 parm = nla_data(tb[TCA_SKBEDIT_PARMS]); 99 parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
100 100
101 if (!tcf_hash_check(parm->index, a, bind)) { 101 exists = tcf_hash_check(tn, parm->index, a, bind);
102 ret = tcf_hash_create(parm->index, est, a, sizeof(*d), 102 if (exists && bind)
103 bind, false); 103 return 0;
104
105 if (!flags) {
106 tcf_hash_release(a, bind);
107 return -EINVAL;
108 }
109
110 if (!exists) {
111 ret = tcf_hash_create(tn, parm->index, est, a,
112 sizeof(*d), bind, false);
104 if (ret) 113 if (ret)
105 return ret; 114 return ret;
106 115
@@ -108,8 +117,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
108 ret = ACT_P_CREATED; 117 ret = ACT_P_CREATED;
109 } else { 118 } else {
110 d = to_skbedit(a); 119 d = to_skbedit(a);
111 if (bind)
112 return 0;
113 tcf_hash_release(a, bind); 120 tcf_hash_release(a, bind);
114 if (!ovr) 121 if (!ovr)
115 return -EEXIST; 122 return -EEXIST;
@@ -130,7 +137,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
130 spin_unlock_bh(&d->tcf_lock); 137 spin_unlock_bh(&d->tcf_lock);
131 138
132 if (ret == ACT_P_CREATED) 139 if (ret == ACT_P_CREATED)
133 tcf_hash_insert(a); 140 tcf_hash_insert(tn, a);
134 return ret; 141 return ret;
135} 142}
136 143
@@ -173,6 +180,22 @@ nla_put_failure:
173 return -1; 180 return -1;
174} 181}
175 182
183static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
184 struct netlink_callback *cb, int type,
185 struct tc_action *a)
186{
187 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
188
189 return tcf_generic_walker(tn, skb, cb, type, a);
190}
191
192static int tcf_skbedit_search(struct net *net, struct tc_action *a, u32 index)
193{
194 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
195
196 return tcf_hash_search(tn, a, index);
197}
198
176static struct tc_action_ops act_skbedit_ops = { 199static struct tc_action_ops act_skbedit_ops = {
177 .kind = "skbedit", 200 .kind = "skbedit",
178 .type = TCA_ACT_SKBEDIT, 201 .type = TCA_ACT_SKBEDIT,
@@ -180,6 +203,29 @@ static struct tc_action_ops act_skbedit_ops = {
180 .act = tcf_skbedit, 203 .act = tcf_skbedit,
181 .dump = tcf_skbedit_dump, 204 .dump = tcf_skbedit_dump,
182 .init = tcf_skbedit_init, 205 .init = tcf_skbedit_init,
206 .walk = tcf_skbedit_walker,
207 .lookup = tcf_skbedit_search,
208};
209
210static __net_init int skbedit_init_net(struct net *net)
211{
212 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
213
214 return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK);
215}
216
217static void __net_exit skbedit_exit_net(struct net *net)
218{
219 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
220
221 tc_action_net_exit(tn);
222}
223
224static struct pernet_operations skbedit_net_ops = {
225 .init = skbedit_init_net,
226 .exit = skbedit_exit_net,
227 .id = &skbedit_net_id,
228 .size = sizeof(struct tc_action_net),
183}; 229};
184 230
185MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>"); 231MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
@@ -188,12 +234,12 @@ MODULE_LICENSE("GPL");
188 234
189static int __init skbedit_init_module(void) 235static int __init skbedit_init_module(void)
190{ 236{
191 return tcf_register_action(&act_skbedit_ops, SKBEDIT_TAB_MASK); 237 return tcf_register_action(&act_skbedit_ops, &skbedit_net_ops);
192} 238}
193 239
194static void __exit skbedit_cleanup_module(void) 240static void __exit skbedit_cleanup_module(void)
195{ 241{
196 tcf_unregister_action(&act_skbedit_ops); 242 tcf_unregister_action(&act_skbedit_ops, &skbedit_net_ops);
197} 243}
198 244
199module_init(skbedit_init_module); 245module_init(skbedit_init_module);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 796785e0bf96..c45f926dafb9 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -21,6 +21,8 @@
21 21
22#define VLAN_TAB_MASK 15 22#define VLAN_TAB_MASK 15
23 23
24static int vlan_net_id;
25
24static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, 26static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
25 struct tcf_result *res) 27 struct tcf_result *res)
26{ 28{
@@ -68,13 +70,14 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
68 struct nlattr *est, struct tc_action *a, 70 struct nlattr *est, struct tc_action *a,
69 int ovr, int bind) 71 int ovr, int bind)
70{ 72{
73 struct tc_action_net *tn = net_generic(net, vlan_net_id);
71 struct nlattr *tb[TCA_VLAN_MAX + 1]; 74 struct nlattr *tb[TCA_VLAN_MAX + 1];
72 struct tc_vlan *parm; 75 struct tc_vlan *parm;
73 struct tcf_vlan *v; 76 struct tcf_vlan *v;
74 int action; 77 int action;
75 __be16 push_vid = 0; 78 __be16 push_vid = 0;
76 __be16 push_proto = 0; 79 __be16 push_proto = 0;
77 int ret = 0; 80 int ret = 0, exists = 0;
78 int err; 81 int err;
79 82
80 if (!nla) 83 if (!nla)
@@ -87,15 +90,25 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
87 if (!tb[TCA_VLAN_PARMS]) 90 if (!tb[TCA_VLAN_PARMS])
88 return -EINVAL; 91 return -EINVAL;
89 parm = nla_data(tb[TCA_VLAN_PARMS]); 92 parm = nla_data(tb[TCA_VLAN_PARMS]);
93 exists = tcf_hash_check(tn, parm->index, a, bind);
94 if (exists && bind)
95 return 0;
96
90 switch (parm->v_action) { 97 switch (parm->v_action) {
91 case TCA_VLAN_ACT_POP: 98 case TCA_VLAN_ACT_POP:
92 break; 99 break;
93 case TCA_VLAN_ACT_PUSH: 100 case TCA_VLAN_ACT_PUSH:
94 if (!tb[TCA_VLAN_PUSH_VLAN_ID]) 101 if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
102 if (exists)
103 tcf_hash_release(a, bind);
95 return -EINVAL; 104 return -EINVAL;
105 }
96 push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); 106 push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
97 if (push_vid >= VLAN_VID_MASK) 107 if (push_vid >= VLAN_VID_MASK) {
108 if (exists)
109 tcf_hash_release(a, bind);
98 return -ERANGE; 110 return -ERANGE;
111 }
99 112
100 if (tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]) { 113 if (tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]) {
101 push_proto = nla_get_be16(tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]); 114 push_proto = nla_get_be16(tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]);
@@ -111,20 +124,20 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
111 } 124 }
112 break; 125 break;
113 default: 126 default:
127 if (exists)
128 tcf_hash_release(a, bind);
114 return -EINVAL; 129 return -EINVAL;
115 } 130 }
116 action = parm->v_action; 131 action = parm->v_action;
117 132
118 if (!tcf_hash_check(parm->index, a, bind)) { 133 if (!exists) {
119 ret = tcf_hash_create(parm->index, est, a, sizeof(*v), 134 ret = tcf_hash_create(tn, parm->index, est, a,
120 bind, false); 135 sizeof(*v), bind, false);
121 if (ret) 136 if (ret)
122 return ret; 137 return ret;
123 138
124 ret = ACT_P_CREATED; 139 ret = ACT_P_CREATED;
125 } else { 140 } else {
126 if (bind)
127 return 0;
128 tcf_hash_release(a, bind); 141 tcf_hash_release(a, bind);
129 if (!ovr) 142 if (!ovr)
130 return -EEXIST; 143 return -EEXIST;
@@ -143,7 +156,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
143 spin_unlock_bh(&v->tcf_lock); 156 spin_unlock_bh(&v->tcf_lock);
144 157
145 if (ret == ACT_P_CREATED) 158 if (ret == ACT_P_CREATED)
146 tcf_hash_insert(a); 159 tcf_hash_insert(tn, a);
147 return ret; 160 return ret;
148} 161}
149 162
@@ -181,6 +194,22 @@ nla_put_failure:
181 return -1; 194 return -1;
182} 195}
183 196
197static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
198 struct netlink_callback *cb, int type,
199 struct tc_action *a)
200{
201 struct tc_action_net *tn = net_generic(net, vlan_net_id);
202
203 return tcf_generic_walker(tn, skb, cb, type, a);
204}
205
206static int tcf_vlan_search(struct net *net, struct tc_action *a, u32 index)
207{
208 struct tc_action_net *tn = net_generic(net, vlan_net_id);
209
210 return tcf_hash_search(tn, a, index);
211}
212
184static struct tc_action_ops act_vlan_ops = { 213static struct tc_action_ops act_vlan_ops = {
185 .kind = "vlan", 214 .kind = "vlan",
186 .type = TCA_ACT_VLAN, 215 .type = TCA_ACT_VLAN,
@@ -188,16 +217,39 @@ static struct tc_action_ops act_vlan_ops = {
188 .act = tcf_vlan, 217 .act = tcf_vlan,
189 .dump = tcf_vlan_dump, 218 .dump = tcf_vlan_dump,
190 .init = tcf_vlan_init, 219 .init = tcf_vlan_init,
220 .walk = tcf_vlan_walker,
221 .lookup = tcf_vlan_search,
222};
223
224static __net_init int vlan_init_net(struct net *net)
225{
226 struct tc_action_net *tn = net_generic(net, vlan_net_id);
227
228 return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK);
229}
230
231static void __net_exit vlan_exit_net(struct net *net)
232{
233 struct tc_action_net *tn = net_generic(net, vlan_net_id);
234
235 tc_action_net_exit(tn);
236}
237
238static struct pernet_operations vlan_net_ops = {
239 .init = vlan_init_net,
240 .exit = vlan_exit_net,
241 .id = &vlan_net_id,
242 .size = sizeof(struct tc_action_net),
191}; 243};
192 244
193static int __init vlan_init_module(void) 245static int __init vlan_init_module(void)
194{ 246{
195 return tcf_register_action(&act_vlan_ops, VLAN_TAB_MASK); 247 return tcf_register_action(&act_vlan_ops, &vlan_net_ops);
196} 248}
197 249
198static void __exit vlan_cleanup_module(void) 250static void __exit vlan_cleanup_module(void)
199{ 251{
200 tcf_unregister_action(&act_vlan_ops); 252 tcf_unregister_action(&act_vlan_ops, &vlan_net_ops);
201} 253}
202 254
203module_init(vlan_init_module); 255module_init(vlan_init_module);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 8dc84300ee79..425fe6a0eda3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -103,8 +103,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
103 } 103 }
104 104
105 if (prog->exts_integrated) { 105 if (prog->exts_integrated) {
106 res->class = prog->res.class; 106 res->class = 0;
107 res->classid = qdisc_skb_cb(skb)->tc_classid; 107 res->classid = TC_H_MAJ(prog->res.classid) |
108 qdisc_skb_cb(skb)->tc_classid;
108 109
109 ret = cls_bpf_exec_opcode(filter_res); 110 ret = cls_bpf_exec_opcode(filter_res);
110 if (ret == TC_ACT_UNSPEC) 111 if (ret == TC_ACT_UNSPEC)
@@ -114,10 +115,12 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
114 115
115 if (filter_res == 0) 116 if (filter_res == 0)
116 continue; 117 continue;
117 118 if (filter_res != -1) {
118 *res = prog->res; 119 res->class = 0;
119 if (filter_res != -1)
120 res->classid = filter_res; 120 res->classid = filter_res;
121 } else {
122 *res = prog->res;
123 }
121 124
122 ret = tcf_exts_exec(skb, &prog->exts, res); 125 ret = tcf_exts_exec(skb, &prog->exts, res);
123 if (ret < 0) 126 if (ret < 0)
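The cls_bpf change above reworks how a classid is reported. For a direct-action (exts_integrated) program, the major number now comes from the filter's configured classid and the minor number from whatever the program wrote into qdisc_skb_cb(skb)->tc_classid, with res->class cleared so the qdisc resolves the class itself; for ordinary programs, a return value other than -1 likewise clears res->class before overriding the classid. A worked example of the composition, with illustrative values that are not taken from this patch:

	u32 configured = 0x00010000;			/* filter attached with classid 1:0 */
	u32 from_bpf   = 3;				/* value the program stored in tc_classid */
	u32 effective  = TC_H_MAJ(configured) | from_bpf;	/* 0x00010003, i.e. class 1:3 */
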
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b021243233..2181ffc76638 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -165,6 +165,51 @@ static void fl_destroy_filter(struct rcu_head *head)
165 kfree(f); 165 kfree(f);
166} 166}
167 167
168static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
169{
170 struct net_device *dev = tp->q->dev_queue->dev;
171 struct tc_cls_flower_offload offload = {0};
172 struct tc_to_netdev tc;
173
174 if (!tc_should_offload(dev, 0))
175 return;
176
177 offload.command = TC_CLSFLOWER_DESTROY;
178 offload.cookie = cookie;
179
180 tc.type = TC_SETUP_CLSFLOWER;
181 tc.cls_flower = &offload;
182
183 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
184}
185
186static void fl_hw_replace_filter(struct tcf_proto *tp,
187 struct flow_dissector *dissector,
188 struct fl_flow_key *mask,
189 struct fl_flow_key *key,
190 struct tcf_exts *actions,
191 unsigned long cookie, u32 flags)
192{
193 struct net_device *dev = tp->q->dev_queue->dev;
194 struct tc_cls_flower_offload offload = {0};
195 struct tc_to_netdev tc;
196
197 if (!tc_should_offload(dev, flags))
198 return;
199
200 offload.command = TC_CLSFLOWER_REPLACE;
201 offload.cookie = cookie;
202 offload.dissector = dissector;
203 offload.mask = mask;
204 offload.key = key;
205 offload.exts = actions;
206
207 tc.type = TC_SETUP_CLSFLOWER;
208 tc.cls_flower = &offload;
209
210 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
211}
212
168static bool fl_destroy(struct tcf_proto *tp, bool force) 213static bool fl_destroy(struct tcf_proto *tp, bool force)
169{ 214{
170 struct cls_fl_head *head = rtnl_dereference(tp->root); 215 struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -174,6 +219,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
174 return false; 219 return false;
175 220
176 list_for_each_entry_safe(f, next, &head->filters, list) { 221 list_for_each_entry_safe(f, next, &head->filters, list) {
222 fl_hw_destroy_filter(tp, (unsigned long)f);
177 list_del_rcu(&f->list); 223 list_del_rcu(&f->list);
178 call_rcu(&f->rcu, fl_destroy_filter); 224 call_rcu(&f->rcu, fl_destroy_filter);
179 } 225 }
@@ -459,6 +505,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
459 struct cls_fl_filter *fnew; 505 struct cls_fl_filter *fnew;
460 struct nlattr *tb[TCA_FLOWER_MAX + 1]; 506 struct nlattr *tb[TCA_FLOWER_MAX + 1];
461 struct fl_flow_mask mask = {}; 507 struct fl_flow_mask mask = {};
508 u32 flags = 0;
462 int err; 509 int err;
463 510
464 if (!tca[TCA_OPTIONS]) 511 if (!tca[TCA_OPTIONS])
@@ -486,6 +533,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
486 } 533 }
487 fnew->handle = handle; 534 fnew->handle = handle;
488 535
536 if (tb[TCA_FLOWER_FLAGS])
537 flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
538
489 err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); 539 err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
490 if (err) 540 if (err)
491 goto errout; 541 goto errout;
@@ -498,9 +548,20 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
498 head->ht_params); 548 head->ht_params);
499 if (err) 549 if (err)
500 goto errout; 550 goto errout;
501 if (fold) 551
552 fl_hw_replace_filter(tp,
553 &head->dissector,
554 &mask.key,
555 &fnew->key,
556 &fnew->exts,
557 (unsigned long)fnew,
558 flags);
559
560 if (fold) {
502 rhashtable_remove_fast(&head->ht, &fold->ht_node, 561 rhashtable_remove_fast(&head->ht, &fold->ht_node,
503 head->ht_params); 562 head->ht_params);
563 fl_hw_destroy_filter(tp, (unsigned long)fold);
564 }
504 565
505 *arg = (unsigned long) fnew; 566 *arg = (unsigned long) fnew;
506 567
@@ -527,6 +588,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
527 rhashtable_remove_fast(&head->ht, &f->ht_node, 588 rhashtable_remove_fast(&head->ht, &f->ht_node,
528 head->ht_params); 589 head->ht_params);
529 list_del_rcu(&f->list); 590 list_del_rcu(&f->list);
591 fl_hw_destroy_filter(tp, (unsigned long)f);
530 tcf_unbind_filter(tp, &f->res); 592 tcf_unbind_filter(tp, &f->res);
531 call_rcu(&f->rcu, fl_destroy_filter); 593 call_rcu(&f->rcu, fl_destroy_filter);
532 return 0; 594 return 0;
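cls_flower gains its first hardware-offload hooks here: after a filter is inserted into (or removed from) the software rhashtable, fl_hw_replace_filter()/fl_hw_destroy_filter() hand the dissector, key, mask and actions to the device through ndo_setup_tc with type TC_SETUP_CLSFLOWER, using the filter pointer as an opaque cookie, while tc_should_offload() keeps the calls away from devices that cannot accept them; the flags checked there come from the new TCA_FLOWER_FLAGS attribute. No driver implementation is part of this hunk. The sketch below is a hypothetical driver-side dispatcher, showing only the shape of what such a device would receive, with all rule-programming details elided:

/* hypothetical driver callback, not from this series */
static int foo_ndo_setup_tc(struct net_device *dev, u32 handle,
			    __be16 proto, struct tc_to_netdev *tc)
{
	if (tc->type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (tc->cls_flower->command) {
	case TC_CLSFLOWER_REPLACE:
		/* program tc->cls_flower->key / ->mask / ->exts into the
		 * hardware table, keyed by tc->cls_flower->cookie */
		return 0;
	case TC_CLSFLOWER_DESTROY:
		/* remove the hardware rule associated with the cookie */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
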
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4fbb67430ce4..563cdad76448 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -43,6 +43,7 @@
43#include <net/netlink.h> 43#include <net/netlink.h>
44#include <net/act_api.h> 44#include <net/act_api.h>
45#include <net/pkt_cls.h> 45#include <net/pkt_cls.h>
46#include <linux/netdevice.h>
46 47
47struct tc_u_knode { 48struct tc_u_knode {
48 struct tc_u_knode __rcu *next; 49 struct tc_u_knode __rcu *next;
@@ -58,6 +59,7 @@ struct tc_u_knode {
58#ifdef CONFIG_CLS_U32_PERF 59#ifdef CONFIG_CLS_U32_PERF
59 struct tc_u32_pcnt __percpu *pf; 60 struct tc_u32_pcnt __percpu *pf;
60#endif 61#endif
62 u32 flags;
61#ifdef CONFIG_CLS_U32_MARK 63#ifdef CONFIG_CLS_U32_MARK
62 u32 val; 64 u32 val;
63 u32 mask; 65 u32 mask;
@@ -424,6 +426,97 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
424 return 0; 426 return 0;
425} 427}
426 428
429static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
430{
431 struct net_device *dev = tp->q->dev_queue->dev;
432 struct tc_cls_u32_offload u32_offload = {0};
433 struct tc_to_netdev offload;
434
435 offload.type = TC_SETUP_CLSU32;
436 offload.cls_u32 = &u32_offload;
437
438 if (tc_should_offload(dev, 0)) {
439 offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
440 offload.cls_u32->knode.handle = handle;
441 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
442 tp->protocol, &offload);
443 }
444}
445
446static void u32_replace_hw_hnode(struct tcf_proto *tp,
447 struct tc_u_hnode *h,
448 u32 flags)
449{
450 struct net_device *dev = tp->q->dev_queue->dev;
451 struct tc_cls_u32_offload u32_offload = {0};
452 struct tc_to_netdev offload;
453
454 offload.type = TC_SETUP_CLSU32;
455 offload.cls_u32 = &u32_offload;
456
457 if (tc_should_offload(dev, flags)) {
458 offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
459 offload.cls_u32->hnode.divisor = h->divisor;
460 offload.cls_u32->hnode.handle = h->handle;
461 offload.cls_u32->hnode.prio = h->prio;
462
463 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
464 tp->protocol, &offload);
465 }
466}
467
468static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
469{
470 struct net_device *dev = tp->q->dev_queue->dev;
471 struct tc_cls_u32_offload u32_offload = {0};
472 struct tc_to_netdev offload;
473
474 offload.type = TC_SETUP_CLSU32;
475 offload.cls_u32 = &u32_offload;
476
477 if (tc_should_offload(dev, 0)) {
478 offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
479 offload.cls_u32->hnode.divisor = h->divisor;
480 offload.cls_u32->hnode.handle = h->handle;
481 offload.cls_u32->hnode.prio = h->prio;
482
483 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
484 tp->protocol, &offload);
485 }
486}
487
488static void u32_replace_hw_knode(struct tcf_proto *tp,
489 struct tc_u_knode *n,
490 u32 flags)
491{
492 struct net_device *dev = tp->q->dev_queue->dev;
493 struct tc_cls_u32_offload u32_offload = {0};
494 struct tc_to_netdev offload;
495
496 offload.type = TC_SETUP_CLSU32;
497 offload.cls_u32 = &u32_offload;
498
499 if (tc_should_offload(dev, flags)) {
500 offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
501 offload.cls_u32->knode.handle = n->handle;
502 offload.cls_u32->knode.fshift = n->fshift;
503#ifdef CONFIG_CLS_U32_MARK
504 offload.cls_u32->knode.val = n->val;
505 offload.cls_u32->knode.mask = n->mask;
506#else
507 offload.cls_u32->knode.val = 0;
508 offload.cls_u32->knode.mask = 0;
509#endif
510 offload.cls_u32->knode.sel = &n->sel;
511 offload.cls_u32->knode.exts = &n->exts;
512 if (n->ht_down)
513 offload.cls_u32->knode.link_handle = n->ht_down->handle;
514
515 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
516 tp->protocol, &offload);
517 }
518}
519
427static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) 520static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
428{ 521{
429 struct tc_u_knode *n; 522 struct tc_u_knode *n;
@@ -434,6 +527,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
434 RCU_INIT_POINTER(ht->ht[h], 527 RCU_INIT_POINTER(ht->ht[h],
435 rtnl_dereference(n->next)); 528 rtnl_dereference(n->next));
436 tcf_unbind_filter(tp, &n->res); 529 tcf_unbind_filter(tp, &n->res);
530 u32_remove_hw_knode(tp, n->handle);
437 call_rcu(&n->rcu, u32_delete_key_freepf_rcu); 531 call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
438 } 532 }
439 } 533 }
@@ -454,6 +548,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
454 phn; 548 phn;
455 hn = &phn->next, phn = rtnl_dereference(*hn)) { 549 hn = &phn->next, phn = rtnl_dereference(*hn)) {
456 if (phn == ht) { 550 if (phn == ht) {
551 u32_clear_hw_hnode(tp, ht);
457 RCU_INIT_POINTER(*hn, ht->next); 552 RCU_INIT_POINTER(*hn, ht->next);
458 kfree_rcu(ht, rcu); 553 kfree_rcu(ht, rcu);
459 return 0; 554 return 0;
@@ -540,8 +635,10 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
540 if (ht == NULL) 635 if (ht == NULL)
541 return 0; 636 return 0;
542 637
543 if (TC_U32_KEY(ht->handle)) 638 if (TC_U32_KEY(ht->handle)) {
639 u32_remove_hw_knode(tp, ht->handle);
544 return u32_delete_key(tp, (struct tc_u_knode *)ht); 640 return u32_delete_key(tp, (struct tc_u_knode *)ht);
641 }
545 642
546 if (root_ht == ht) 643 if (root_ht == ht)
547 return -EINVAL; 644 return -EINVAL;
@@ -587,6 +684,7 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
587 [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) }, 684 [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) },
588 [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, 685 [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
589 [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, 686 [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) },
687 [TCA_U32_FLAGS] = { .type = NLA_U32 },
590}; 688};
591 689
592static int u32_set_parms(struct net *net, struct tcf_proto *tp, 690static int u32_set_parms(struct net *net, struct tcf_proto *tp,
@@ -694,6 +792,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
694#endif 792#endif
695 new->fshift = n->fshift; 793 new->fshift = n->fshift;
696 new->res = n->res; 794 new->res = n->res;
795 new->flags = n->flags;
697 RCU_INIT_POINTER(new->ht_down, n->ht_down); 796 RCU_INIT_POINTER(new->ht_down, n->ht_down);
698 797
699 /* bump reference count as long as we hold pointer to structure */ 798 /* bump reference count as long as we hold pointer to structure */
@@ -733,7 +832,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
733 struct tc_u32_sel *s; 832 struct tc_u32_sel *s;
734 struct nlattr *opt = tca[TCA_OPTIONS]; 833 struct nlattr *opt = tca[TCA_OPTIONS];
735 struct nlattr *tb[TCA_U32_MAX + 1]; 834 struct nlattr *tb[TCA_U32_MAX + 1];
736 u32 htid; 835 u32 htid, flags = 0;
737 int err; 836 int err;
738#ifdef CONFIG_CLS_U32_PERF 837#ifdef CONFIG_CLS_U32_PERF
739 size_t size; 838 size_t size;
@@ -746,6 +845,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
746 if (err < 0) 845 if (err < 0)
747 return err; 846 return err;
748 847
848 if (tb[TCA_U32_FLAGS])
849 flags = nla_get_u32(tb[TCA_U32_FLAGS]);
850
749 n = (struct tc_u_knode *)*arg; 851 n = (struct tc_u_knode *)*arg;
750 if (n) { 852 if (n) {
751 struct tc_u_knode *new; 853 struct tc_u_knode *new;
@@ -753,6 +855,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
753 if (TC_U32_KEY(n->handle) == 0) 855 if (TC_U32_KEY(n->handle) == 0)
754 return -EINVAL; 856 return -EINVAL;
755 857
858 if (n->flags != flags)
859 return -EINVAL;
860
756 new = u32_init_knode(tp, n); 861 new = u32_init_knode(tp, n);
757 if (!new) 862 if (!new)
758 return -ENOMEM; 863 return -ENOMEM;
@@ -769,6 +874,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
769 u32_replace_knode(tp, tp_c, new); 874 u32_replace_knode(tp, tp_c, new);
770 tcf_unbind_filter(tp, &n->res); 875 tcf_unbind_filter(tp, &n->res);
771 call_rcu(&n->rcu, u32_delete_key_rcu); 876 call_rcu(&n->rcu, u32_delete_key_rcu);
877 u32_replace_hw_knode(tp, new, flags);
772 return 0; 878 return 0;
773 } 879 }
774 880
@@ -795,6 +901,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
795 RCU_INIT_POINTER(ht->next, tp_c->hlist); 901 RCU_INIT_POINTER(ht->next, tp_c->hlist);
796 rcu_assign_pointer(tp_c->hlist, ht); 902 rcu_assign_pointer(tp_c->hlist, ht);
797 *arg = (unsigned long)ht; 903 *arg = (unsigned long)ht;
904
905 u32_replace_hw_hnode(tp, ht, flags);
798 return 0; 906 return 0;
799 } 907 }
800 908
@@ -845,6 +953,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
845 RCU_INIT_POINTER(n->ht_up, ht); 953 RCU_INIT_POINTER(n->ht_up, ht);
846 n->handle = handle; 954 n->handle = handle;
847 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; 955 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
956 n->flags = flags;
848 tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); 957 tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
849 n->tp = tp; 958 n->tp = tp;
850 959
@@ -877,7 +986,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
877 986
878 RCU_INIT_POINTER(n->next, pins); 987 RCU_INIT_POINTER(n->next, pins);
879 rcu_assign_pointer(*ins, n); 988 rcu_assign_pointer(*ins, n);
880 989 u32_replace_hw_knode(tp, n, flags);
881 *arg = (unsigned long)n; 990 *arg = (unsigned long)n;
882 return 0; 991 return 0;
883 } 992 }
@@ -982,6 +1091,9 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
982 nla_put_u32(skb, TCA_U32_LINK, ht_down->handle)) 1091 nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
983 goto nla_put_failure; 1092 goto nla_put_failure;
984 1093
1094 if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1095 goto nla_put_failure;
1096
985#ifdef CONFIG_CLS_U32_MARK 1097#ifdef CONFIG_CLS_U32_MARK
986 if ((n->val || n->mask)) { 1098 if ((n->val || n->mask)) {
987 struct tc_u32_mark mark = {.val = n->val, 1099 struct tc_u32_mark mark = {.val = n->val,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index af1acf009866..3b180ff72f79 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
744 return 0; 744 return 0;
745} 745}
746 746
747void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) 747void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
748 unsigned int len)
748{ 749{
749 const struct Qdisc_class_ops *cops; 750 const struct Qdisc_class_ops *cops;
750 unsigned long cl; 751 unsigned long cl;
751 u32 parentid; 752 u32 parentid;
752 int drops; 753 int drops;
753 754
754 if (n == 0) 755 if (n == 0 && len == 0)
755 return; 756 return;
756 drops = max_t(int, n, 0); 757 drops = max_t(int, n, 0);
757 rcu_read_lock(); 758 rcu_read_lock();
@@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
774 cops->put(sch, cl); 775 cops->put(sch, cl);
775 } 776 }
776 sch->q.qlen -= n; 777 sch->q.qlen -= n;
778 sch->qstats.backlog -= len;
777 __qdisc_qstats_drop(sch, drops); 779 __qdisc_qstats_drop(sch, drops);
778 } 780 }
779 rcu_read_unlock(); 781 rcu_read_unlock();
780} 782}
781EXPORT_SYMBOL(qdisc_tree_decrease_qlen); 783EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
782 784
783static void notify_and_destroy(struct net *net, struct sk_buff *skb, 785static void notify_and_destroy(struct net *net, struct sk_buff *skb,
784 struct nlmsghdr *n, u32 clid, 786 struct nlmsghdr *n, u32 clid,
@@ -1841,7 +1843,7 @@ reclassify:
1841 return err; 1843 return err;
1842 } 1844 }
1843 1845
1844 return -1; 1846 return TC_ACT_UNSPEC; /* signal: continue lookup */
1845#ifdef CONFIG_NET_CLS_ACT 1847#ifdef CONFIG_NET_CLS_ACT
1846reset: 1848reset:
1847 if (unlikely(limit++ >= MAX_REC_LOOP)) { 1849 if (unlikely(limit++ >= MAX_REC_LOOP)) {
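sch_api.c renames qdisc_tree_decrease_qlen() to qdisc_tree_reduce_backlog() and extends it to take a byte count, so parent classes see qstats.backlog shrink together with q.qlen whenever a child drops packets outside the normal dequeue path; the early return now fires only when both counts are zero. (The classification path in the same file also starts returning TC_ACT_UNSPEC instead of a bare -1 when no filter matches, to signal "continue lookup".) Every caller updated in the qdisc hunks below therefore accumulates qdisc_pkt_len() for each dropped skb, along the lines of this sketch modelled on the codel/fq changes further down:

	unsigned int dropped_pkts = 0, dropped_bytes = 0;

	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = __skb_dequeue(&sch->q);

		dropped_bytes += qdisc_pkt_len(skb);	/* bytes leaving this qdisc */
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch);
		dropped_pkts++;
	}
	qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
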
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index c538d9e4a8f6..baafddf229ce 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1624 new->reshape_fail = cbq_reshape_fail; 1624 new->reshape_fail = cbq_reshape_fail;
1625#endif 1625#endif
1626 } 1626 }
1627 sch_tree_lock(sch);
1628 *old = cl->q;
1629 cl->q = new;
1630 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1631 qdisc_reset(*old);
1632 sch_tree_unlock(sch);
1633 1627
1628 *old = qdisc_replace(sch, new, &cl->q);
1634 return 0; 1629 return 0;
1635} 1630}
1636 1631
@@ -1914,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1914{ 1909{
1915 struct cbq_sched_data *q = qdisc_priv(sch); 1910 struct cbq_sched_data *q = qdisc_priv(sch);
1916 struct cbq_class *cl = (struct cbq_class *)arg; 1911 struct cbq_class *cl = (struct cbq_class *)arg;
1917 unsigned int qlen; 1912 unsigned int qlen, backlog;
1918 1913
1919 if (cl->filters || cl->children || cl == &q->link) 1914 if (cl->filters || cl->children || cl == &q->link)
1920 return -EBUSY; 1915 return -EBUSY;
@@ -1922,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1922 sch_tree_lock(sch); 1917 sch_tree_lock(sch);
1923 1918
1924 qlen = cl->q->q.qlen; 1919 qlen = cl->q->q.qlen;
1920 backlog = cl->q->qstats.backlog;
1925 qdisc_reset(cl->q); 1921 qdisc_reset(cl->q);
1926 qdisc_tree_decrease_qlen(cl->q, qlen); 1922 qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
1927 1923
1928 if (cl->next_alive) 1924 if (cl->next_alive)
1929 cbq_deactivate_class(cl); 1925 cbq_deactivate_class(cl);
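The cbq_graft() hunk is the first of several conversions (drr and dsmark follow the same shape below) to a new qdisc_replace() helper, which folds the open-coded sch_tree_lock / pointer swap / qlen decrease / qdisc_reset sequence into a single call and reports both the drained queue length and backlog to the ancestors. The cbq_delete() hunk shows the other half of the pattern: snapshot the backlog before qdisc_reset() and pass it to qdisc_tree_reduce_backlog() alongside the qlen. The helper itself is introduced elsewhere in this series (in include/net/sch_generic.h); the approximation below is reconstructed from the code it replaces and may differ in detail:

static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
					  struct Qdisc **pold)
{
	struct Qdisc *old;

	sch_tree_lock(sch);
	old = *pold;
	*pold = new;
	if (old != NULL) {
		/* propagate the drained packets and bytes to the ancestors */
		qdisc_tree_reduce_backlog(old, old->q.qlen,
					  old->qstats.backlog);
		qdisc_reset(old);
	}
	sch_tree_unlock(sch);

	return old;
}
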
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 5ffb8b8337c7..0a08c860eee4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
128 choke_zap_tail_holes(q); 128 choke_zap_tail_holes(q);
129 129
130 qdisc_qstats_backlog_dec(sch, skb); 130 qdisc_qstats_backlog_dec(sch, skb);
131 qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
131 qdisc_drop(skb, sch); 132 qdisc_drop(skb, sch);
132 qdisc_tree_decrease_qlen(sch, 1);
133 --sch->q.qlen; 133 --sch->q.qlen;
134} 134}
135 135
@@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
456 old = q->tab; 456 old = q->tab;
457 if (old) { 457 if (old) {
458 unsigned int oqlen = sch->q.qlen, tail = 0; 458 unsigned int oqlen = sch->q.qlen, tail = 0;
459 unsigned dropped = 0;
459 460
460 while (q->head != q->tail) { 461 while (q->head != q->tail) {
461 struct sk_buff *skb = q->tab[q->head]; 462 struct sk_buff *skb = q->tab[q->head];
@@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
467 ntab[tail++] = skb; 468 ntab[tail++] = skb;
468 continue; 469 continue;
469 } 470 }
471 dropped += qdisc_pkt_len(skb);
470 qdisc_qstats_backlog_dec(sch, skb); 472 qdisc_qstats_backlog_dec(sch, skb);
471 --sch->q.qlen; 473 --sch->q.qlen;
472 qdisc_drop(skb, sch); 474 qdisc_drop(skb, sch);
473 } 475 }
474 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); 476 qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
475 q->head = 0; 477 q->head = 0;
476 q->tail = tail; 478 q->tail = tail;
477 } 479 }
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 535007d5f0b5..9b7e2980ee5c 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
79 79
80 skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); 80 skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
81 81
82 /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, 82 /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
83 * or HTB crashes. Defer it for next round. 83 * or HTB crashes. Defer it for next round.
84 */ 84 */
85 if (q->stats.drop_count && sch->q.qlen) { 85 if (q->stats.drop_count && sch->q.qlen) {
86 qdisc_tree_decrease_qlen(sch, q->stats.drop_count); 86 qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
87 q->stats.drop_count = 0; 87 q->stats.drop_count = 0;
88 q->stats.drop_len = 0;
88 } 89 }
89 if (skb) 90 if (skb)
90 qdisc_bstats_update(sch, skb); 91 qdisc_bstats_update(sch, skb);
@@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
116{ 117{
117 struct codel_sched_data *q = qdisc_priv(sch); 118 struct codel_sched_data *q = qdisc_priv(sch);
118 struct nlattr *tb[TCA_CODEL_MAX + 1]; 119 struct nlattr *tb[TCA_CODEL_MAX + 1];
119 unsigned int qlen; 120 unsigned int qlen, dropped = 0;
120 int err; 121 int err;
121 122
122 if (!opt) 123 if (!opt)
@@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
156 while (sch->q.qlen > sch->limit) { 157 while (sch->q.qlen > sch->limit) {
157 struct sk_buff *skb = __skb_dequeue(&sch->q); 158 struct sk_buff *skb = __skb_dequeue(&sch->q);
158 159
160 dropped += qdisc_pkt_len(skb);
159 qdisc_qstats_backlog_dec(sch, skb); 161 qdisc_qstats_backlog_dec(sch, skb);
160 qdisc_drop(skb, sch); 162 qdisc_drop(skb, sch);
161 } 163 }
162 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); 164 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
163 165
164 sch_tree_unlock(sch); 166 sch_tree_unlock(sch);
165 return 0; 167 return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index a1cd778240cd..a63e879e8975 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
53static void drr_purge_queue(struct drr_class *cl) 53static void drr_purge_queue(struct drr_class *cl)
54{ 54{
55 unsigned int len = cl->qdisc->q.qlen; 55 unsigned int len = cl->qdisc->q.qlen;
56 unsigned int backlog = cl->qdisc->qstats.backlog;
56 57
57 qdisc_reset(cl->qdisc); 58 qdisc_reset(cl->qdisc);
58 qdisc_tree_decrease_qlen(cl->qdisc, len); 59 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
59} 60}
60 61
61static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { 62static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
@@ -226,11 +227,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
226 new = &noop_qdisc; 227 new = &noop_qdisc;
227 } 228 }
228 229
229 sch_tree_lock(sch); 230 *old = qdisc_replace(sch, new, &cl->qdisc);
230 drr_purge_queue(cl);
231 *old = cl->qdisc;
232 cl->qdisc = new;
233 sch_tree_unlock(sch);
234 return 0; 231 return 0;
235} 232}
236 233
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index f357f34d02d2..34b4ddaca27c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
73 new = &noop_qdisc; 73 new = &noop_qdisc;
74 } 74 }
75 75
76 sch_tree_lock(sch); 76 *old = qdisc_replace(sch, new, &p->q);
77 *old = p->q;
78 p->q = new;
79 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
80 qdisc_reset(*old);
81 sch_tree_unlock(sch);
82
83 return 0; 77 return 0;
84} 78}
85 79
@@ -264,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
264 return err; 258 return err;
265 } 259 }
266 260
261 qdisc_qstats_backlog_inc(sch, skb);
267 sch->q.qlen++; 262 sch->q.qlen++;
268 263
269 return NET_XMIT_SUCCESS; 264 return NET_XMIT_SUCCESS;
@@ -281,11 +276,12 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
281 276
282 pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); 277 pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
283 278
284 skb = p->q->ops->dequeue(p->q); 279 skb = qdisc_dequeue_peeked(p->q);
285 if (skb == NULL) 280 if (skb == NULL)
286 return NULL; 281 return NULL;
287 282
288 qdisc_bstats_update(sch, skb); 283 qdisc_bstats_update(sch, skb);
284 qdisc_qstats_backlog_dec(sch, skb);
289 sch->q.qlen--; 285 sch->q.qlen--;
290 286
291 index = skb->tc_index & (p->indices - 1); 287 index = skb->tc_index & (p->indices - 1);
@@ -401,6 +397,7 @@ static void dsmark_reset(struct Qdisc *sch)
401 397
402 pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); 398 pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
403 qdisc_reset(p->q); 399 qdisc_reset(p->q);
400 sch->qstats.backlog = 0;
404 sch->q.qlen = 0; 401 sch->q.qlen = 0;
405} 402}
406 403
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 109b2322778f..3c6a47d66a04 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
662 struct fq_sched_data *q = qdisc_priv(sch); 662 struct fq_sched_data *q = qdisc_priv(sch);
663 struct nlattr *tb[TCA_FQ_MAX + 1]; 663 struct nlattr *tb[TCA_FQ_MAX + 1];
664 int err, drop_count = 0; 664 int err, drop_count = 0;
665 unsigned drop_len = 0;
665 u32 fq_log; 666 u32 fq_log;
666 667
667 if (!opt) 668 if (!opt)
@@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
736 737
737 if (!skb) 738 if (!skb)
738 break; 739 break;
740 drop_len += qdisc_pkt_len(skb);
739 kfree_skb(skb); 741 kfree_skb(skb);
740 drop_count++; 742 drop_count++;
741 } 743 }
742 qdisc_tree_decrease_qlen(sch, drop_count); 744 qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
743 745
744 sch_tree_unlock(sch); 746 sch_tree_unlock(sch);
745 return err; 747 return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 4c834e93dafb..d3fc8f9dd3d4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
175static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) 175static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
176{ 176{
177 struct fq_codel_sched_data *q = qdisc_priv(sch); 177 struct fq_codel_sched_data *q = qdisc_priv(sch);
178 unsigned int idx; 178 unsigned int idx, prev_backlog;
179 struct fq_codel_flow *flow; 179 struct fq_codel_flow *flow;
180 int uninitialized_var(ret); 180 int uninitialized_var(ret);
181 181
@@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
203 if (++sch->q.qlen <= sch->limit) 203 if (++sch->q.qlen <= sch->limit)
204 return NET_XMIT_SUCCESS; 204 return NET_XMIT_SUCCESS;
205 205
206 prev_backlog = sch->qstats.backlog;
206 q->drop_overlimit++; 207 q->drop_overlimit++;
207 /* Return Congestion Notification only if we dropped a packet 208 /* Return Congestion Notification only if we dropped a packet
208 * from this flow. 209 * from this flow.
@@ -211,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
211 return NET_XMIT_CN; 212 return NET_XMIT_CN;
212 213
213 /* As we dropped a packet, better let upper stack know this */ 214 /* As we dropped a packet, better let upper stack know this */
214 qdisc_tree_decrease_qlen(sch, 1); 215 qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
215 return NET_XMIT_SUCCESS; 216 return NET_XMIT_SUCCESS;
216} 217}
217 218
@@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
241 struct fq_codel_flow *flow; 242 struct fq_codel_flow *flow;
242 struct list_head *head; 243 struct list_head *head;
243 u32 prev_drop_count, prev_ecn_mark; 244 u32 prev_drop_count, prev_ecn_mark;
245 unsigned int prev_backlog;
244 246
245begin: 247begin:
246 head = &q->new_flows; 248 head = &q->new_flows;
@@ -259,6 +261,7 @@ begin:
259 261
260 prev_drop_count = q->cstats.drop_count; 262 prev_drop_count = q->cstats.drop_count;
261 prev_ecn_mark = q->cstats.ecn_mark; 263 prev_ecn_mark = q->cstats.ecn_mark;
264 prev_backlog = sch->qstats.backlog;
262 265
263 skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, 266 skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
264 dequeue); 267 dequeue);
@@ -276,12 +279,14 @@ begin:
276 } 279 }
277 qdisc_bstats_update(sch, skb); 280 qdisc_bstats_update(sch, skb);
278 flow->deficit -= qdisc_pkt_len(skb); 281 flow->deficit -= qdisc_pkt_len(skb);
279 /* We can't call qdisc_tree_decrease_qlen() if our qlen is 0, 282 /* We can't call qdisc_tree_reduce_backlog() if our qlen is 0,
280 * or HTB crashes. Defer it for next round. 283 * or HTB crashes. Defer it for next round.
281 */ 284 */
282 if (q->cstats.drop_count && sch->q.qlen) { 285 if (q->cstats.drop_count && sch->q.qlen) {
283 qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); 286 qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
287 q->cstats.drop_len);
284 q->cstats.drop_count = 0; 288 q->cstats.drop_count = 0;
289 q->cstats.drop_len = 0;
285 } 290 }
286 return skb; 291 return skb;
287} 292}
@@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
372 while (sch->q.qlen > sch->limit) { 377 while (sch->q.qlen > sch->limit) {
373 struct sk_buff *skb = fq_codel_dequeue(sch); 378 struct sk_buff *skb = fq_codel_dequeue(sch);
374 379
380 q->cstats.drop_len += qdisc_pkt_len(skb);
375 kfree_skb(skb); 381 kfree_skb(skb);
376 q->cstats.drop_count++; 382 q->cstats.drop_count++;
377 } 383 }
378 qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); 384 qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
379 q->cstats.drop_count = 0; 385 q->cstats.drop_count = 0;
386 q->cstats.drop_len = 0;
380 387
381 sch_tree_unlock(sch); 388 sch_tree_unlock(sch);
382 return 0; 389 return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 16bc83b2842a..80742edea96f 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
159 if (validate) 159 if (validate)
160 skb = validate_xmit_skb_list(skb, dev); 160 skb = validate_xmit_skb_list(skb, dev);
161 161
162 if (skb) { 162 if (likely(skb)) {
163 HARD_TX_LOCK(dev, txq, smp_processor_id()); 163 HARD_TX_LOCK(dev, txq, smp_processor_id());
164 if (!netif_xmit_frozen_or_stopped(txq)) 164 if (!netif_xmit_frozen_or_stopped(txq))
165 skb = dev_hard_start_xmit(skb, dev, txq, &ret); 165 skb = dev_hard_start_xmit(skb, dev, txq, &ret);
166 166
167 HARD_TX_UNLOCK(dev, txq); 167 HARD_TX_UNLOCK(dev, txq);
168 } else {
169 spin_lock(root_lock);
170 return qdisc_qlen(q);
168 } 171 }
169 spin_lock(root_lock); 172 spin_lock(root_lock);
170 173
@@ -567,6 +570,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
567 .dump = pfifo_fast_dump, 570 .dump = pfifo_fast_dump,
568 .owner = THIS_MODULE, 571 .owner = THIS_MODULE,
569}; 572};
573EXPORT_SYMBOL(pfifo_fast_ops);
570 574
571static struct lock_class_key qdisc_tx_busylock; 575static struct lock_class_key qdisc_tx_busylock;
572 576
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b7ebe2c87586..d783d7cc3348 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,9 +895,10 @@ static void
895hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) 895hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
896{ 896{
897 unsigned int len = cl->qdisc->q.qlen; 897 unsigned int len = cl->qdisc->q.qlen;
898 unsigned int backlog = cl->qdisc->qstats.backlog;
898 899
899 qdisc_reset(cl->qdisc); 900 qdisc_reset(cl->qdisc);
900 qdisc_tree_decrease_qlen(cl->qdisc, len); 901 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
901} 902}
902 903
903static void 904static void
@@ -1215,11 +1216,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1215 new = &noop_qdisc; 1216 new = &noop_qdisc;
1216 } 1217 }
1217 1218
1218 sch_tree_lock(sch); 1219 *old = qdisc_replace(sch, new, &cl->qdisc);
1219 hfsc_purge_queue(sch, cl);
1220 *old = cl->qdisc;
1221 cl->qdisc = new;
1222 sch_tree_unlock(sch);
1223 return 0; 1220 return 0;
1224} 1221}
1225 1222
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 86b04e31e60b..13d6f83ec491 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
382 struct hhf_sched_data *q = qdisc_priv(sch); 382 struct hhf_sched_data *q = qdisc_priv(sch);
383 enum wdrr_bucket_idx idx; 383 enum wdrr_bucket_idx idx;
384 struct wdrr_bucket *bucket; 384 struct wdrr_bucket *bucket;
385 unsigned int prev_backlog;
385 386
386 idx = hhf_classify(skb, sch); 387 idx = hhf_classify(skb, sch);
387 388
@@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
409 if (++sch->q.qlen <= sch->limit) 410 if (++sch->q.qlen <= sch->limit)
410 return NET_XMIT_SUCCESS; 411 return NET_XMIT_SUCCESS;
411 412
413 prev_backlog = sch->qstats.backlog;
412 q->drop_overlimit++; 414 q->drop_overlimit++;
413 /* Return Congestion Notification only if we dropped a packet from this 415 /* Return Congestion Notification only if we dropped a packet from this
414 * bucket. 416 * bucket.
@@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
417 return NET_XMIT_CN; 419 return NET_XMIT_CN;
418 420
419 /* As we dropped a packet, better let upper stack know this. */ 421 /* As we dropped a packet, better let upper stack know this. */
420 qdisc_tree_decrease_qlen(sch, 1); 422 qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
421 return NET_XMIT_SUCCESS; 423 return NET_XMIT_SUCCESS;
422} 424}
423 425
@@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
527{ 529{
528 struct hhf_sched_data *q = qdisc_priv(sch); 530 struct hhf_sched_data *q = qdisc_priv(sch);
529 struct nlattr *tb[TCA_HHF_MAX + 1]; 531 struct nlattr *tb[TCA_HHF_MAX + 1];
530 unsigned int qlen; 532 unsigned int qlen, prev_backlog;
531 int err; 533 int err;
532 u64 non_hh_quantum; 534 u64 non_hh_quantum;
533 u32 new_quantum = q->quantum; 535 u32 new_quantum = q->quantum;
@@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
577 } 579 }
578 580
579 qlen = sch->q.qlen; 581 qlen = sch->q.qlen;
582 prev_backlog = sch->qstats.backlog;
580 while (sch->q.qlen > sch->limit) { 583 while (sch->q.qlen > sch->limit) {
581 struct sk_buff *skb = hhf_dequeue(sch); 584 struct sk_buff *skb = hhf_dequeue(sch);
582 585
583 kfree_skb(skb); 586 kfree_skb(skb);
584 } 587 }
585 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); 588 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
589 prev_backlog - sch->qstats.backlog);
586 590
587 sch_tree_unlock(sch); 591 sch_tree_unlock(sch);
588 return 0; 592 return 0;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 15ccd7f8fb2a..87b02ed3d5f2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
600 htb_activate(q, cl); 600 htb_activate(q, cl);
601 } 601 }
602 602
603 qdisc_qstats_backlog_inc(sch, skb);
603 sch->q.qlen++; 604 sch->q.qlen++;
604 return NET_XMIT_SUCCESS; 605 return NET_XMIT_SUCCESS;
605} 606}
@@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
889ok: 890ok:
890 qdisc_bstats_update(sch, skb); 891 qdisc_bstats_update(sch, skb);
891 qdisc_unthrottled(sch); 892 qdisc_unthrottled(sch);
893 qdisc_qstats_backlog_dec(sch, skb);
892 sch->q.qlen--; 894 sch->q.qlen--;
893 return skb; 895 return skb;
894 } 896 }
@@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch)
955 unsigned int len; 957 unsigned int len;
956 if (cl->un.leaf.q->ops->drop && 958 if (cl->un.leaf.q->ops->drop &&
957 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { 959 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
960 sch->qstats.backlog -= len;
958 sch->q.qlen--; 961 sch->q.qlen--;
959 if (!cl->un.leaf.q->q.qlen) 962 if (!cl->un.leaf.q->q.qlen)
960 htb_deactivate(q, cl); 963 htb_deactivate(q, cl);
@@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch)
984 } 987 }
985 cl->prio_activity = 0; 988 cl->prio_activity = 0;
986 cl->cmode = HTB_CAN_SEND; 989 cl->cmode = HTB_CAN_SEND;
987
988 } 990 }
989 } 991 }
990 qdisc_watchdog_cancel(&q->watchdog); 992 qdisc_watchdog_cancel(&q->watchdog);
991 __skb_queue_purge(&q->direct_queue); 993 __skb_queue_purge(&q->direct_queue);
992 sch->q.qlen = 0; 994 sch->q.qlen = 0;
995 sch->qstats.backlog = 0;
993 memset(q->hlevel, 0, sizeof(q->hlevel)); 996 memset(q->hlevel, 0, sizeof(q->hlevel));
994 memset(q->row_mask, 0, sizeof(q->row_mask)); 997 memset(q->row_mask, 0, sizeof(q->row_mask));
995 for (i = 0; i < TC_HTB_NUMPRIO; i++) 998 for (i = 0; i < TC_HTB_NUMPRIO; i++)
@@ -1163,14 +1166,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1163 cl->common.classid)) == NULL) 1166 cl->common.classid)) == NULL)
1164 return -ENOBUFS; 1167 return -ENOBUFS;
1165 1168
1166 sch_tree_lock(sch); 1169 *old = qdisc_replace(sch, new, &cl->un.leaf.q);
1167 *old = cl->un.leaf.q;
1168 cl->un.leaf.q = new;
1169 if (*old != NULL) {
1170 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1171 qdisc_reset(*old);
1172 }
1173 sch_tree_unlock(sch);
1174 return 0; 1170 return 0;
1175} 1171}
1176 1172
@@ -1272,7 +1268,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1272{ 1268{
1273 struct htb_sched *q = qdisc_priv(sch); 1269 struct htb_sched *q = qdisc_priv(sch);
1274 struct htb_class *cl = (struct htb_class *)arg; 1270 struct htb_class *cl = (struct htb_class *)arg;
1275 unsigned int qlen;
1276 struct Qdisc *new_q = NULL; 1271 struct Qdisc *new_q = NULL;
1277 int last_child = 0; 1272 int last_child = 0;
1278 1273
@@ -1292,9 +1287,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1292 sch_tree_lock(sch); 1287 sch_tree_lock(sch);
1293 1288
1294 if (!cl->level) { 1289 if (!cl->level) {
1295 qlen = cl->un.leaf.q->q.qlen; 1290 unsigned int qlen = cl->un.leaf.q->q.qlen;
1291 unsigned int backlog = cl->un.leaf.q->qstats.backlog;
1292
1296 qdisc_reset(cl->un.leaf.q); 1293 qdisc_reset(cl->un.leaf.q);
1297 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); 1294 qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
1298 } 1295 }
1299 1296
1300 /* delete from hash and active; remainder in destroy_class */ 1297 /* delete from hash and active; remainder in destroy_class */
@@ -1428,10 +1425,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1428 sch_tree_lock(sch); 1425 sch_tree_lock(sch);
1429 if (parent && !parent->level) { 1426 if (parent && !parent->level) {
1430 unsigned int qlen = parent->un.leaf.q->q.qlen; 1427 unsigned int qlen = parent->un.leaf.q->q.qlen;
1428 unsigned int backlog = parent->un.leaf.q->qstats.backlog;
1431 1429
1432 /* turn parent into inner node */ 1430 /* turn parent into inner node */
1433 qdisc_reset(parent->un.leaf.q); 1431 qdisc_reset(parent->un.leaf.q);
1434 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); 1432 qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
1435 qdisc_destroy(parent->un.leaf.q); 1433 qdisc_destroy(parent->un.leaf.q);
1436 if (parent->prio_activity) 1434 if (parent->prio_activity)
1437 htb_deactivate(q, parent); 1435 htb_deactivate(q, parent);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 3e82f047caaf..56a77b878eb3 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -57,7 +57,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
57 57
58 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 58 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
59 dev_queue = netdev_get_tx_queue(dev, ntx); 59 dev_queue = netdev_get_tx_queue(dev, ntx);
60 qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, 60 qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
61 TC_H_MAKE(TC_H_MAJ(sch->handle), 61 TC_H_MAKE(TC_H_MAJ(sch->handle),
62 TC_H_MIN(ntx + 1))); 62 TC_H_MIN(ntx + 1)));
63 if (qdisc == NULL) 63 if (qdisc == NULL)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index ad70ecf57ce7..b8002ce3d010 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -28,6 +28,7 @@ static void mqprio_destroy(struct Qdisc *sch)
28{ 28{
29 struct net_device *dev = qdisc_dev(sch); 29 struct net_device *dev = qdisc_dev(sch);
30 struct mqprio_sched *priv = qdisc_priv(sch); 30 struct mqprio_sched *priv = qdisc_priv(sch);
31 struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO};
31 unsigned int ntx; 32 unsigned int ntx;
32 33
33 if (priv->qdiscs) { 34 if (priv->qdiscs) {
@@ -39,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch)
39 } 40 }
40 41
41 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) 42 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
42 dev->netdev_ops->ndo_setup_tc(dev, 0); 43 dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
43 else 44 else
44 netdev_set_num_tc(dev, 0); 45 netdev_set_num_tc(dev, 0);
45} 46}
@@ -124,7 +125,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
124 125
125 for (i = 0; i < dev->num_tx_queues; i++) { 126 for (i = 0; i < dev->num_tx_queues; i++) {
126 dev_queue = netdev_get_tx_queue(dev, i); 127 dev_queue = netdev_get_tx_queue(dev, i);
127 qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, 128 qdisc = qdisc_create_dflt(dev_queue,
129 get_default_qdisc_ops(dev, i),
128 TC_H_MAKE(TC_H_MAJ(sch->handle), 130 TC_H_MAKE(TC_H_MAJ(sch->handle),
129 TC_H_MIN(i + 1))); 131 TC_H_MIN(i + 1)));
130 if (qdisc == NULL) { 132 if (qdisc == NULL) {
@@ -140,8 +142,11 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
140 * supplied and verified mapping 142 * supplied and verified mapping
141 */ 143 */
142 if (qopt->hw) { 144 if (qopt->hw) {
145 struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO,
146 { .tc = qopt->num_tc }};
147
143 priv->hw_owned = 1; 148 priv->hw_owned = 1;
144 err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); 149 err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
145 if (err) 150 if (err)
146 goto err; 151 goto err;
147 } else { 152 } else {
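
Both mqprio hunks switch to the reworked ndo_setup_tc() hook: instead of a bare
traffic-class count the qdisc now passes its handle plus a struct tc_to_netdev
whose .type names the offload (TC_SETUP_MQPRIO here) and whose union carries
the per-type payload (.tc, the number of classes, 0 meaning tear-down). A
hedged sketch of the driver side under that assumption; foo_setup_tc() is
hypothetical and the hook prototype is inferred from the calls above:

	static int foo_setup_tc(struct net_device *dev, u32 handle,
				__be16 protocol, struct tc_to_netdev *tc)
	{
		if (tc->type != TC_SETUP_MQPRIO)
			return -EINVAL;

		/* tc->tc == 0 comes from mqprio_destroy() and means tear-down,
		 * mirroring the software fallback netdev_set_num_tc(dev, 0).
		 */
		return netdev_set_num_tc(dev, tc->tc);
	}
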
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 4e904ca0af9d..bcdd54bb101c 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
218 if (q->queues[i] != &noop_qdisc) { 218 if (q->queues[i] != &noop_qdisc) {
219 struct Qdisc *child = q->queues[i]; 219 struct Qdisc *child = q->queues[i];
220 q->queues[i] = &noop_qdisc; 220 q->queues[i] = &noop_qdisc;
221 qdisc_tree_decrease_qlen(child, child->q.qlen); 221 qdisc_tree_reduce_backlog(child, child->q.qlen,
222 child->qstats.backlog);
222 qdisc_destroy(child); 223 qdisc_destroy(child);
223 } 224 }
224 } 225 }
@@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
238 q->queues[i] = child; 239 q->queues[i] = child;
239 240
240 if (old != &noop_qdisc) { 241 if (old != &noop_qdisc) {
241 qdisc_tree_decrease_qlen(old, 242 qdisc_tree_reduce_backlog(old,
242 old->q.qlen); 243 old->q.qlen,
244 old->qstats.backlog);
243 qdisc_destroy(old); 245 qdisc_destroy(old);
244 } 246 }
245 sch_tree_unlock(sch); 247 sch_tree_unlock(sch);
@@ -303,13 +305,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
303 if (new == NULL) 305 if (new == NULL)
304 new = &noop_qdisc; 306 new = &noop_qdisc;
305 307
306 sch_tree_lock(sch); 308 *old = qdisc_replace(sch, new, &q->queues[band]);
307 *old = q->queues[band];
308 q->queues[band] = new;
309 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
310 qdisc_reset(*old);
311 sch_tree_unlock(sch);
312
313 return 0; 309 return 0;
314} 310}
315 311
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5abd1d9de989..4befe97a9034 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
395 sch->q.qlen++; 395 sch->q.qlen++;
396} 396}
397 397
398/* netem can't properly corrupt a megapacket (like we get from GSO), so instead
399 * when we statistically choose to corrupt one, we instead segment it, returning
400 * the first packet to be corrupted, and re-enqueue the remaining frames
401 */
402static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
403{
404 struct sk_buff *segs;
405 netdev_features_t features = netif_skb_features(skb);
406
407 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
408
409 if (IS_ERR_OR_NULL(segs)) {
410 qdisc_reshape_fail(skb, sch);
411 return NULL;
412 }
413 consume_skb(skb);
414 return segs;
415}
416
398/* 417/*
399 * Insert one skb into qdisc. 418 * Insert one skb into qdisc.
400 * Note: parent depends on return value to account for queue length. 419 * Note: parent depends on return value to account for queue length.
@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
407 /* We don't fill cb now as skb_unshare() may invalidate it */ 426 /* We don't fill cb now as skb_unshare() may invalidate it */
408 struct netem_skb_cb *cb; 427 struct netem_skb_cb *cb;
409 struct sk_buff *skb2; 428 struct sk_buff *skb2;
429 struct sk_buff *segs = NULL;
430 unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
431 int nb = 0;
410 int count = 1; 432 int count = 1;
433 int rc = NET_XMIT_SUCCESS;
411 434
412 /* Random duplication */ 435 /* Random duplication */
413 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 436 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
453 * do it now in software before we mangle it. 476 * do it now in software before we mangle it.
454 */ 477 */
455 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 478 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
479 if (skb_is_gso(skb)) {
480 segs = netem_segment(skb, sch);
481 if (!segs)
482 return NET_XMIT_DROP;
483 } else {
484 segs = skb;
485 }
486
487 skb = segs;
488 segs = segs->next;
489
456 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || 490 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
457 (skb->ip_summed == CHECKSUM_PARTIAL && 491 (skb->ip_summed == CHECKSUM_PARTIAL &&
458 skb_checksum_help(skb))) 492 skb_checksum_help(skb))) {
459 return qdisc_drop(skb, sch); 493 rc = qdisc_drop(skb, sch);
494 goto finish_segs;
495 }
460 496
461 skb->data[prandom_u32() % skb_headlen(skb)] ^= 497 skb->data[prandom_u32() % skb_headlen(skb)] ^=
462 1<<(prandom_u32() % 8); 498 1<<(prandom_u32() % 8);
@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
516 sch->qstats.requeues++; 552 sch->qstats.requeues++;
517 } 553 }
518 554
555finish_segs:
556 if (segs) {
557 while (segs) {
558 skb2 = segs->next;
559 segs->next = NULL;
560 qdisc_skb_cb(segs)->pkt_len = segs->len;
561 last_len = segs->len;
562 rc = qdisc_enqueue(segs, sch);
563 if (rc != NET_XMIT_SUCCESS) {
564 if (net_xmit_drop_count(rc))
565 qdisc_qstats_drop(sch);
566 } else {
567 nb++;
568 len += last_len;
569 }
570 segs = skb2;
571 }
572 sch->q.qlen += nb;
573 if (nb > 1)
574 qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
575 }
519 return NET_XMIT_SUCCESS; 576 return NET_XMIT_SUCCESS;
520} 577}
521 578
@@ -598,7 +655,8 @@ deliver:
598 if (unlikely(err != NET_XMIT_SUCCESS)) { 655 if (unlikely(err != NET_XMIT_SUCCESS)) {
599 if (net_xmit_drop_count(err)) { 656 if (net_xmit_drop_count(err)) {
600 qdisc_qstats_drop(sch); 657 qdisc_qstats_drop(sch);
601 qdisc_tree_decrease_qlen(sch, 1); 658 qdisc_tree_reduce_backlog(sch, 1,
659 qdisc_pkt_len(skb));
602 } 660 }
603 } 661 }
604 goto tfifo_dequeue; 662 goto tfifo_dequeue;
@@ -1037,15 +1095,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1037{ 1095{
1038 struct netem_sched_data *q = qdisc_priv(sch); 1096 struct netem_sched_data *q = qdisc_priv(sch);
1039 1097
1040 sch_tree_lock(sch); 1098 *old = qdisc_replace(sch, new, &q->qdisc);
1041 *old = q->qdisc;
1042 q->qdisc = new;
1043 if (*old) {
1044 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1045 qdisc_reset(*old);
1046 }
1047 sch_tree_unlock(sch);
1048
1049 return 0; 1099 return 0;
1050} 1100}
1051 1101
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b783a446d884..71ae3b9629f9 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
183{ 183{
184 struct pie_sched_data *q = qdisc_priv(sch); 184 struct pie_sched_data *q = qdisc_priv(sch);
185 struct nlattr *tb[TCA_PIE_MAX + 1]; 185 struct nlattr *tb[TCA_PIE_MAX + 1];
186 unsigned int qlen; 186 unsigned int qlen, dropped = 0;
187 int err; 187 int err;
188 188
189 if (!opt) 189 if (!opt)
@@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
232 while (sch->q.qlen > sch->limit) { 232 while (sch->q.qlen > sch->limit) {
233 struct sk_buff *skb = __skb_dequeue(&sch->q); 233 struct sk_buff *skb = __skb_dequeue(&sch->q);
234 234
235 dropped += qdisc_pkt_len(skb);
235 qdisc_qstats_backlog_dec(sch, skb); 236 qdisc_qstats_backlog_dec(sch, skb);
236 qdisc_drop(skb, sch); 237 qdisc_drop(skb, sch);
237 } 238 }
238 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); 239 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
239 240
240 sch_tree_unlock(sch); 241 sch_tree_unlock(sch);
241 return 0; 242 return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index ba6487f2741f..fee1b15506b2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
191 struct Qdisc *child = q->queues[i]; 191 struct Qdisc *child = q->queues[i];
192 q->queues[i] = &noop_qdisc; 192 q->queues[i] = &noop_qdisc;
193 if (child != &noop_qdisc) { 193 if (child != &noop_qdisc) {
194 qdisc_tree_decrease_qlen(child, child->q.qlen); 194 qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
195 qdisc_destroy(child); 195 qdisc_destroy(child);
196 } 196 }
197 } 197 }
@@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
210 q->queues[i] = child; 210 q->queues[i] = child;
211 211
212 if (old != &noop_qdisc) { 212 if (old != &noop_qdisc) {
213 qdisc_tree_decrease_qlen(old, 213 qdisc_tree_reduce_backlog(old,
214 old->q.qlen); 214 old->q.qlen,
215 old->qstats.backlog);
215 qdisc_destroy(old); 216 qdisc_destroy(old);
216 } 217 }
217 sch_tree_unlock(sch); 218 sch_tree_unlock(sch);
@@ -268,13 +269,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
268 if (new == NULL) 269 if (new == NULL)
269 new = &noop_qdisc; 270 new = &noop_qdisc;
270 271
271 sch_tree_lock(sch); 272 *old = qdisc_replace(sch, new, &q->queues[band]);
272 *old = q->queues[band];
273 q->queues[band] = new;
274 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
275 qdisc_reset(*old);
276 sch_tree_unlock(sch);
277
278 return 0; 273 return 0;
279} 274}
280 275
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3dc3a6e56052..8d2d8d953432 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
220static void qfq_purge_queue(struct qfq_class *cl) 220static void qfq_purge_queue(struct qfq_class *cl)
221{ 221{
222 unsigned int len = cl->qdisc->q.qlen; 222 unsigned int len = cl->qdisc->q.qlen;
223 unsigned int backlog = cl->qdisc->qstats.backlog;
223 224
224 qdisc_reset(cl->qdisc); 225 qdisc_reset(cl->qdisc);
225 qdisc_tree_decrease_qlen(cl->qdisc, len); 226 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
226} 227}
227 228
228static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { 229static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
@@ -617,11 +618,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
617 new = &noop_qdisc; 618 new = &noop_qdisc;
618 } 619 }
619 620
620 sch_tree_lock(sch); 621 *old = qdisc_replace(sch, new, &cl->qdisc);
621 qfq_purge_queue(cl);
622 *old = cl->qdisc;
623 cl->qdisc = new;
624 sch_tree_unlock(sch);
625 return 0; 622 return 0;
626} 623}
627 624
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6c0534cc7758..8c0508c0e287 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
210 q->flags = ctl->flags; 210 q->flags = ctl->flags;
211 q->limit = ctl->limit; 211 q->limit = ctl->limit;
212 if (child) { 212 if (child) {
213 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 213 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
214 q->qdisc->qstats.backlog);
214 qdisc_destroy(q->qdisc); 215 qdisc_destroy(q->qdisc);
215 q->qdisc = child; 216 q->qdisc = child;
216 } 217 }
@@ -313,12 +314,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
313 if (new == NULL) 314 if (new == NULL)
314 new = &noop_qdisc; 315 new = &noop_qdisc;
315 316
316 sch_tree_lock(sch); 317 *old = qdisc_replace(sch, new, &q->qdisc);
317 *old = q->qdisc;
318 q->qdisc = new;
319 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
320 qdisc_reset(*old);
321 sch_tree_unlock(sch);
322 return 0; 318 return 0;
323} 319}
324 320
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5bbb6332ec57..c69611640fa5 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
510 510
511 sch_tree_lock(sch); 511 sch_tree_lock(sch);
512 512
513 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 513 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
514 q->qdisc->qstats.backlog);
514 qdisc_destroy(q->qdisc); 515 qdisc_destroy(q->qdisc);
515 q->qdisc = child; 516 q->qdisc = child;
516 517
@@ -606,12 +607,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
606 if (new == NULL) 607 if (new == NULL)
607 new = &noop_qdisc; 608 new = &noop_qdisc;
608 609
609 sch_tree_lock(sch); 610 *old = qdisc_replace(sch, new, &q->qdisc);
610 *old = q->qdisc;
611 q->qdisc = new;
612 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
613 qdisc_reset(*old);
614 sch_tree_unlock(sch);
615 return 0; 611 return 0;
616} 612}
617 613
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3abab534eb5c..498f0a2cb47f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -346,7 +346,7 @@ static int
346sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) 346sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
347{ 347{
348 struct sfq_sched_data *q = qdisc_priv(sch); 348 struct sfq_sched_data *q = qdisc_priv(sch);
349 unsigned int hash; 349 unsigned int hash, dropped;
350 sfq_index x, qlen; 350 sfq_index x, qlen;
351 struct sfq_slot *slot; 351 struct sfq_slot *slot;
352 int uninitialized_var(ret); 352 int uninitialized_var(ret);
@@ -461,7 +461,7 @@ enqueue:
461 return NET_XMIT_SUCCESS; 461 return NET_XMIT_SUCCESS;
462 462
463 qlen = slot->qlen; 463 qlen = slot->qlen;
464 sfq_drop(sch); 464 dropped = sfq_drop(sch);
465 /* Return Congestion Notification only if we dropped a packet 465 /* Return Congestion Notification only if we dropped a packet
466 * from this flow. 466 * from this flow.
467 */ 467 */
@@ -469,7 +469,7 @@ enqueue:
469 return NET_XMIT_CN; 469 return NET_XMIT_CN;
470 470
471 /* As we dropped a packet, better let upper stack know this */ 471 /* As we dropped a packet, better let upper stack know this */
472 qdisc_tree_decrease_qlen(sch, 1); 472 qdisc_tree_reduce_backlog(sch, 1, dropped);
473 return NET_XMIT_SUCCESS; 473 return NET_XMIT_SUCCESS;
474} 474}
475 475
@@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch)
537 struct sfq_slot *slot; 537 struct sfq_slot *slot;
538 struct sk_buff_head list; 538 struct sk_buff_head list;
539 int dropped = 0; 539 int dropped = 0;
540 unsigned int drop_len = 0;
540 541
541 __skb_queue_head_init(&list); 542 __skb_queue_head_init(&list);
542 543
@@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch)
565 if (x >= SFQ_MAX_FLOWS) { 566 if (x >= SFQ_MAX_FLOWS) {
566drop: 567drop:
567 qdisc_qstats_backlog_dec(sch, skb); 568 qdisc_qstats_backlog_dec(sch, skb);
569 drop_len += qdisc_pkt_len(skb);
568 kfree_skb(skb); 570 kfree_skb(skb);
569 dropped++; 571 dropped++;
570 continue; 572 continue;
@@ -594,7 +596,7 @@ drop:
594 } 596 }
595 } 597 }
596 sch->q.qlen -= dropped; 598 sch->q.qlen -= dropped;
597 qdisc_tree_decrease_qlen(sch, dropped); 599 qdisc_tree_reduce_backlog(sch, dropped, drop_len);
598} 600}
599 601
600static void sfq_perturbation(unsigned long arg) 602static void sfq_perturbation(unsigned long arg)
@@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
618 struct sfq_sched_data *q = qdisc_priv(sch); 620 struct sfq_sched_data *q = qdisc_priv(sch);
619 struct tc_sfq_qopt *ctl = nla_data(opt); 621 struct tc_sfq_qopt *ctl = nla_data(opt);
620 struct tc_sfq_qopt_v1 *ctl_v1 = NULL; 622 struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
621 unsigned int qlen; 623 unsigned int qlen, dropped = 0;
622 struct red_parms *p = NULL; 624 struct red_parms *p = NULL;
623 625
624 if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 626 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
@@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
667 669
668 qlen = sch->q.qlen; 670 qlen = sch->q.qlen;
669 while (sch->q.qlen > q->limit) 671 while (sch->q.qlen > q->limit)
670 sfq_drop(sch); 672 dropped += sfq_drop(sch);
671 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); 673 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
672 674
673 del_timer(&q->perturb_timer); 675 del_timer(&q->perturb_timer);
674 if (q->perturb_period) { 676 if (q->perturb_period) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a4afde14e865..c2fbde742f37 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
160 struct tbf_sched_data *q = qdisc_priv(sch); 160 struct tbf_sched_data *q = qdisc_priv(sch);
161 struct sk_buff *segs, *nskb; 161 struct sk_buff *segs, *nskb;
162 netdev_features_t features = netif_skb_features(skb); 162 netdev_features_t features = netif_skb_features(skb);
163 unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
163 int ret, nb; 164 int ret, nb;
164 165
165 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 166 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
172 nskb = segs->next; 173 nskb = segs->next;
173 segs->next = NULL; 174 segs->next = NULL;
174 qdisc_skb_cb(segs)->pkt_len = segs->len; 175 qdisc_skb_cb(segs)->pkt_len = segs->len;
176 len += segs->len;
175 ret = qdisc_enqueue(segs, q->qdisc); 177 ret = qdisc_enqueue(segs, q->qdisc);
176 if (ret != NET_XMIT_SUCCESS) { 178 if (ret != NET_XMIT_SUCCESS) {
177 if (net_xmit_drop_count(ret)) 179 if (net_xmit_drop_count(ret))
@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
183 } 185 }
184 sch->q.qlen += nb; 186 sch->q.qlen += nb;
185 if (nb > 1) 187 if (nb > 1)
186 qdisc_tree_decrease_qlen(sch, 1 - nb); 188 qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
187 consume_skb(skb); 189 consume_skb(skb);
188 return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; 190 return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
189} 191}
@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
399 401
400 sch_tree_lock(sch); 402 sch_tree_lock(sch);
401 if (child) { 403 if (child) {
402 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 404 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
405 q->qdisc->qstats.backlog);
403 qdisc_destroy(q->qdisc); 406 qdisc_destroy(q->qdisc);
404 q->qdisc = child; 407 q->qdisc = child;
405 } 408 }
@@ -502,13 +505,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
502 if (new == NULL) 505 if (new == NULL)
503 new = &noop_qdisc; 506 new = &noop_qdisc;
504 507
505 sch_tree_lock(sch); 508 *old = qdisc_replace(sch, new, &q->qdisc);
506 *old = q->qdisc;
507 q->qdisc = new;
508 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
509 qdisc_reset(*old);
510 sch_tree_unlock(sch);
511
512 return 0; 509 return 0;
513} 510}
514 511
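
tbf_segment() shows the cleanest form of the GSO bookkeeping that the netem
finish_segs hunk above also relies on: the parent already counted the original
skb as one packet of prev_len bytes, but after software segmentation the child
holds nb packets totalling len bytes, so qdisc_tree_reduce_backlog(sch, 1 - nb,
prev_len - len) is called with a negative delta that in effect grows the
upstream counters. As a rough worked example, if a 3000-byte GSO skb splits
into three segments of about 1000 bytes that all enqueue successfully, nb = 3
and len is roughly 3000, so the call is approximately
qdisc_tree_reduce_backlog(sch, -2, 0): the ancestors gain two packets of queue
length while their byte backlog stays where it was.
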
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 2bf8ec92dde4..e1849f3714ad 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1263,7 +1263,7 @@ static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
1263 if (score_curr > score_best) 1263 if (score_curr > score_best)
1264 return curr; 1264 return curr;
1265 else if (score_curr == score_best) 1265 else if (score_curr == score_best)
1266 return sctp_trans_elect_tie(curr, best); 1266 return sctp_trans_elect_tie(best, curr);
1267 else 1267 else
1268 return best; 1268 return best;
1269} 1269}
@@ -1406,7 +1406,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
1406 list_for_each_entry(t, &asoc->peer.transport_addr_list, 1406 list_for_each_entry(t, &asoc->peer.transport_addr_list,
1407 transports) { 1407 transports) {
1408 if (t->pmtu_pending && t->dst) { 1408 if (t->pmtu_pending && t->dst) {
1409 sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst)); 1409 sctp_transport_update_pmtu(sk, t,
1410 WORD_TRUNC(dst_mtu(t->dst)));
1410 t->pmtu_pending = 0; 1411 t->pmtu_pending = 0;
1411 } 1412 }
1412 if (!pmtu || (t->pathmtu < pmtu)) 1413 if (!pmtu || (t->pathmtu < pmtu))
@@ -1493,7 +1494,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
1493 1494
1494 asoc->peer.sack_needed = 0; 1495 asoc->peer.sack_needed = 0;
1495 1496
1496 sctp_outq_tail(&asoc->outqueue, sack); 1497 sctp_outq_tail(&asoc->outqueue, sack, GFP_ATOMIC);
1497 1498
1498 /* Stop the SACK timer. */ 1499 /* Stop the SACK timer. */
1499 timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; 1500 timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK];
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 1543e39f47c3..912eb1685a5d 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -27,9 +27,9 @@
27 * Vlad Yasevich <vladislav.yasevich@hp.com> 27 * Vlad Yasevich <vladislav.yasevich@hp.com>
28 */ 28 */
29 29
30#include <crypto/hash.h>
30#include <linux/slab.h> 31#include <linux/slab.h>
31#include <linux/types.h> 32#include <linux/types.h>
32#include <linux/crypto.h>
33#include <linux/scatterlist.h> 33#include <linux/scatterlist.h>
34#include <net/sctp/sctp.h> 34#include <net/sctp/sctp.h>
35#include <net/sctp/auth.h> 35#include <net/sctp/auth.h>
@@ -448,7 +448,7 @@ struct sctp_shared_key *sctp_auth_get_shkey(
448 */ 448 */
449int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) 449int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
450{ 450{
451 struct crypto_hash *tfm = NULL; 451 struct crypto_shash *tfm = NULL;
452 __u16 id; 452 __u16 id;
453 453
454 /* If AUTH extension is disabled, we are done */ 454 /* If AUTH extension is disabled, we are done */
@@ -462,9 +462,8 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
462 return 0; 462 return 0;
463 463
464 /* Allocated the array of pointers to transorms */ 464 /* Allocated the array of pointers to transorms */
465 ep->auth_hmacs = kzalloc( 465 ep->auth_hmacs = kzalloc(sizeof(struct crypto_shash *) *
466 sizeof(struct crypto_hash *) * SCTP_AUTH_NUM_HMACS, 466 SCTP_AUTH_NUM_HMACS, gfp);
467 gfp);
468 if (!ep->auth_hmacs) 467 if (!ep->auth_hmacs)
469 return -ENOMEM; 468 return -ENOMEM;
470 469
@@ -483,8 +482,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
483 continue; 482 continue;
484 483
485 /* Allocate the ID */ 484 /* Allocate the ID */
486 tfm = crypto_alloc_hash(sctp_hmac_list[id].hmac_name, 0, 485 tfm = crypto_alloc_shash(sctp_hmac_list[id].hmac_name, 0, 0);
487 CRYPTO_ALG_ASYNC);
488 if (IS_ERR(tfm)) 486 if (IS_ERR(tfm))
489 goto out_err; 487 goto out_err;
490 488
@@ -500,7 +498,7 @@ out_err:
500} 498}
501 499
502/* Destroy the hmac tfm array */ 500/* Destroy the hmac tfm array */
503void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[]) 501void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[])
504{ 502{
505 int i; 503 int i;
506 504
@@ -508,8 +506,7 @@ void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[])
508 return; 506 return;
509 507
510 for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) { 508 for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) {
511 if (auth_hmacs[i]) 509 crypto_free_shash(auth_hmacs[i]);
512 crypto_free_hash(auth_hmacs[i]);
513 } 510 }
514 kfree(auth_hmacs); 511 kfree(auth_hmacs);
515} 512}
@@ -709,8 +706,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
709 struct sctp_auth_chunk *auth, 706 struct sctp_auth_chunk *auth,
710 gfp_t gfp) 707 gfp_t gfp)
711{ 708{
712 struct scatterlist sg; 709 struct crypto_shash *tfm;
713 struct hash_desc desc;
714 struct sctp_auth_bytes *asoc_key; 710 struct sctp_auth_bytes *asoc_key;
715 __u16 key_id, hmac_id; 711 __u16 key_id, hmac_id;
716 __u8 *digest; 712 __u8 *digest;
@@ -742,16 +738,22 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
742 738
743 /* set up scatter list */ 739 /* set up scatter list */
744 end = skb_tail_pointer(skb); 740 end = skb_tail_pointer(skb);
745 sg_init_one(&sg, auth, end - (unsigned char *)auth);
746 741
747 desc.tfm = asoc->ep->auth_hmacs[hmac_id]; 742 tfm = asoc->ep->auth_hmacs[hmac_id];
748 desc.flags = 0;
749 743
750 digest = auth->auth_hdr.hmac; 744 digest = auth->auth_hdr.hmac;
751 if (crypto_hash_setkey(desc.tfm, &asoc_key->data[0], asoc_key->len)) 745 if (crypto_shash_setkey(tfm, &asoc_key->data[0], asoc_key->len))
752 goto free; 746 goto free;
753 747
754 crypto_hash_digest(&desc, &sg, sg.length, digest); 748 {
749 SHASH_DESC_ON_STACK(desc, tfm);
750
751 desc->tfm = tfm;
752 desc->flags = 0;
753 crypto_shash_digest(desc, (u8 *)auth,
754 end - (unsigned char *)auth, digest);
755 shash_desc_zero(desc);
756 }
755 757
756free: 758free:
757 if (free_key) 759 if (free_key)
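
The sctp/auth.c hunks are part of the move from the legacy crypto_hash
interface to the synchronous-hash (shash) API: transforms are allocated with
crypto_alloc_shash(), keyed with crypto_shash_setkey(), and the digest is
computed through a stack-allocated descriptor instead of a scatterlist. A
small sketch of that usage pattern, assuming an HMAC transform name such as
"hmac(sha1)" and with error handling trimmed to the essentials:

	#include <crypto/hash.h>

	static int example_hmac(const u8 *key, unsigned int keylen,
				const u8 *data, unsigned int len, u8 *digest)
	{
		struct crypto_shash *tfm;
		int err;

		tfm = crypto_alloc_shash("hmac(sha1)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_shash_setkey(tfm, key, keylen);
		if (!err) {
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			err = crypto_shash_digest(desc, data, len, digest);
			shash_desc_zero(desc);	/* scrub the key material */
		}
		crypto_free_shash(tfm);
		return err;
	}
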
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 871cdf9567e6..401c60750b20 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -111,7 +111,8 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
111 dest->port = src->port; 111 dest->port = src->port;
112 112
113 list_for_each_entry(addr, &src->address_list, list) { 113 list_for_each_entry(addr, &src->address_list, list) {
114 error = sctp_add_bind_addr(dest, &addr->a, 1, gfp); 114 error = sctp_add_bind_addr(dest, &addr->a, sizeof(addr->a),
115 1, gfp);
115 if (error < 0) 116 if (error < 0)
116 break; 117 break;
117 } 118 }
@@ -150,7 +151,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
150 151
151/* Add an address to the bind address list in the SCTP_bind_addr structure. */ 152/* Add an address to the bind address list in the SCTP_bind_addr structure. */
152int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, 153int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
153 __u8 addr_state, gfp_t gfp) 154 int new_size, __u8 addr_state, gfp_t gfp)
154{ 155{
155 struct sctp_sockaddr_entry *addr; 156 struct sctp_sockaddr_entry *addr;
156 157
@@ -159,7 +160,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
159 if (!addr) 160 if (!addr)
160 return -ENOMEM; 161 return -ENOMEM;
161 162
162 memcpy(&addr->a, new, sizeof(*new)); 163 memcpy(&addr->a, new, min_t(size_t, sizeof(*new), new_size));
163 164
164 /* Fix up the port if it has not yet been set. 165 /* Fix up the port if it has not yet been set.
165 * Both v4 and v6 have the port at the same offset. 166 * Both v4 and v6 have the port at the same offset.
@@ -291,7 +292,8 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
291 } 292 }
292 293
293 af->from_addr_param(&addr, rawaddr, htons(port), 0); 294 af->from_addr_param(&addr, rawaddr, htons(port), 0);
294 retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp); 295 retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
296 SCTP_ADDR_SRC, gfp);
295 if (retval) { 297 if (retval) {
296 /* Can't finish building the list, clean up. */ 298 /* Can't finish building the list, clean up. */
297 sctp_bind_addr_clean(bp); 299 sctp_bind_addr_clean(bp);
@@ -453,8 +455,8 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
453 (((AF_INET6 == addr->sa.sa_family) && 455 (((AF_INET6 == addr->sa.sa_family) &&
454 (flags & SCTP_ADDR6_ALLOWED) && 456 (flags & SCTP_ADDR6_ALLOWED) &&
455 (flags & SCTP_ADDR6_PEERSUPP)))) 457 (flags & SCTP_ADDR6_PEERSUPP))))
456 error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC, 458 error = sctp_add_bind_addr(dest, addr, sizeof(*addr),
457 gfp); 459 SCTP_ADDR_SRC, gfp);
458 } 460 }
459 461
460 return error; 462 return error;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index a3380917f197..958ef5f33f4b 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -70,19 +70,6 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp)
70 return msg; 70 return msg;
71} 71}
72 72
73void sctp_datamsg_free(struct sctp_datamsg *msg)
74{
75 struct sctp_chunk *chunk;
76
77 /* This doesn't have to be a _safe vairant because
78 * sctp_chunk_free() only drops the refs.
79 */
80 list_for_each_entry(chunk, &msg->chunks, frag_list)
81 sctp_chunk_free(chunk);
82
83 sctp_datamsg_put(msg);
84}
85
86/* Final destructruction of datamsg memory. */ 73/* Final destructruction of datamsg memory. */
87static void sctp_datamsg_destroy(struct sctp_datamsg *msg) 74static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
88{ 75{
@@ -273,7 +260,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
273 frag |= SCTP_DATA_SACK_IMM; 260 frag |= SCTP_DATA_SACK_IMM;
274 } 261 }
275 262
276 chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0); 263 chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag,
264 0, GFP_KERNEL);
277 265
278 if (!chunk) { 266 if (!chunk) {
279 err = -ENOMEM; 267 err = -ENOMEM;
@@ -309,7 +297,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
309 (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) 297 (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY))
310 frag |= SCTP_DATA_SACK_IMM; 298 frag |= SCTP_DATA_SACK_IMM;
311 299
312 chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0); 300 chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag,
301 0, GFP_KERNEL);
313 302
314 if (!chunk) { 303 if (!chunk) {
315 err = -ENOMEM; 304 err = -ENOMEM;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 2522a6175291..9d494e35e7f9 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -42,7 +42,6 @@
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/in.h> 43#include <linux/in.h>
44#include <linux/random.h> /* get_random_bytes() */ 44#include <linux/random.h> /* get_random_bytes() */
45#include <linux/crypto.h>
46#include <net/sock.h> 45#include <net/sock.h>
47#include <net/ipv6.h> 46#include <net/ipv6.h>
48#include <net/sctp/sctp.h> 47#include <net/sctp/sctp.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 49d2cc751386..00b8445364e3 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -221,7 +221,7 @@ int sctp_rcv(struct sk_buff *skb)
221 goto discard_release; 221 goto discard_release;
222 222
223 /* Create an SCTP packet structure. */ 223 /* Create an SCTP packet structure. */
224 chunk = sctp_chunkify(skb, asoc, sk); 224 chunk = sctp_chunkify(skb, asoc, sk, GFP_ATOMIC);
225 if (!chunk) 225 if (!chunk)
226 goto discard_release; 226 goto discard_release;
227 SCTP_INPUT_CB(skb)->chunk = chunk; 227 SCTP_INPUT_CB(skb)->chunk = chunk;
@@ -606,7 +606,8 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
606 606
607 /* PMTU discovery (RFC1191) */ 607 /* PMTU discovery (RFC1191) */
608 if (ICMP_FRAG_NEEDED == code) { 608 if (ICMP_FRAG_NEEDED == code) {
609 sctp_icmp_frag_needed(sk, asoc, transport, info); 609 sctp_icmp_frag_needed(sk, asoc, transport,
610 WORD_TRUNC(info));
610 goto out_unlock; 611 goto out_unlock;
611 } else { 612 } else {
612 if (ICMP_PROT_UNREACH == code) { 613 if (ICMP_PROT_UNREACH == code) {
@@ -937,7 +938,6 @@ static struct sctp_association *__sctp_lookup_association(
937 struct sctp_transport *t; 938 struct sctp_transport *t;
938 struct sctp_association *asoc = NULL; 939 struct sctp_association *asoc = NULL;
939 940
940 rcu_read_lock();
941 t = sctp_addrs_lookup_transport(net, local, peer); 941 t = sctp_addrs_lookup_transport(net, local, peer);
942 if (!t || !sctp_transport_hold(t)) 942 if (!t || !sctp_transport_hold(t))
943 goto out; 943 goto out;
@@ -949,7 +949,6 @@ static struct sctp_association *__sctp_lookup_association(
949 sctp_transport_put(t); 949 sctp_transport_put(t);
950 950
951out: 951out:
952 rcu_read_unlock();
953 return asoc; 952 return asoc;
954} 953}
955 954
@@ -962,7 +961,9 @@ struct sctp_association *sctp_lookup_association(struct net *net,
962{ 961{
963 struct sctp_association *asoc; 962 struct sctp_association *asoc;
964 963
964 rcu_read_lock();
965 asoc = __sctp_lookup_association(net, laddr, paddr, transportp); 965 asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
966 rcu_read_unlock();
966 967
967 return asoc; 968 return asoc;
968} 969}
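
Two of the sctp hunks above (sctp_assoc_sync_pmtu() and the ICMP frag-needed
handler) start passing the discovered path MTU through WORD_TRUNC() before
using it. The macro itself is not shown in this diff; the assumption here is
that it simply rounds the value down to a 4-byte multiple so that chunk
padding can never push a packet past the real MTU, e.g. WORD_TRUNC(1499)
would give 1496 while WORD_TRUNC(1500) stays 1500.
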
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9d610eddd19e..9844fe573029 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -153,7 +153,7 @@ void sctp_packet_free(struct sctp_packet *packet)
153 */ 153 */
154sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, 154sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
155 struct sctp_chunk *chunk, 155 struct sctp_chunk *chunk,
156 int one_packet) 156 int one_packet, gfp_t gfp)
157{ 157{
158 sctp_xmit_t retval; 158 sctp_xmit_t retval;
159 int error = 0; 159 int error = 0;
@@ -163,7 +163,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
163 switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { 163 switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
164 case SCTP_XMIT_PMTU_FULL: 164 case SCTP_XMIT_PMTU_FULL:
165 if (!packet->has_cookie_echo) { 165 if (!packet->has_cookie_echo) {
166 error = sctp_packet_transmit(packet); 166 error = sctp_packet_transmit(packet, gfp);
167 if (error < 0) 167 if (error < 0)
168 chunk->skb->sk->sk_err = -error; 168 chunk->skb->sk->sk_err = -error;
169 169
@@ -376,7 +376,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
376 * 376 *
377 * The return value is a normal kernel error return value. 377 * The return value is a normal kernel error return value.
378 */ 378 */
379int sctp_packet_transmit(struct sctp_packet *packet) 379int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
380{ 380{
381 struct sctp_transport *tp = packet->transport; 381 struct sctp_transport *tp = packet->transport;
382 struct sctp_association *asoc = tp->asoc; 382 struct sctp_association *asoc = tp->asoc;
@@ -401,7 +401,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
401 sk = chunk->skb->sk; 401 sk = chunk->skb->sk;
402 402
403 /* Allocate the new skb. */ 403 /* Allocate the new skb. */
404 nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC); 404 nskb = alloc_skb(packet->size + MAX_HEADER, gfp);
405 if (!nskb) 405 if (!nskb)
406 goto nomem; 406 goto nomem;
407 407
@@ -523,8 +523,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
523 */ 523 */
524 if (auth) 524 if (auth)
525 sctp_auth_calculate_hmac(asoc, nskb, 525 sctp_auth_calculate_hmac(asoc, nskb,
526 (struct sctp_auth_chunk *)auth, 526 (struct sctp_auth_chunk *)auth,
527 GFP_ATOMIC); 527 gfp);
528 528
529 /* 2) Calculate the Adler-32 checksum of the whole packet, 529 /* 2) Calculate the Adler-32 checksum of the whole packet,
530 * including the SCTP common header and all the 530 * including the SCTP common header and all the
@@ -705,7 +705,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
705 /* Check whether this chunk and all the rest of pending data will fit 705 /* Check whether this chunk and all the rest of pending data will fit
706 * or delay in hopes of bundling a full sized packet. 706 * or delay in hopes of bundling a full sized packet.
707 */ 707 */
708 if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead) 708 if (chunk->skb->len + q->out_qlen >
709 transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4)
709 /* Enough data queued to fill a packet */ 710 /* Enough data queued to fill a packet */
710 return SCTP_XMIT_OK; 711 return SCTP_XMIT_OK;
711 712
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c0380cfb16ae..084718f9b3da 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -68,7 +68,7 @@ static void sctp_mark_missing(struct sctp_outq *q,
68 68
69static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); 69static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
70 70
71static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout); 71static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
72 72
73/* Add data to the front of the queue. */ 73/* Add data to the front of the queue. */
74static inline void sctp_outq_head_data(struct sctp_outq *q, 74static inline void sctp_outq_head_data(struct sctp_outq *q,
@@ -285,7 +285,7 @@ void sctp_outq_free(struct sctp_outq *q)
285} 285}
286 286
287/* Put a new chunk in an sctp_outq. */ 287/* Put a new chunk in an sctp_outq. */
288int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) 288int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
289{ 289{
290 struct net *net = sock_net(q->asoc->base.sk); 290 struct net *net = sock_net(q->asoc->base.sk);
291 int error = 0; 291 int error = 0;
@@ -341,7 +341,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
341 return error; 341 return error;
342 342
343 if (!q->cork) 343 if (!q->cork)
344 error = sctp_outq_flush(q, 0); 344 error = sctp_outq_flush(q, 0, gfp);
345 345
346 return error; 346 return error;
347} 347}
@@ -510,7 +510,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
510 * will be flushed at the end. 510 * will be flushed at the end.
511 */ 511 */
512 if (reason != SCTP_RTXR_FAST_RTX) 512 if (reason != SCTP_RTXR_FAST_RTX)
513 error = sctp_outq_flush(q, /* rtx_timeout */ 1); 513 error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
514 514
515 if (error) 515 if (error)
516 q->asoc->base.sk->sk_err = -error; 516 q->asoc->base.sk->sk_err = -error;
@@ -601,12 +601,12 @@ redo:
601 * control chunks are already freed so there 601 * control chunks are already freed so there
602 * is nothing we can do. 602 * is nothing we can do.
603 */ 603 */
604 sctp_packet_transmit(pkt); 604 sctp_packet_transmit(pkt, GFP_ATOMIC);
605 goto redo; 605 goto redo;
606 } 606 }
607 607
608 /* Send this packet. */ 608 /* Send this packet. */
609 error = sctp_packet_transmit(pkt); 609 error = sctp_packet_transmit(pkt, GFP_ATOMIC);
610 610
611 /* If we are retransmitting, we should only 611 /* If we are retransmitting, we should only
612 * send a single packet. 612 * send a single packet.
@@ -622,7 +622,7 @@ redo:
622 622
623 case SCTP_XMIT_RWND_FULL: 623 case SCTP_XMIT_RWND_FULL:
624 /* Send this packet. */ 624 /* Send this packet. */
625 error = sctp_packet_transmit(pkt); 625 error = sctp_packet_transmit(pkt, GFP_ATOMIC);
626 626
627 /* Stop sending DATA as there is no more room 627 /* Stop sending DATA as there is no more room
628 * at the receiver. 628 * at the receiver.
@@ -632,7 +632,7 @@ redo:
632 632
633 case SCTP_XMIT_DELAY: 633 case SCTP_XMIT_DELAY:
634 /* Send this packet. */ 634 /* Send this packet. */
635 error = sctp_packet_transmit(pkt); 635 error = sctp_packet_transmit(pkt, GFP_ATOMIC);
636 636
637 /* Stop sending DATA because of nagle delay. */ 637 /* Stop sending DATA because of nagle delay. */
638 done = 1; 638 done = 1;
@@ -685,12 +685,12 @@ redo:
685} 685}
686 686
687/* Cork the outqueue so queued chunks are really queued. */ 687/* Cork the outqueue so queued chunks are really queued. */
688int sctp_outq_uncork(struct sctp_outq *q) 688int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
689{ 689{
690 if (q->cork) 690 if (q->cork)
691 q->cork = 0; 691 q->cork = 0;
692 692
693 return sctp_outq_flush(q, 0); 693 return sctp_outq_flush(q, 0, gfp);
694} 694}
695 695
696 696
@@ -703,7 +703,7 @@ int sctp_outq_uncork(struct sctp_outq *q)
703 * locking concerns must be made. Today we use the sock lock to protect 703 * locking concerns must be made. Today we use the sock lock to protect
704 * this function. 704 * this function.
705 */ 705 */
706static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) 706static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
707{ 707{
708 struct sctp_packet *packet; 708 struct sctp_packet *packet;
709 struct sctp_packet singleton; 709 struct sctp_packet singleton;
@@ -825,7 +825,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
825 sctp_packet_init(&singleton, transport, sport, dport); 825 sctp_packet_init(&singleton, transport, sport, dport);
826 sctp_packet_config(&singleton, vtag, 0); 826 sctp_packet_config(&singleton, vtag, 0);
827 sctp_packet_append_chunk(&singleton, chunk); 827 sctp_packet_append_chunk(&singleton, chunk);
828 error = sctp_packet_transmit(&singleton); 828 error = sctp_packet_transmit(&singleton, gfp);
829 if (error < 0) 829 if (error < 0)
830 return error; 830 return error;
831 break; 831 break;
@@ -856,7 +856,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
856 case SCTP_CID_ASCONF: 856 case SCTP_CID_ASCONF:
857 case SCTP_CID_FWD_TSN: 857 case SCTP_CID_FWD_TSN:
858 status = sctp_packet_transmit_chunk(packet, chunk, 858 status = sctp_packet_transmit_chunk(packet, chunk,
859 one_packet); 859 one_packet, gfp);
860 if (status != SCTP_XMIT_OK) { 860 if (status != SCTP_XMIT_OK) {
861 /* put the chunk back */ 861 /* put the chunk back */
862 list_add(&chunk->list, &q->control_chunk_list); 862 list_add(&chunk->list, &q->control_chunk_list);
@@ -866,8 +866,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
866 * sender MUST assure that at least one T3-rtx 866 * sender MUST assure that at least one T3-rtx
867 * timer is running. 867 * timer is running.
868 */ 868 */
869 if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) 869 if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
870 sctp_transport_reset_timers(transport); 870 sctp_transport_reset_t3_rtx(transport);
871 transport->last_time_sent = jiffies;
872 }
871 } 873 }
872 break; 874 break;
873 875
@@ -924,8 +926,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
924 error = sctp_outq_flush_rtx(q, packet, 926 error = sctp_outq_flush_rtx(q, packet,
925 rtx_timeout, &start_timer); 927 rtx_timeout, &start_timer);
926 928
927 if (start_timer) 929 if (start_timer) {
928 sctp_transport_reset_timers(transport); 930 sctp_transport_reset_t3_rtx(transport);
931 transport->last_time_sent = jiffies;
932 }
929 933
930 /* This can happen on COOKIE-ECHO resend. Only 934 /* This can happen on COOKIE-ECHO resend. Only
931 * one chunk can get bundled with a COOKIE-ECHO. 935 * one chunk can get bundled with a COOKIE-ECHO.
@@ -978,8 +982,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
978 (new_transport->state == SCTP_UNCONFIRMED) || 982 (new_transport->state == SCTP_UNCONFIRMED) ||
979 (new_transport->state == SCTP_PF))) 983 (new_transport->state == SCTP_PF)))
980 new_transport = asoc->peer.active_path; 984 new_transport = asoc->peer.active_path;
981 if (new_transport->state == SCTP_UNCONFIRMED) 985 if (new_transport->state == SCTP_UNCONFIRMED) {
986 WARN_ONCE(1, "Atempt to send packet on unconfirmed path.");
987 sctp_chunk_fail(chunk, 0);
988 sctp_chunk_free(chunk);
982 continue; 989 continue;
990 }
983 991
984 /* Change packets if necessary. */ 992 /* Change packets if necessary. */
985 if (new_transport != transport) { 993 if (new_transport != transport) {
@@ -1011,7 +1019,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
1011 atomic_read(&chunk->skb->users) : -1); 1019 atomic_read(&chunk->skb->users) : -1);
1012 1020
1013 /* Add the chunk to the packet. */ 1021 /* Add the chunk to the packet. */
1014 status = sctp_packet_transmit_chunk(packet, chunk, 0); 1022 status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp);
1015 1023
1016 switch (status) { 1024 switch (status) {
1017 case SCTP_XMIT_PMTU_FULL: 1025 case SCTP_XMIT_PMTU_FULL:
@@ -1058,7 +1066,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
1058 list_add_tail(&chunk->transmitted_list, 1066 list_add_tail(&chunk->transmitted_list,
1059 &transport->transmitted); 1067 &transport->transmitted);
1060 1068
1061 sctp_transport_reset_timers(transport); 1069 sctp_transport_reset_t3_rtx(transport);
1070 transport->last_time_sent = jiffies;
1062 1071
1063 /* Only let one DATA chunk get bundled with a 1072 /* Only let one DATA chunk get bundled with a
1064 * COOKIE-ECHO chunk. 1073 * COOKIE-ECHO chunk.
@@ -1088,7 +1097,7 @@ sctp_flush_out:
1088 send_ready); 1097 send_ready);
1089 packet = &t->packet; 1098 packet = &t->packet;
1090 if (!sctp_packet_empty(packet)) 1099 if (!sctp_packet_empty(packet))
1091 error = sctp_packet_transmit(packet); 1100 error = sctp_packet_transmit(packet, gfp);
1092 1101
1093 /* Clear the burst limited state, if any */ 1102 /* Clear the burst limited state, if any */
1094 sctp_transport_burst_reset(t); 1103 sctp_transport_burst_reset(t);
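Note: the outqueue.c hunks above thread an explicit gfp_t through the whole output path (sctp_outq_tail/sctp_outq_uncork/sctp_outq_flush down to sctp_packet_transmit), so callers in process context are no longer forced into GFP_ATOMIC. A minimal sketch of the new calling convention follows; the function name is hypothetical, the real callers are the state-machine side effects in sm_sideeffect.c further down.

    static int example_send_chunk(struct sctp_association *asoc,
                                  struct sctp_chunk *chunk, gfp_t gfp)
    {
            int error, flush_error;

            /* Cork so the chunk is only queued, not flushed yet. */
            sctp_outq_cork(&asoc->outqueue);

            /* No allocation context is assumed on the caller's behalf. */
            error = sctp_outq_tail(&asoc->outqueue, chunk, gfp);

            /* Uncorking flushes via sctp_outq_flush(q, 0, gfp); a
             * process-context caller may pass GFP_KERNEL here, while
             * timer paths keep passing GFP_ATOMIC.
             */
            flush_error = sctp_outq_uncork(&asoc->outqueue, gfp);

            return error ?: flush_error;
    }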
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 5e68b94ee640..6cc2152e0740 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -65,7 +65,7 @@ static struct {
65 struct kfifo fifo; 65 struct kfifo fifo;
66 spinlock_t lock; 66 spinlock_t lock;
67 wait_queue_head_t wait; 67 wait_queue_head_t wait;
68 struct timespec tstart; 68 struct timespec64 tstart;
69} sctpw; 69} sctpw;
70 70
71static __printf(1, 2) void printl(const char *fmt, ...) 71static __printf(1, 2) void printl(const char *fmt, ...)
@@ -85,7 +85,7 @@ static __printf(1, 2) void printl(const char *fmt, ...)
85static int sctpprobe_open(struct inode *inode, struct file *file) 85static int sctpprobe_open(struct inode *inode, struct file *file)
86{ 86{
87 kfifo_reset(&sctpw.fifo); 87 kfifo_reset(&sctpw.fifo);
88 getnstimeofday(&sctpw.tstart); 88 ktime_get_ts64(&sctpw.tstart);
89 89
90 return 0; 90 return 0;
91} 91}
@@ -138,7 +138,7 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
138 struct sk_buff *skb = chunk->skb; 138 struct sk_buff *skb = chunk->skb;
139 struct sctp_transport *sp; 139 struct sctp_transport *sp;
140 static __u32 lcwnd = 0; 140 static __u32 lcwnd = 0;
141 struct timespec now; 141 struct timespec64 now;
142 142
143 sp = asoc->peer.primary_path; 143 sp = asoc->peer.primary_path;
144 144
@@ -149,8 +149,8 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
149 (full || sp->cwnd != lcwnd)) { 149 (full || sp->cwnd != lcwnd)) {
150 lcwnd = sp->cwnd; 150 lcwnd = sp->cwnd;
151 151
152 getnstimeofday(&now); 152 ktime_get_ts64(&now);
153 now = timespec_sub(now, sctpw.tstart); 153 now = timespec64_sub(now, sctpw.tstart);
154 154
155 printl("%lu.%06lu ", (unsigned long) now.tv_sec, 155 printl("%lu.%06lu ", (unsigned long) now.tv_sec,
156 (unsigned long) now.tv_nsec / NSEC_PER_USEC); 156 (unsigned long) now.tv_nsec / NSEC_PER_USEC);
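Note: sctpprobe switches its timestamps from struct timespec/getnstimeofday() to struct timespec64/ktime_get_ts64(), which is y2038-safe and based on the monotonic clock, so the printed offsets no longer jump when wall-clock time is stepped. The pattern, with placeholder variables:

    struct timespec64 start, now, delta;

    ktime_get_ts64(&start);                 /* monotonic, 64-bit seconds */
    /* ... events happen ... */
    ktime_get_ts64(&now);
    delta = timespec64_sub(now, start);
    pr_debug("%lld.%06lu elapsed\n", (long long)delta.tv_sec,
             (unsigned long)delta.tv_nsec / NSEC_PER_USEC);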
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 963dffcc2618..5cfac8d5d3b3 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -161,7 +161,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
161 struct sctp_af *af; 161 struct sctp_af *af;
162 162
163 primary = &assoc->peer.primary_addr; 163 primary = &assoc->peer.primary_addr;
164 rcu_read_lock();
165 list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list, 164 list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
166 transports) { 165 transports) {
167 addr = &transport->ipaddr; 166 addr = &transport->ipaddr;
@@ -172,7 +171,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
172 } 171 }
173 af->seq_dump_addr(seq, addr); 172 af->seq_dump_addr(seq, addr);
174 } 173 }
175 rcu_read_unlock();
176} 174}
177 175
178static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) 176static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1099e99a53c4..d3d50daa248b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -216,6 +216,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
216 (copy_flags & SCTP_ADDR6_ALLOWED) && 216 (copy_flags & SCTP_ADDR6_ALLOWED) &&
217 (copy_flags & SCTP_ADDR6_PEERSUPP)))) { 217 (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
218 error = sctp_add_bind_addr(bp, &addr->a, 218 error = sctp_add_bind_addr(bp, &addr->a,
219 sizeof(addr->a),
219 SCTP_ADDR_SRC, GFP_ATOMIC); 220 SCTP_ADDR_SRC, GFP_ATOMIC);
220 if (error) 221 if (error)
221 goto end_copy; 222 goto end_copy;
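Note: sctp_add_bind_addr() grows a length argument here and at its other call sites below (sm_make_chunk.c, socket.c), so only the bytes of the actual sockaddr are copied rather than a full union sctp_addr. The prototype implied by the call sites is roughly the following; the parameter names are guesses from the callers, not taken from the header:

    int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *addr,
                           int addr_len, __u8 addr_state, gfp_t gfp);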
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5d6a03fad378..56f364d8f932 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -45,6 +45,7 @@
45 45
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
47 47
48#include <crypto/hash.h>
48#include <linux/types.h> 49#include <linux/types.h>
49#include <linux/kernel.h> 50#include <linux/kernel.h>
50#include <linux/ip.h> 51#include <linux/ip.h>
@@ -52,7 +53,6 @@
52#include <linux/net.h> 53#include <linux/net.h>
53#include <linux/inet.h> 54#include <linux/inet.h>
54#include <linux/scatterlist.h> 55#include <linux/scatterlist.h>
55#include <linux/crypto.h>
56#include <linux/slab.h> 56#include <linux/slab.h>
57#include <net/sock.h> 57#include <net/sock.h>
58 58
@@ -62,11 +62,13 @@
62#include <net/sctp/sm.h> 62#include <net/sctp/sm.h>
63 63
64static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc, 64static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
65 __u8 type, __u8 flags, int paylen); 65 __u8 type, __u8 flags, int paylen,
66 gfp_t gfp);
66static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, 67static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
67 __u8 flags, int paylen); 68 __u8 flags, int paylen, gfp_t gfp);
68static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, 69static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
69 __u8 type, __u8 flags, int paylen); 70 __u8 type, __u8 flags, int paylen,
71 gfp_t gfp);
70static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, 72static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
71 const struct sctp_association *asoc, 73 const struct sctp_association *asoc,
72 const struct sctp_chunk *init_chunk, 74 const struct sctp_chunk *init_chunk,
@@ -318,7 +320,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
318 * PLEASE DO NOT FIXME [This version does not support Host Name.] 320 * PLEASE DO NOT FIXME [This version does not support Host Name.]
319 */ 321 */
320 322
321 retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize); 323 retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize, gfp);
322 if (!retval) 324 if (!retval)
323 goto nodata; 325 goto nodata;
324 326
@@ -465,7 +467,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
465 num_ext); 467 num_ext);
466 468
467 /* Now allocate and fill out the chunk. */ 469 /* Now allocate and fill out the chunk. */
468 retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize); 470 retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
469 if (!retval) 471 if (!retval)
470 goto nomem_chunk; 472 goto nomem_chunk;
471 473
@@ -570,7 +572,8 @@ struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
570 cookie_len = asoc->peer.cookie_len; 572 cookie_len = asoc->peer.cookie_len;
571 573
572 /* Build a cookie echo chunk. */ 574 /* Build a cookie echo chunk. */
573 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len); 575 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0,
576 cookie_len, GFP_ATOMIC);
574 if (!retval) 577 if (!retval)
575 goto nodata; 578 goto nodata;
576 retval->subh.cookie_hdr = 579 retval->subh.cookie_hdr =
@@ -615,7 +618,7 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
615{ 618{
616 struct sctp_chunk *retval; 619 struct sctp_chunk *retval;
617 620
618 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0); 621 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0, GFP_ATOMIC);
619 622
620 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 623 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
621 * 624 *
@@ -664,7 +667,7 @@ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
664 667
665 cwr.lowest_tsn = htonl(lowest_tsn); 668 cwr.lowest_tsn = htonl(lowest_tsn);
666 retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0, 669 retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
667 sizeof(sctp_cwrhdr_t)); 670 sizeof(sctp_cwrhdr_t), GFP_ATOMIC);
668 671
669 if (!retval) 672 if (!retval)
670 goto nodata; 673 goto nodata;
@@ -698,7 +701,7 @@ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
698 701
699 ecne.lowest_tsn = htonl(lowest_tsn); 702 ecne.lowest_tsn = htonl(lowest_tsn);
700 retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0, 703 retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
701 sizeof(sctp_ecnehdr_t)); 704 sizeof(sctp_ecnehdr_t), GFP_ATOMIC);
702 if (!retval) 705 if (!retval)
703 goto nodata; 706 goto nodata;
704 retval->subh.ecne_hdr = 707 retval->subh.ecne_hdr =
@@ -713,7 +716,8 @@ nodata:
713 */ 716 */
714struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, 717struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
715 const struct sctp_sndrcvinfo *sinfo, 718 const struct sctp_sndrcvinfo *sinfo,
716 int data_len, __u8 flags, __u16 ssn) 719 int data_len, __u8 flags, __u16 ssn,
720 gfp_t gfp)
717{ 721{
718 struct sctp_chunk *retval; 722 struct sctp_chunk *retval;
719 struct sctp_datahdr dp; 723 struct sctp_datahdr dp;
@@ -734,7 +738,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
734 dp.ssn = htons(ssn); 738 dp.ssn = htons(ssn);
735 739
736 chunk_len = sizeof(dp) + data_len; 740 chunk_len = sizeof(dp) + data_len;
737 retval = sctp_make_data(asoc, flags, chunk_len); 741 retval = sctp_make_data(asoc, flags, chunk_len, gfp);
738 if (!retval) 742 if (!retval)
739 goto nodata; 743 goto nodata;
740 744
@@ -781,7 +785,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
781 + sizeof(__u32) * num_dup_tsns; 785 + sizeof(__u32) * num_dup_tsns;
782 786
783 /* Create the chunk. */ 787 /* Create the chunk. */
784 retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len); 788 retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len, GFP_ATOMIC);
785 if (!retval) 789 if (!retval)
786 goto nodata; 790 goto nodata;
787 791
@@ -861,7 +865,7 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
861 shut.cum_tsn_ack = htonl(ctsn); 865 shut.cum_tsn_ack = htonl(ctsn);
862 866
863 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, 867 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
864 sizeof(sctp_shutdownhdr_t)); 868 sizeof(sctp_shutdownhdr_t), GFP_ATOMIC);
865 if (!retval) 869 if (!retval)
866 goto nodata; 870 goto nodata;
867 871
@@ -879,7 +883,8 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
879{ 883{
880 struct sctp_chunk *retval; 884 struct sctp_chunk *retval;
881 885
882 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0); 886 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0,
887 GFP_ATOMIC);
883 888
884 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 889 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
885 * 890 *
@@ -908,7 +913,8 @@ struct sctp_chunk *sctp_make_shutdown_complete(
908 */ 913 */
909 flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T; 914 flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T;
910 915
911 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0); 916 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags,
917 0, GFP_ATOMIC);
912 918
913 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 919 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
914 * 920 *
@@ -947,7 +953,8 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
947 flags = SCTP_CHUNK_FLAG_T; 953 flags = SCTP_CHUNK_FLAG_T;
948 } 954 }
949 955
950 retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint); 956 retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint,
957 GFP_ATOMIC);
951 958
952 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 959 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
953 * 960 *
@@ -1139,7 +1146,8 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
1139 struct sctp_chunk *retval; 1146 struct sctp_chunk *retval;
1140 sctp_sender_hb_info_t hbinfo; 1147 sctp_sender_hb_info_t hbinfo;
1141 1148
1142 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo)); 1149 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
1150 sizeof(hbinfo), GFP_ATOMIC);
1143 1151
1144 if (!retval) 1152 if (!retval)
1145 goto nodata; 1153 goto nodata;
@@ -1167,7 +1175,8 @@ struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
1167{ 1175{
1168 struct sctp_chunk *retval; 1176 struct sctp_chunk *retval;
1169 1177
1170 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen); 1178 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen,
1179 GFP_ATOMIC);
1171 if (!retval) 1180 if (!retval)
1172 goto nodata; 1181 goto nodata;
1173 1182
@@ -1200,7 +1209,7 @@ static struct sctp_chunk *sctp_make_op_error_space(
1200 struct sctp_chunk *retval; 1209 struct sctp_chunk *retval;
1201 1210
1202 retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0, 1211 retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
1203 sizeof(sctp_errhdr_t) + size); 1212 sizeof(sctp_errhdr_t) + size, GFP_ATOMIC);
1204 if (!retval) 1213 if (!retval)
1205 goto nodata; 1214 goto nodata;
1206 1215
@@ -1271,7 +1280,8 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
1271 return NULL; 1280 return NULL;
1272 1281
1273 retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0, 1282 retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
1274 hmac_desc->hmac_len + sizeof(sctp_authhdr_t)); 1283 hmac_desc->hmac_len + sizeof(sctp_authhdr_t),
1284 GFP_ATOMIC);
1275 if (!retval) 1285 if (!retval)
1276 return NULL; 1286 return NULL;
1277 1287
@@ -1309,11 +1319,11 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
1309 */ 1319 */
1310struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, 1320struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
1311 const struct sctp_association *asoc, 1321 const struct sctp_association *asoc,
1312 struct sock *sk) 1322 struct sock *sk, gfp_t gfp)
1313{ 1323{
1314 struct sctp_chunk *retval; 1324 struct sctp_chunk *retval;
1315 1325
1316 retval = kmem_cache_zalloc(sctp_chunk_cachep, GFP_ATOMIC); 1326 retval = kmem_cache_zalloc(sctp_chunk_cachep, gfp);
1317 1327
1318 if (!retval) 1328 if (!retval)
1319 goto nodata; 1329 goto nodata;
@@ -1361,7 +1371,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
1361 * arguments, reserving enough space for a 'paylen' byte payload. 1371 * arguments, reserving enough space for a 'paylen' byte payload.
1362 */ 1372 */
1363static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, 1373static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
1364 __u8 type, __u8 flags, int paylen) 1374 __u8 type, __u8 flags, int paylen,
1375 gfp_t gfp)
1365{ 1376{
1366 struct sctp_chunk *retval; 1377 struct sctp_chunk *retval;
1367 sctp_chunkhdr_t *chunk_hdr; 1378 sctp_chunkhdr_t *chunk_hdr;
@@ -1369,8 +1380,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
1369 struct sock *sk; 1380 struct sock *sk;
1370 1381
1371 /* No need to allocate LL here, as this is only a chunk. */ 1382 /* No need to allocate LL here, as this is only a chunk. */
1372 skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), 1383 skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
1373 GFP_ATOMIC);
1374 if (!skb) 1384 if (!skb)
1375 goto nodata; 1385 goto nodata;
1376 1386
@@ -1381,7 +1391,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
1381 chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t)); 1391 chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
1382 1392
1383 sk = asoc ? asoc->base.sk : NULL; 1393 sk = asoc ? asoc->base.sk : NULL;
1384 retval = sctp_chunkify(skb, asoc, sk); 1394 retval = sctp_chunkify(skb, asoc, sk, gfp);
1385 if (!retval) { 1395 if (!retval) {
1386 kfree_skb(skb); 1396 kfree_skb(skb);
1387 goto nodata; 1397 goto nodata;
@@ -1400,16 +1410,18 @@ nodata:
1400} 1410}
1401 1411
1402static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, 1412static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
1403 __u8 flags, int paylen) 1413 __u8 flags, int paylen, gfp_t gfp)
1404{ 1414{
1405 return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen); 1415 return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp);
1406} 1416}
1407 1417
1408static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc, 1418static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
1409 __u8 type, __u8 flags, int paylen) 1419 __u8 type, __u8 flags, int paylen,
1420 gfp_t gfp)
1410{ 1421{
1411 struct sctp_chunk *chunk = _sctp_make_chunk(asoc, type, flags, paylen); 1422 struct sctp_chunk *chunk;
1412 1423
1424 chunk = _sctp_make_chunk(asoc, type, flags, paylen, gfp);
1413 if (chunk) 1425 if (chunk)
1414 sctp_control_set_owner_w(chunk); 1426 sctp_control_set_owner_w(chunk);
1415 1427
@@ -1606,7 +1618,6 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
1606{ 1618{
1607 sctp_cookie_param_t *retval; 1619 sctp_cookie_param_t *retval;
1608 struct sctp_signed_cookie *cookie; 1620 struct sctp_signed_cookie *cookie;
1609 struct scatterlist sg;
1610 int headersize, bodysize; 1621 int headersize, bodysize;
1611 1622
1612 /* Header size is static data prior to the actual cookie, including 1623 /* Header size is static data prior to the actual cookie, including
@@ -1663,16 +1674,19 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
1663 ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len); 1674 ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
1664 1675
1665 if (sctp_sk(ep->base.sk)->hmac) { 1676 if (sctp_sk(ep->base.sk)->hmac) {
1666 struct hash_desc desc; 1677 SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
1678 int err;
1667 1679
1668 /* Sign the message. */ 1680 /* Sign the message. */
1669 sg_init_one(&sg, &cookie->c, bodysize); 1681 desc->tfm = sctp_sk(ep->base.sk)->hmac;
1670 desc.tfm = sctp_sk(ep->base.sk)->hmac; 1682 desc->flags = 0;
1671 desc.flags = 0; 1683
1672 1684 err = crypto_shash_setkey(desc->tfm, ep->secret_key,
1673 if (crypto_hash_setkey(desc.tfm, ep->secret_key, 1685 sizeof(ep->secret_key)) ?:
1674 sizeof(ep->secret_key)) || 1686 crypto_shash_digest(desc, (u8 *)&cookie->c, bodysize,
1675 crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) 1687 cookie->signature);
1688 shash_desc_zero(desc);
1689 if (err)
1676 goto free_cookie; 1690 goto free_cookie;
1677 } 1691 }
1678 1692
@@ -1697,12 +1711,10 @@ struct sctp_association *sctp_unpack_cookie(
1697 struct sctp_cookie *bear_cookie; 1711 struct sctp_cookie *bear_cookie;
1698 int headersize, bodysize, fixed_size; 1712 int headersize, bodysize, fixed_size;
1699 __u8 *digest = ep->digest; 1713 __u8 *digest = ep->digest;
1700 struct scatterlist sg;
1701 unsigned int len; 1714 unsigned int len;
1702 sctp_scope_t scope; 1715 sctp_scope_t scope;
1703 struct sk_buff *skb = chunk->skb; 1716 struct sk_buff *skb = chunk->skb;
1704 ktime_t kt; 1717 ktime_t kt;
1705 struct hash_desc desc;
1706 1718
1707 /* Header size is static data prior to the actual cookie, including 1719 /* Header size is static data prior to the actual cookie, including
1708 * any padding. 1720 * any padding.
@@ -1733,16 +1745,23 @@ struct sctp_association *sctp_unpack_cookie(
1733 goto no_hmac; 1745 goto no_hmac;
1734 1746
1735 /* Check the signature. */ 1747 /* Check the signature. */
1736 sg_init_one(&sg, bear_cookie, bodysize); 1748 {
1737 desc.tfm = sctp_sk(ep->base.sk)->hmac; 1749 SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
1738 desc.flags = 0; 1750 int err;
1739 1751
1740 memset(digest, 0x00, SCTP_SIGNATURE_SIZE); 1752 desc->tfm = sctp_sk(ep->base.sk)->hmac;
1741 if (crypto_hash_setkey(desc.tfm, ep->secret_key, 1753 desc->flags = 0;
1742 sizeof(ep->secret_key)) || 1754
1743 crypto_hash_digest(&desc, &sg, bodysize, digest)) { 1755 err = crypto_shash_setkey(desc->tfm, ep->secret_key,
1744 *error = -SCTP_IERROR_NOMEM; 1756 sizeof(ep->secret_key)) ?:
1745 goto fail; 1757 crypto_shash_digest(desc, (u8 *)bear_cookie, bodysize,
1758 digest);
1759 shash_desc_zero(desc);
1760
1761 if (err) {
1762 *error = -SCTP_IERROR_NOMEM;
1763 goto fail;
1764 }
1746 } 1765 }
1747 1766
1748 if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { 1767 if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
@@ -1830,7 +1849,8 @@ no_hmac:
1830 /* Also, add the destination address. */ 1849 /* Also, add the destination address. */
1831 if (list_empty(&retval->base.bind_addr.address_list)) { 1850 if (list_empty(&retval->base.bind_addr.address_list)) {
1832 sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1851 sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
1833 SCTP_ADDR_SRC, GFP_ATOMIC); 1852 sizeof(chunk->dest), SCTP_ADDR_SRC,
1853 GFP_ATOMIC);
1834 } 1854 }
1835 1855
1836 retval->next_tsn = retval->c.initial_tsn; 1856 retval->next_tsn = retval->c.initial_tsn;
@@ -2756,7 +2776,8 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
2756 length += addrlen; 2776 length += addrlen;
2757 2777
2758 /* Create the chunk. */ 2778 /* Create the chunk. */
2759 retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length); 2779 retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length,
2780 GFP_ATOMIC);
2760 if (!retval) 2781 if (!retval)
2761 return NULL; 2782 return NULL;
2762 2783
@@ -2940,7 +2961,8 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
2940 int length = sizeof(asconf) + vparam_len; 2961 int length = sizeof(asconf) + vparam_len;
2941 2962
2942 /* Create the chunk. */ 2963 /* Create the chunk. */
2943 retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length); 2964 retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length,
2965 GFP_ATOMIC);
2944 if (!retval) 2966 if (!retval)
2945 return NULL; 2967 return NULL;
2946 2968
@@ -3058,8 +3080,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
3058 return SCTP_ERROR_RSRC_LOW; 3080 return SCTP_ERROR_RSRC_LOW;
3059 3081
3060 /* Start the heartbeat timer. */ 3082 /* Start the heartbeat timer. */
3061 if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer))) 3083 sctp_transport_reset_hb_timer(peer);
3062 sctp_transport_hold(peer);
3063 asoc->new_transport = peer; 3084 asoc->new_transport = peer;
3064 break; 3085 break;
3065 case SCTP_PARAM_DEL_IP: 3086 case SCTP_PARAM_DEL_IP:
@@ -3500,7 +3521,7 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
3500 3521
3501 hint = (nstreams + 1) * sizeof(__u32); 3522 hint = (nstreams + 1) * sizeof(__u32);
3502 3523
3503 retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint); 3524 retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint, GFP_ATOMIC);
3504 3525
3505 if (!retval) 3526 if (!retval)
3506 return NULL; 3527 return NULL;
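Note: two independent conversions meet in sm_make_chunk.c. Chunk construction (sctp_make_control/_sctp_make_chunk/sctp_chunkify) now takes an explicit gfp_t instead of hard-coding GFP_ATOMIC, and the cookie HMAC moves from the legacy, scatterlist-based crypto_hash API to the synchronous crypto_shash API, which signs a linear buffer directly. The shash pattern used in sctp_pack_cookie()/sctp_unpack_cookie() boils down to the sketch below, where tfm, key, buf and signature stand in for sctp_sk(ep->base.sk)->hmac, ep->secret_key, the cookie body and the signature field:

    SHASH_DESC_ON_STACK(desc, tfm);     /* descriptor lives on the stack */
    int err;

    desc->tfm = tfm;
    desc->flags = 0;

    err = crypto_shash_setkey(tfm, key, keylen) ?:
          crypto_shash_digest(desc, buf, buflen, signature);
    shash_desc_zero(desc);              /* wipe key-derived state */
    if (err)
            goto free_cookie;           /* error handling as in the hunk */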
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b5327bb77458..41b081a64752 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -69,8 +69,6 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
69 sctp_cmd_seq_t *commands, 69 sctp_cmd_seq_t *commands,
70 gfp_t gfp); 70 gfp_t gfp);
71 71
72static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds,
73 struct sctp_transport *t);
74/******************************************************************** 72/********************************************************************
75 * Helper functions 73 * Helper functions
76 ********************************************************************/ 74 ********************************************************************/
@@ -215,10 +213,14 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
215 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, 213 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
216 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); 214 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
217 } else { 215 } else {
216 __u32 old_a_rwnd = asoc->a_rwnd;
217
218 asoc->a_rwnd = asoc->rwnd; 218 asoc->a_rwnd = asoc->rwnd;
219 sack = sctp_make_sack(asoc); 219 sack = sctp_make_sack(asoc);
220 if (!sack) 220 if (!sack) {
221 asoc->a_rwnd = old_a_rwnd;
221 goto nomem; 222 goto nomem;
223 }
222 224
223 asoc->peer.sack_needed = 0; 225 asoc->peer.sack_needed = 0;
224 asoc->peer.sack_cnt = 0; 226 asoc->peer.sack_cnt = 0;
@@ -363,6 +365,7 @@ void sctp_generate_heartbeat_event(unsigned long data)
363 struct sctp_association *asoc = transport->asoc; 365 struct sctp_association *asoc = transport->asoc;
364 struct sock *sk = asoc->base.sk; 366 struct sock *sk = asoc->base.sk;
365 struct net *net = sock_net(sk); 367 struct net *net = sock_net(sk);
368 u32 elapsed, timeout;
366 369
367 bh_lock_sock(sk); 370 bh_lock_sock(sk);
368 if (sock_owned_by_user(sk)) { 371 if (sock_owned_by_user(sk)) {
@@ -374,6 +377,16 @@ void sctp_generate_heartbeat_event(unsigned long data)
374 goto out_unlock; 377 goto out_unlock;
375 } 378 }
376 379
380 /* Check if we should still send the heartbeat or reschedule */
381 elapsed = jiffies - transport->last_time_sent;
382 timeout = sctp_transport_timeout(transport);
383 if (elapsed < timeout) {
384 elapsed = timeout - elapsed;
385 if (!mod_timer(&transport->hb_timer, jiffies + elapsed))
386 sctp_transport_hold(transport);
387 goto out_unlock;
388 }
389
377 error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, 390 error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
378 SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT), 391 SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
379 asoc->state, asoc->ep, asoc, 392 asoc->state, asoc->ep, asoc,
@@ -503,7 +516,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
503 0); 516 0);
504 517
505 /* Update the hb timer to resend a heartbeat every rto */ 518 /* Update the hb timer to resend a heartbeat every rto */
506 sctp_cmd_hb_timer_update(commands, transport); 519 sctp_transport_reset_hb_timer(transport);
507 } 520 }
508 521
509 if (transport->state != SCTP_INACTIVE && 522 if (transport->state != SCTP_INACTIVE &&
@@ -630,11 +643,8 @@ static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
630 * hold a reference on the transport to make sure none of 643 * hold a reference on the transport to make sure none of
631 * the needed data structures go away. 644 * the needed data structures go away.
632 */ 645 */
633 list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { 646 list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
634 647 sctp_transport_reset_hb_timer(t);
635 if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t)))
636 sctp_transport_hold(t);
637 }
638} 648}
639 649
640static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds, 650static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
@@ -665,15 +675,6 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
665} 675}
666 676
667 677
668/* Helper function to update the heartbeat timer. */
669static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds,
670 struct sctp_transport *t)
671{
672 /* Update the heartbeat timer. */
673 if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t)))
674 sctp_transport_hold(t);
675}
676
677/* Helper function to handle the reception of an HEARTBEAT ACK. */ 678/* Helper function to handle the reception of an HEARTBEAT ACK. */
678static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, 679static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
679 struct sctp_association *asoc, 680 struct sctp_association *asoc,
@@ -738,8 +739,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
738 sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at)); 739 sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at));
739 740
740 /* Update the heartbeat timer. */ 741 /* Update the heartbeat timer. */
741 if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) 742 sctp_transport_reset_hb_timer(t);
742 sctp_transport_hold(t);
743 743
744 if (was_unconfirmed && asoc->peer.transport_count == 1) 744 if (was_unconfirmed && asoc->peer.transport_count == 1)
745 sctp_transport_immediate_rtx(t); 745 sctp_transport_immediate_rtx(t);
@@ -1019,13 +1019,13 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
1019 * encouraged for small fragments. 1019 * encouraged for small fragments.
1020 */ 1020 */
1021static int sctp_cmd_send_msg(struct sctp_association *asoc, 1021static int sctp_cmd_send_msg(struct sctp_association *asoc,
1022 struct sctp_datamsg *msg) 1022 struct sctp_datamsg *msg, gfp_t gfp)
1023{ 1023{
1024 struct sctp_chunk *chunk; 1024 struct sctp_chunk *chunk;
1025 int error = 0; 1025 int error = 0;
1026 1026
1027 list_for_each_entry(chunk, &msg->chunks, frag_list) { 1027 list_for_each_entry(chunk, &msg->chunks, frag_list) {
1028 error = sctp_outq_tail(&asoc->outqueue, chunk); 1028 error = sctp_outq_tail(&asoc->outqueue, chunk, gfp);
1029 if (error) 1029 if (error)
1030 break; 1030 break;
1031 } 1031 }
@@ -1249,7 +1249,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1249 case SCTP_CMD_NEW_ASOC: 1249 case SCTP_CMD_NEW_ASOC:
1250 /* Register a new association. */ 1250 /* Register a new association. */
1251 if (local_cork) { 1251 if (local_cork) {
1252 sctp_outq_uncork(&asoc->outqueue); 1252 sctp_outq_uncork(&asoc->outqueue, gfp);
1253 local_cork = 0; 1253 local_cork = 0;
1254 } 1254 }
1255 1255
@@ -1269,7 +1269,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1269 1269
1270 case SCTP_CMD_DELETE_TCB: 1270 case SCTP_CMD_DELETE_TCB:
1271 if (local_cork) { 1271 if (local_cork) {
1272 sctp_outq_uncork(&asoc->outqueue); 1272 sctp_outq_uncork(&asoc->outqueue, gfp);
1273 local_cork = 0; 1273 local_cork = 0;
1274 } 1274 }
1275 /* Delete the current association. */ 1275 /* Delete the current association. */
@@ -1423,13 +1423,14 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1423 local_cork = 1; 1423 local_cork = 1;
1424 } 1424 }
1425 /* Send a chunk to our peer. */ 1425 /* Send a chunk to our peer. */
1426 error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk); 1426 error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk,
1427 gfp);
1427 break; 1428 break;
1428 1429
1429 case SCTP_CMD_SEND_PKT: 1430 case SCTP_CMD_SEND_PKT:
1430 /* Send a full packet to our peer. */ 1431 /* Send a full packet to our peer. */
1431 packet = cmd->obj.packet; 1432 packet = cmd->obj.packet;
1432 sctp_packet_transmit(packet); 1433 sctp_packet_transmit(packet, gfp);
1433 sctp_ootb_pkt_free(packet); 1434 sctp_ootb_pkt_free(packet);
1434 break; 1435 break;
1435 1436
@@ -1609,7 +1610,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1609 1610
1610 case SCTP_CMD_HB_TIMER_UPDATE: 1611 case SCTP_CMD_HB_TIMER_UPDATE:
1611 t = cmd->obj.transport; 1612 t = cmd->obj.transport;
1612 sctp_cmd_hb_timer_update(commands, t); 1613 sctp_transport_reset_hb_timer(t);
1613 break; 1614 break;
1614 1615
1615 case SCTP_CMD_HB_TIMERS_STOP: 1616 case SCTP_CMD_HB_TIMERS_STOP:
@@ -1639,7 +1640,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1639 */ 1640 */
1640 chunk->pdiscard = 1; 1641 chunk->pdiscard = 1;
1641 if (asoc) { 1642 if (asoc) {
1642 sctp_outq_uncork(&asoc->outqueue); 1643 sctp_outq_uncork(&asoc->outqueue, gfp);
1643 local_cork = 0; 1644 local_cork = 0;
1644 } 1645 }
1645 break; 1646 break;
@@ -1677,7 +1678,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1677 case SCTP_CMD_FORCE_PRIM_RETRAN: 1678 case SCTP_CMD_FORCE_PRIM_RETRAN:
1678 t = asoc->peer.retran_path; 1679 t = asoc->peer.retran_path;
1679 asoc->peer.retran_path = asoc->peer.primary_path; 1680 asoc->peer.retran_path = asoc->peer.primary_path;
1680 error = sctp_outq_uncork(&asoc->outqueue); 1681 error = sctp_outq_uncork(&asoc->outqueue, gfp);
1681 local_cork = 0; 1682 local_cork = 0;
1682 asoc->peer.retran_path = t; 1683 asoc->peer.retran_path = t;
1683 break; 1684 break;
@@ -1704,7 +1705,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1704 sctp_outq_cork(&asoc->outqueue); 1705 sctp_outq_cork(&asoc->outqueue);
1705 local_cork = 1; 1706 local_cork = 1;
1706 } 1707 }
1707 error = sctp_cmd_send_msg(asoc, cmd->obj.msg); 1708 error = sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
1708 break; 1709 break;
1709 case SCTP_CMD_SEND_NEXT_ASCONF: 1710 case SCTP_CMD_SEND_NEXT_ASCONF:
1710 sctp_cmd_send_asconf(asoc); 1711 sctp_cmd_send_asconf(asoc);
@@ -1734,9 +1735,9 @@ out:
1734 */ 1735 */
1735 if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { 1736 if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) {
1736 if (chunk->end_of_packet || chunk->singleton) 1737 if (chunk->end_of_packet || chunk->singleton)
1737 error = sctp_outq_uncork(&asoc->outqueue); 1738 error = sctp_outq_uncork(&asoc->outqueue, gfp);
1738 } else if (local_cork) 1739 } else if (local_cork)
1739 error = sctp_outq_uncork(&asoc->outqueue); 1740 error = sctp_outq_uncork(&asoc->outqueue, gfp);
1740 return error; 1741 return error;
1741nomem: 1742nomem:
1742 error = -ENOMEM; 1743 error = -ENOMEM;
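Note: besides routing the gfp_t from sctp_do_sm() into the outqueue calls, sm_sideeffect.c gains two behavioural fixes: the advertised window is rolled back if sctp_make_sack() fails, and the heartbeat timer handler defers instead of firing when data went out recently, using the new transport->last_time_sent bookkeeping. The deferral logic from the hunk, annotated:

    u32 elapsed = jiffies - transport->last_time_sent;
    u32 timeout = sctp_transport_timeout(transport);  /* now a relative value */

    if (elapsed < timeout) {
            /* A chunk was sent less than one heartbeat interval ago, so
             * there is fresh reachability information already; push the
             * timer out by the remaining time instead of probing.
             */
            if (!mod_timer(&transport->hb_timer,
                           jiffies + (timeout - elapsed)))
                    sctp_transport_hold(transport);
            goto out_unlock;
    }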
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e878da0949db..878d28eda1a6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -52,6 +52,7 @@
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54 54
55#include <crypto/hash.h>
55#include <linux/types.h> 56#include <linux/types.h>
56#include <linux/kernel.h> 57#include <linux/kernel.h>
57#include <linux/wait.h> 58#include <linux/wait.h>
@@ -61,7 +62,6 @@
61#include <linux/fcntl.h> 62#include <linux/fcntl.h>
62#include <linux/poll.h> 63#include <linux/poll.h>
63#include <linux/init.h> 64#include <linux/init.h>
64#include <linux/crypto.h>
65#include <linux/slab.h> 65#include <linux/slab.h>
66#include <linux/file.h> 66#include <linux/file.h>
67#include <linux/compat.h> 67#include <linux/compat.h>
@@ -386,7 +386,8 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
386 /* Add the address to the bind address list. 386 /* Add the address to the bind address list.
387 * Use GFP_ATOMIC since BHs will be disabled. 387 * Use GFP_ATOMIC since BHs will be disabled.
388 */ 388 */
389 ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC); 389 ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len,
390 SCTP_ADDR_SRC, GFP_ATOMIC);
390 391
391 /* Copy back into socket for getsockname() use. */ 392 /* Copy back into socket for getsockname() use. */
392 if (!ret) { 393 if (!ret) {
@@ -577,6 +578,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
577 af = sctp_get_af_specific(addr->v4.sin_family); 578 af = sctp_get_af_specific(addr->v4.sin_family);
578 memcpy(&saveaddr, addr, af->sockaddr_len); 579 memcpy(&saveaddr, addr, af->sockaddr_len);
579 retval = sctp_add_bind_addr(bp, &saveaddr, 580 retval = sctp_add_bind_addr(bp, &saveaddr,
581 sizeof(saveaddr),
580 SCTP_ADDR_NEW, GFP_ATOMIC); 582 SCTP_ADDR_NEW, GFP_ATOMIC);
581 addr_buf += af->sockaddr_len; 583 addr_buf += af->sockaddr_len;
582 } 584 }
@@ -1389,7 +1391,7 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len,
1389 int err = 0; 1391 int err = 0;
1390 1392
1391#ifdef CONFIG_COMPAT 1393#ifdef CONFIG_COMPAT
1392 if (is_compat_task()) { 1394 if (in_compat_syscall()) {
1393 struct compat_sctp_getaddrs_old param32; 1395 struct compat_sctp_getaddrs_old param32;
1394 1396
1395 if (len < sizeof(param32)) 1397 if (len < sizeof(param32))
@@ -4160,7 +4162,7 @@ static void sctp_destruct_sock(struct sock *sk)
4160 struct sctp_sock *sp = sctp_sk(sk); 4162 struct sctp_sock *sp = sctp_sk(sk);
4161 4163
4162 /* Free up the HMAC transform. */ 4164 /* Free up the HMAC transform. */
4163 crypto_free_hash(sp->hmac); 4165 crypto_free_shash(sp->hmac);
4164 4166
4165 inet_sock_destruct(sk); 4167 inet_sock_destruct(sk);
4166} 4168}
@@ -6106,9 +6108,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
6106 return retval; 6108 return retval;
6107} 6109}
6108 6110
6109static void sctp_hash(struct sock *sk) 6111static int sctp_hash(struct sock *sk)
6110{ 6112{
6111 /* STUB */ 6113 /* STUB */
6114 return 0;
6112} 6115}
6113 6116
6114static void sctp_unhash(struct sock *sk) 6117static void sctp_unhash(struct sock *sk)
@@ -6304,13 +6307,13 @@ static int sctp_listen_start(struct sock *sk, int backlog)
6304{ 6307{
6305 struct sctp_sock *sp = sctp_sk(sk); 6308 struct sctp_sock *sp = sctp_sk(sk);
6306 struct sctp_endpoint *ep = sp->ep; 6309 struct sctp_endpoint *ep = sp->ep;
6307 struct crypto_hash *tfm = NULL; 6310 struct crypto_shash *tfm = NULL;
6308 char alg[32]; 6311 char alg[32];
6309 6312
6310 /* Allocate HMAC for generating cookie. */ 6313 /* Allocate HMAC for generating cookie. */
6311 if (!sp->hmac && sp->sctp_hmac_alg) { 6314 if (!sp->hmac && sp->sctp_hmac_alg) {
6312 sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg); 6315 sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg);
6313 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC); 6316 tfm = crypto_alloc_shash(alg, 0, 0);
6314 if (IS_ERR(tfm)) { 6317 if (IS_ERR(tfm)) {
6315 net_info_ratelimited("failed to load transform for %s: %ld\n", 6318 net_info_ratelimited("failed to load transform for %s: %ld\n",
6316 sp->sctp_hmac_alg, PTR_ERR(tfm)); 6319 sp->sctp_hmac_alg, PTR_ERR(tfm));
@@ -7253,14 +7256,12 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
7253 /* Hook this new socket in to the bind_hash list. */ 7256 /* Hook this new socket in to the bind_hash list. */
7254 head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk), 7257 head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk),
7255 inet_sk(oldsk)->inet_num)]; 7258 inet_sk(oldsk)->inet_num)];
7256 local_bh_disable(); 7259 spin_lock_bh(&head->lock);
7257 spin_lock(&head->lock);
7258 pp = sctp_sk(oldsk)->bind_hash; 7260 pp = sctp_sk(oldsk)->bind_hash;
7259 sk_add_bind_node(newsk, &pp->owner); 7261 sk_add_bind_node(newsk, &pp->owner);
7260 sctp_sk(newsk)->bind_hash = pp; 7262 sctp_sk(newsk)->bind_hash = pp;
7261 inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num; 7263 inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num;
7262 spin_unlock(&head->lock); 7264 spin_unlock_bh(&head->lock);
7263 local_bh_enable();
7264 7265
7265 /* Copy the bind_addr list from the original endpoint to the new 7266 /* Copy the bind_addr list from the original endpoint to the new
7266 * endpoint so that we can handle restarts properly 7267 * endpoint so that we can handle restarts properly
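Note: the socket.c changes mirror the crypto conversion above (the cookie HMAC tfm is now a struct crypto_shash, allocated with crypto_alloc_shash() and freed with crypto_free_shash()), replace is_compat_task() with in_compat_syscall(), make sctp_hash() return int to match the updated proto callback, and fold local_bh_disable()+spin_lock() into spin_lock_bh(). A minimal allocation sketch, assuming "hmac(sha1)" as an example of what sp->sctp_hmac_alg expands to:

    struct crypto_shash *tfm;

    tfm = crypto_alloc_shash("hmac(sha1)", 0, 0);   /* sync hash, no flags */
    if (IS_ERR(tfm))
            return PTR_ERR(tfm);
    /* ... use with SHASH_DESC_ON_STACK() as in sm_make_chunk.c ... */
    crypto_free_shash(tfm);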
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a431c14044a4..81b86678be4d 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
72 */ 72 */
73 peer->rto = msecs_to_jiffies(net->sctp.rto_initial); 73 peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
74 74
75 peer->last_time_heard = ktime_get(); 75 peer->last_time_heard = ktime_set(0, 0);
76 peer->last_time_ecne_reduced = jiffies; 76 peer->last_time_ecne_reduced = jiffies;
77 77
78 peer->param_flags = SPP_HB_DISABLE | 78 peer->param_flags = SPP_HB_DISABLE |
@@ -183,7 +183,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
183/* Start T3_rtx timer if it is not already running and update the heartbeat 183/* Start T3_rtx timer if it is not already running and update the heartbeat
184 * timer. This routine is called every time a DATA chunk is sent. 184 * timer. This routine is called every time a DATA chunk is sent.
185 */ 185 */
186void sctp_transport_reset_timers(struct sctp_transport *transport) 186void sctp_transport_reset_t3_rtx(struct sctp_transport *transport)
187{ 187{
188 /* RFC 2960 6.3.2 Retransmission Timer Rules 188 /* RFC 2960 6.3.2 Retransmission Timer Rules
189 * 189 *
@@ -197,11 +197,18 @@ void sctp_transport_reset_timers(struct sctp_transport *transport)
197 if (!mod_timer(&transport->T3_rtx_timer, 197 if (!mod_timer(&transport->T3_rtx_timer,
198 jiffies + transport->rto)) 198 jiffies + transport->rto))
199 sctp_transport_hold(transport); 199 sctp_transport_hold(transport);
200}
201
202void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
203{
204 unsigned long expires;
200 205
201 /* When a data chunk is sent, reset the heartbeat interval. */ 206 /* When a data chunk is sent, reset the heartbeat interval. */
202 if (!mod_timer(&transport->hb_timer, 207 expires = jiffies + sctp_transport_timeout(transport);
203 sctp_transport_timeout(transport))) 208 if (time_before(transport->hb_timer.expires, expires) &&
204 sctp_transport_hold(transport); 209 !mod_timer(&transport->hb_timer,
210 expires + prandom_u32_max(transport->rto)))
211 sctp_transport_hold(transport);
205} 212}
206 213
207/* This transport has been assigned to an association. 214/* This transport has been assigned to an association.
@@ -226,7 +233,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
226 } 233 }
227 234
228 if (transport->dst) { 235 if (transport->dst) {
229 transport->pathmtu = dst_mtu(transport->dst); 236 transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
230 } else 237 } else
231 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; 238 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
232} 239}
@@ -280,7 +287,7 @@ void sctp_transport_route(struct sctp_transport *transport,
280 return; 287 return;
281 } 288 }
282 if (transport->dst) { 289 if (transport->dst) {
283 transport->pathmtu = dst_mtu(transport->dst); 290 transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
284 291
285 /* Initialize sk->sk_rcv_saddr, if the transport is the 292 /* Initialize sk->sk_rcv_saddr, if the transport is the
286 * association's active path for getsockname(). 293 * association's active path for getsockname().
@@ -595,13 +602,13 @@ void sctp_transport_burst_reset(struct sctp_transport *t)
595unsigned long sctp_transport_timeout(struct sctp_transport *trans) 602unsigned long sctp_transport_timeout(struct sctp_transport *trans)
596{ 603{
597 /* RTO + timer slack +/- 50% of RTO */ 604 /* RTO + timer slack +/- 50% of RTO */
598 unsigned long timeout = (trans->rto >> 1) + prandom_u32_max(trans->rto); 605 unsigned long timeout = trans->rto >> 1;
599 606
600 if (trans->state != SCTP_UNCONFIRMED && 607 if (trans->state != SCTP_UNCONFIRMED &&
601 trans->state != SCTP_PF) 608 trans->state != SCTP_PF)
602 timeout += trans->hbinterval; 609 timeout += trans->hbinterval;
603 610
604 return timeout + jiffies; 611 return timeout;
605} 612}
606 613
607/* Reset transport variables to their initial values */ 614/* Reset transport variables to their initial values */
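Note: transport.c splits sctp_transport_reset_timers() into sctp_transport_reset_t3_rtx() and sctp_transport_reset_hb_timer(), makes sctp_transport_timeout() return a relative interval (the random slack moves to the rearm site), and truncates the path MTU to a 4-byte boundary with WORD_TRUNC(). The heartbeat rearm only ever pushes the expiry further out; annotated:

    unsigned long expires = jiffies + sctp_transport_timeout(transport);

    /* time_before() is a wrap-safe jiffies comparison: never pull an
     * already-later expiry back in, and add up to one RTO of jitter so
     * heartbeats on different paths do not line up.
     */
    if (time_before(transport->hb_timer.expires, expires) &&
        !mod_timer(&transport->hb_timer,
                   expires + prandom_u32_max(transport->rto)))
            sctp_transport_hold(transport);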
diff --git a/net/socket.c b/net/socket.c
index c044d1e8508c..5f77a8e93830 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -533,7 +533,7 @@ static const struct inode_operations sockfs_inode_ops = {
533 * NULL is returned. 533 * NULL is returned.
534 */ 534 */
535 535
536static struct socket *sock_alloc(void) 536struct socket *sock_alloc(void)
537{ 537{
538 struct inode *inode; 538 struct inode *inode;
539 struct socket *sock; 539 struct socket *sock;
@@ -554,6 +554,7 @@ static struct socket *sock_alloc(void)
554 this_cpu_add(sockets_in_use, 1); 554 this_cpu_add(sockets_in_use, 1);
555 return sock; 555 return sock;
556} 556}
557EXPORT_SYMBOL(sock_alloc);
557 558
558/** 559/**
559 * sock_release - close a socket 560 * sock_release - close a socket
@@ -1106,12 +1107,8 @@ int __sock_create(struct net *net, int family, int type, int protocol,
1106 deadlock in module load. 1107 deadlock in module load.
1107 */ 1108 */
1108 if (family == PF_INET && type == SOCK_PACKET) { 1109 if (family == PF_INET && type == SOCK_PACKET) {
1109 static int warned; 1110 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1110 if (!warned) { 1111 current->comm);
1111 warned = 1;
1112 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1113 current->comm);
1114 }
1115 family = PF_PACKET; 1112 family = PF_PACKET;
1116 } 1113 }
1117 1114
@@ -1874,7 +1871,8 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
1874 1871
1875static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, 1872static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
1876 struct msghdr *msg_sys, unsigned int flags, 1873 struct msghdr *msg_sys, unsigned int flags,
1877 struct used_address *used_address) 1874 struct used_address *used_address,
1875 unsigned int allowed_msghdr_flags)
1878{ 1876{
1879 struct compat_msghdr __user *msg_compat = 1877 struct compat_msghdr __user *msg_compat =
1880 (struct compat_msghdr __user *)msg; 1878 (struct compat_msghdr __user *)msg;
@@ -1900,6 +1898,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
1900 1898
1901 if (msg_sys->msg_controllen > INT_MAX) 1899 if (msg_sys->msg_controllen > INT_MAX)
1902 goto out_freeiov; 1900 goto out_freeiov;
1901 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
1903 ctl_len = msg_sys->msg_controllen; 1902 ctl_len = msg_sys->msg_controllen;
1904 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1903 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1905 err = 1904 err =
@@ -1978,7 +1977,7 @@ long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
1978 if (!sock) 1977 if (!sock)
1979 goto out; 1978 goto out;
1980 1979
1981 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL); 1980 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
1982 1981
1983 fput_light(sock->file, fput_needed); 1982 fput_light(sock->file, fput_needed);
1984out: 1983out:
@@ -2005,6 +2004,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2005 struct compat_mmsghdr __user *compat_entry; 2004 struct compat_mmsghdr __user *compat_entry;
2006 struct msghdr msg_sys; 2005 struct msghdr msg_sys;
2007 struct used_address used_address; 2006 struct used_address used_address;
2007 unsigned int oflags = flags;
2008 2008
2009 if (vlen > UIO_MAXIOV) 2009 if (vlen > UIO_MAXIOV)
2010 vlen = UIO_MAXIOV; 2010 vlen = UIO_MAXIOV;
@@ -2019,11 +2019,15 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2019 entry = mmsg; 2019 entry = mmsg;
2020 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2020 compat_entry = (struct compat_mmsghdr __user *)mmsg;
2021 err = 0; 2021 err = 0;
2022 flags |= MSG_BATCH;
2022 2023
2023 while (datagrams < vlen) { 2024 while (datagrams < vlen) {
2025 if (datagrams == vlen - 1)
2026 flags = oflags;
2027
2024 if (MSG_CMSG_COMPAT & flags) { 2028 if (MSG_CMSG_COMPAT & flags) {
2025 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry, 2029 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
2026 &msg_sys, flags, &used_address); 2030 &msg_sys, flags, &used_address, MSG_EOR);
2027 if (err < 0) 2031 if (err < 0)
2028 break; 2032 break;
2029 err = __put_user(err, &compat_entry->msg_len); 2033 err = __put_user(err, &compat_entry->msg_len);
@@ -2031,7 +2035,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2031 } else { 2035 } else {
2032 err = ___sys_sendmsg(sock, 2036 err = ___sys_sendmsg(sock,
2033 (struct user_msghdr __user *)entry, 2037 (struct user_msghdr __user *)entry,
2034 &msg_sys, flags, &used_address); 2038 &msg_sys, flags, &used_address, MSG_EOR);
2035 if (err < 0) 2039 if (err < 0)
2036 break; 2040 break;
2037 err = put_user(err, &entry->msg_len); 2041 err = put_user(err, &entry->msg_len);
@@ -2240,31 +2244,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2240 cond_resched(); 2244 cond_resched();
2241 } 2245 }
2242 2246
2243out_put:
2244 fput_light(sock->file, fput_needed);
2245
2246 if (err == 0) 2247 if (err == 0)
2247 return datagrams; 2248 goto out_put;
2249
2250 if (datagrams == 0) {
2251 datagrams = err;
2252 goto out_put;
2253 }
2248 2254
2249 if (datagrams != 0) { 2255 /*
2256 * We may return less entries than requested (vlen) if the
2257 * sock is non block and there aren't enough datagrams...
2258 */
2259 if (err != -EAGAIN) {
2250 /* 2260 /*
2251 * We may return less entries than requested (vlen) if the 2261 * ... or if recvmsg returns an error after we
2252 * sock is non block and there aren't enough datagrams... 2262 * received some datagrams, where we record the
2263 * error to return on the next call or if the
2264 * app asks about it using getsockopt(SO_ERROR).
2253 */ 2265 */
2254 if (err != -EAGAIN) { 2266 sock->sk->sk_err = -err;
2255 /*
2256 * ... or if recvmsg returns an error after we
2257 * received some datagrams, where we record the
2258 * error to return on the next call or if the
2259 * app asks about it using getsockopt(SO_ERROR).
2260 */
2261 sock->sk->sk_err = -err;
2262 }
2263
2264 return datagrams;
2265 } 2267 }
2268out_put:
2269 fput_light(sock->file, fput_needed);
2266 2270
2267 return err; 2271 return datagrams;
2268} 2272}
2269 2273
2270SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, 2274SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
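Note: in net/socket.c, sock_alloc() is exported, the obsolete (PF_INET,SOCK_PACKET) warning becomes pr_info_once(), recvmmsg()'s exit path is restructured so fput_light() runs on every return, and sendmmsg() gains two related tweaks: every message except the last is sent with MSG_BATCH set (a hint to the protocol that more messages follow immediately), and MSG_EOR set in an individual msg_hdr.msg_flags is now honoured per message. A hypothetical user-space use of the per-message MSG_EOR (field names from the standard mmsghdr layout; whether MSG_EOR has an effect depends on the protocol):

    struct mmsghdr msgs[2];

    memset(msgs, 0, sizeof(msgs));
    /* ... point msgs[i].msg_hdr.msg_iov at the payloads ... */
    msgs[0].msg_hdr.msg_flags = 0;
    msgs[1].msg_hdr.msg_flags = MSG_EOR;    /* record ends after msg 1 */

    sendmmsg(fd, msgs, 2, 0);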
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index b512fbd9d79a..ea7ffa12e0f9 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -12,7 +12,8 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
12 svc.o svcsock.o svcauth.o svcauth_unix.o \ 12 svc.o svcsock.o svcauth.o svcauth_unix.o \
13 addr.o rpcb_clnt.o timer.o xdr.o \ 13 addr.o rpcb_clnt.o timer.o xdr.o \
14 sunrpc_syms.o cache.o rpc_pipe.o \ 14 sunrpc_syms.o cache.o rpc_pipe.o \
15 svc_xprt.o 15 svc_xprt.o \
16 xprtmultipath.o
16sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o 17sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
17sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o 18sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o
18sunrpc-$(CONFIG_PROC_FS) += stats.o 19sunrpc-$(CONFIG_PROC_FS) += stats.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index cabf586f47d7..15612ffa8d57 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1181,12 +1181,12 @@ static struct rpc_auth *
1181gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) 1181gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
1182{ 1182{
1183 struct gss_auth *gss_auth; 1183 struct gss_auth *gss_auth;
1184 struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt); 1184 struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
1185 1185
1186 while (clnt != clnt->cl_parent) { 1186 while (clnt != clnt->cl_parent) {
1187 struct rpc_clnt *parent = clnt->cl_parent; 1187 struct rpc_clnt *parent = clnt->cl_parent;
1188 /* Find the original parent for this transport */ 1188 /* Find the original parent for this transport */
1189 if (rcu_access_pointer(parent->cl_xprt) != xprt) 1189 if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps)
1190 break; 1190 break;
1191 clnt = parent; 1191 clnt = parent;
1192 } 1192 }
@@ -1728,8 +1728,8 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
1728 return 0; 1728 return 0;
1729 } 1729 }
1730 1730
1731 first = snd_buf->page_base >> PAGE_CACHE_SHIFT; 1731 first = snd_buf->page_base >> PAGE_SHIFT;
1732 last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT; 1732 last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT;
1733 rqstp->rq_enc_pages_num = last - first + 1 + 1; 1733 rqstp->rq_enc_pages_num = last - first + 1 + 1;
1734 rqstp->rq_enc_pages 1734 rqstp->rq_enc_pages
1735 = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), 1735 = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *),
@@ -1775,10 +1775,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
1775 status = alloc_enc_pages(rqstp); 1775 status = alloc_enc_pages(rqstp);
1776 if (status) 1776 if (status)
1777 return status; 1777 return status;
1778 first = snd_buf->page_base >> PAGE_CACHE_SHIFT; 1778 first = snd_buf->page_base >> PAGE_SHIFT;
1779 inpages = snd_buf->pages + first; 1779 inpages = snd_buf->pages + first;
1780 snd_buf->pages = rqstp->rq_enc_pages; 1780 snd_buf->pages = rqstp->rq_enc_pages;
1781 snd_buf->page_base -= first << PAGE_CACHE_SHIFT; 1781 snd_buf->page_base -= first << PAGE_SHIFT;
1782 /* 1782 /*
1783 * Give the tail its own page, in case we need extra space in the 1783 * Give the tail its own page, in case we need extra space in the
1784 * head when wrapping: 1784 * head when wrapping:
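The auth_gss.c hunks above do two mechanical things: gss_create() now keys its parent walk off the rpc_xprt_switch rather than a single rpc_xprt, and the buffer math drops the PAGE_CACHE_* aliases (which were always equal to PAGE_SHIFT/PAGE_SIZE) in favour of the plain names. A stand-alone sketch of the page-index arithmetic used in alloc_enc_pages(), with illustrative values and an assumed 4 KiB page size:

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12              /* 4 KiB pages assumed for the example */

int main(void)
{
        unsigned int page_base = 5000;  /* byte offset into the page array */
        unsigned int page_len  = 9000;  /* bytes of page data in the buffer */
        unsigned int first = page_base >> DEMO_PAGE_SHIFT;
        unsigned int last  = (page_base + page_len - 1) >> DEMO_PAGE_SHIFT;

        /* +1 for the inclusive range, +1 spare page for the tail when wrapping */
        printf("pages %u..%u, rq_enc_pages_num = %u\n",
               first, last, last - first + 1 + 1);
        return 0;
}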
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index fee3c15a4b52..244245bcbbd2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,11 +34,12 @@
34 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 34 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
35 */ 35 */
36 36
37#include <crypto/hash.h>
38#include <crypto/skcipher.h>
37#include <linux/err.h> 39#include <linux/err.h>
38#include <linux/types.h> 40#include <linux/types.h>
39#include <linux/mm.h> 41#include <linux/mm.h>
40#include <linux/scatterlist.h> 42#include <linux/scatterlist.h>
41#include <linux/crypto.h>
42#include <linux/highmem.h> 43#include <linux/highmem.h>
43#include <linux/pagemap.h> 44#include <linux/pagemap.h>
44#include <linux/random.h> 45#include <linux/random.h>
@@ -51,7 +52,7 @@
51 52
52u32 53u32
53krb5_encrypt( 54krb5_encrypt(
54 struct crypto_blkcipher *tfm, 55 struct crypto_skcipher *tfm,
55 void * iv, 56 void * iv,
56 void * in, 57 void * in,
57 void * out, 58 void * out,
@@ -60,24 +61,29 @@ krb5_encrypt(
60 u32 ret = -EINVAL; 61 u32 ret = -EINVAL;
61 struct scatterlist sg[1]; 62 struct scatterlist sg[1];
62 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; 63 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
63 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; 64 SKCIPHER_REQUEST_ON_STACK(req, tfm);
64 65
65 if (length % crypto_blkcipher_blocksize(tfm) != 0) 66 if (length % crypto_skcipher_blocksize(tfm) != 0)
66 goto out; 67 goto out;
67 68
68 if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { 69 if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
69 dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", 70 dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n",
70 crypto_blkcipher_ivsize(tfm)); 71 crypto_skcipher_ivsize(tfm));
71 goto out; 72 goto out;
72 } 73 }
73 74
74 if (iv) 75 if (iv)
75 memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm)); 76 memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm));
76 77
77 memcpy(out, in, length); 78 memcpy(out, in, length);
78 sg_init_one(sg, out, length); 79 sg_init_one(sg, out, length);
79 80
80 ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); 81 skcipher_request_set_tfm(req, tfm);
82 skcipher_request_set_callback(req, 0, NULL, NULL);
83 skcipher_request_set_crypt(req, sg, sg, length, local_iv);
84
85 ret = crypto_skcipher_encrypt(req);
86 skcipher_request_zero(req);
81out: 87out:
82 dprintk("RPC: krb5_encrypt returns %d\n", ret); 88 dprintk("RPC: krb5_encrypt returns %d\n", ret);
83 return ret; 89 return ret;
@@ -85,7 +91,7 @@ out:
85 91
86u32 92u32
87krb5_decrypt( 93krb5_decrypt(
88 struct crypto_blkcipher *tfm, 94 struct crypto_skcipher *tfm,
89 void * iv, 95 void * iv,
90 void * in, 96 void * in,
91 void * out, 97 void * out,
@@ -94,23 +100,28 @@ krb5_decrypt(
94 u32 ret = -EINVAL; 100 u32 ret = -EINVAL;
95 struct scatterlist sg[1]; 101 struct scatterlist sg[1];
96 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; 102 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
97 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; 103 SKCIPHER_REQUEST_ON_STACK(req, tfm);
98 104
99 if (length % crypto_blkcipher_blocksize(tfm) != 0) 105 if (length % crypto_skcipher_blocksize(tfm) != 0)
100 goto out; 106 goto out;
101 107
102 if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { 108 if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
103 dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", 109 dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
104 crypto_blkcipher_ivsize(tfm)); 110 crypto_skcipher_ivsize(tfm));
105 goto out; 111 goto out;
106 } 112 }
107 if (iv) 113 if (iv)
108 memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm)); 114 memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm));
109 115
110 memcpy(out, in, length); 116 memcpy(out, in, length);
111 sg_init_one(sg, out, length); 117 sg_init_one(sg, out, length);
112 118
113 ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); 119 skcipher_request_set_tfm(req, tfm);
120 skcipher_request_set_callback(req, 0, NULL, NULL);
121 skcipher_request_set_crypt(req, sg, sg, length, local_iv);
122
123 ret = crypto_skcipher_decrypt(req);
124 skcipher_request_zero(req);
114out: 125out:
115 dprintk("RPC: gss_k5decrypt returns %d\n",ret); 126 dprintk("RPC: gss_k5decrypt returns %d\n",ret);
116 return ret; 127 return ret;
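krb5_encrypt() and krb5_decrypt() above illustrate the core of the blkcipher-to-skcipher conversion in this series: the on-stack blkcipher_desc becomes an on-stack skcipher request, the IV and scatterlist are attached with skcipher_request_set_crypt(), and the request is zeroed before it goes out of scope. A minimal sketch of that one-shot synchronous pattern, stripped of the krb5-specific size checks:

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>

/* Sketch: encrypt len bytes in place with a pre-keyed synchronous skcipher. */
static int demo_encrypt_in_place(struct crypto_skcipher *tfm, u8 *iv,
                                 void *buf, unsigned int len)
{
        struct scatterlist sg[1];
        SKCIPHER_REQUEST_ON_STACK(req, tfm);
        int ret;

        sg_init_one(sg, buf, len);

        skcipher_request_set_tfm(req, tfm);
        skcipher_request_set_callback(req, 0, NULL, NULL);     /* no async completion */
        skcipher_request_set_crypt(req, sg, sg, len, iv);      /* src == dst */

        ret = crypto_skcipher_encrypt(req);
        skcipher_request_zero(req);     /* wipe request state before returning */
        return ret;
}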
@@ -119,9 +130,11 @@ out:
119static int 130static int
120checksummer(struct scatterlist *sg, void *data) 131checksummer(struct scatterlist *sg, void *data)
121{ 132{
122 struct hash_desc *desc = data; 133 struct ahash_request *req = data;
134
135 ahash_request_set_crypt(req, sg, NULL, sg->length);
123 136
124 return crypto_hash_update(desc, sg, sg->length); 137 return crypto_ahash_update(req);
125} 138}
126 139
127static int 140static int
@@ -152,13 +165,13 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
152 struct xdr_buf *body, int body_offset, u8 *cksumkey, 165 struct xdr_buf *body, int body_offset, u8 *cksumkey,
153 unsigned int usage, struct xdr_netobj *cksumout) 166 unsigned int usage, struct xdr_netobj *cksumout)
154{ 167{
155 struct hash_desc desc;
156 struct scatterlist sg[1]; 168 struct scatterlist sg[1];
157 int err; 169 int err;
158 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; 170 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
159 u8 rc4salt[4]; 171 u8 rc4salt[4];
160 struct crypto_hash *md5; 172 struct crypto_ahash *md5;
161 struct crypto_hash *hmac_md5; 173 struct crypto_ahash *hmac_md5;
174 struct ahash_request *req;
162 175
163 if (cksumkey == NULL) 176 if (cksumkey == NULL)
164 return GSS_S_FAILURE; 177 return GSS_S_FAILURE;
@@ -174,61 +187,79 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
174 return GSS_S_FAILURE; 187 return GSS_S_FAILURE;
175 } 188 }
176 189
177 md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); 190 md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
178 if (IS_ERR(md5)) 191 if (IS_ERR(md5))
179 return GSS_S_FAILURE; 192 return GSS_S_FAILURE;
180 193
181 hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, 194 hmac_md5 = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0,
182 CRYPTO_ALG_ASYNC); 195 CRYPTO_ALG_ASYNC);
183 if (IS_ERR(hmac_md5)) { 196 if (IS_ERR(hmac_md5)) {
184 crypto_free_hash(md5); 197 crypto_free_ahash(md5);
198 return GSS_S_FAILURE;
199 }
200
201 req = ahash_request_alloc(md5, GFP_KERNEL);
202 if (!req) {
203 crypto_free_ahash(hmac_md5);
204 crypto_free_ahash(md5);
185 return GSS_S_FAILURE; 205 return GSS_S_FAILURE;
186 } 206 }
187 207
188 desc.tfm = md5; 208 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
189 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
190 209
191 err = crypto_hash_init(&desc); 210 err = crypto_ahash_init(req);
192 if (err) 211 if (err)
193 goto out; 212 goto out;
194 sg_init_one(sg, rc4salt, 4); 213 sg_init_one(sg, rc4salt, 4);
195 err = crypto_hash_update(&desc, sg, 4); 214 ahash_request_set_crypt(req, sg, NULL, 4);
215 err = crypto_ahash_update(req);
196 if (err) 216 if (err)
197 goto out; 217 goto out;
198 218
199 sg_init_one(sg, header, hdrlen); 219 sg_init_one(sg, header, hdrlen);
200 err = crypto_hash_update(&desc, sg, hdrlen); 220 ahash_request_set_crypt(req, sg, NULL, hdrlen);
221 err = crypto_ahash_update(req);
201 if (err) 222 if (err)
202 goto out; 223 goto out;
203 err = xdr_process_buf(body, body_offset, body->len - body_offset, 224 err = xdr_process_buf(body, body_offset, body->len - body_offset,
204 checksummer, &desc); 225 checksummer, req);
205 if (err) 226 if (err)
206 goto out; 227 goto out;
207 err = crypto_hash_final(&desc, checksumdata); 228 ahash_request_set_crypt(req, NULL, checksumdata, 0);
229 err = crypto_ahash_final(req);
208 if (err) 230 if (err)
209 goto out; 231 goto out;
210 232
211 desc.tfm = hmac_md5; 233 ahash_request_free(req);
212 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 234 req = ahash_request_alloc(hmac_md5, GFP_KERNEL);
235 if (!req) {
236 crypto_free_ahash(hmac_md5);
237 crypto_free_ahash(md5);
238 return GSS_S_FAILURE;
239 }
240
241 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
213 242
214 err = crypto_hash_init(&desc); 243 err = crypto_ahash_init(req);
215 if (err) 244 if (err)
216 goto out; 245 goto out;
217 err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength); 246 err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
218 if (err) 247 if (err)
219 goto out; 248 goto out;
220 249
221 sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5)); 250 sg_init_one(sg, checksumdata, crypto_ahash_digestsize(md5));
222 err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5), 251 ahash_request_set_crypt(req, sg, checksumdata,
223 checksumdata); 252 crypto_ahash_digestsize(md5));
253 err = crypto_ahash_digest(req);
224 if (err) 254 if (err)
225 goto out; 255 goto out;
226 256
227 memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength); 257 memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
228 cksumout->len = kctx->gk5e->cksumlength; 258 cksumout->len = kctx->gk5e->cksumlength;
229out: 259out:
230 crypto_free_hash(md5); 260 ahash_request_free(req);
231 crypto_free_hash(hmac_md5); 261 crypto_free_ahash(md5);
262 crypto_free_ahash(hmac_md5);
232 return err ? GSS_S_FAILURE : 0; 263 return err ? GSS_S_FAILURE : 0;
233} 264}
234 265
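make_checksum_hmac_md5() above shows the matching hash-side conversion: the removed crypto_hash/hash_desc interface is replaced by ahash, where a request is allocated per transform, the source scatterlist and output buffer are attached with ahash_request_set_crypt(), and init/update/final (or a single digest call) drive the hash. A condensed sketch of that request lifecycle, without the HMAC keying and with shortened error handling:

#include <crypto/hash.h>
#include <linux/scatterlist.h>

/* Sketch of the ahash request lifecycle used in the conversion above. */
static int demo_digest(const char *alg, const void *data, unsigned int len,
                       u8 *out)
{
        struct crypto_ahash *tfm;
        struct ahash_request *req;
        struct scatterlist sg[1];
        int err;

        tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        req = ahash_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                crypto_free_ahash(tfm);
                return -ENOMEM;
        }
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);

        sg_init_one(sg, data, len);
        ahash_request_set_crypt(req, sg, out, len);
        err = crypto_ahash_digest(req);         /* init + update + final in one call */

        ahash_request_free(req);
        crypto_free_ahash(tfm);
        return err;
}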
@@ -242,7 +273,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
242 struct xdr_buf *body, int body_offset, u8 *cksumkey, 273 struct xdr_buf *body, int body_offset, u8 *cksumkey,
243 unsigned int usage, struct xdr_netobj *cksumout) 274 unsigned int usage, struct xdr_netobj *cksumout)
244{ 275{
245 struct hash_desc desc; 276 struct crypto_ahash *tfm;
277 struct ahash_request *req;
246 struct scatterlist sg[1]; 278 struct scatterlist sg[1];
247 int err; 279 int err;
248 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; 280 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
@@ -259,32 +291,41 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
259 return GSS_S_FAILURE; 291 return GSS_S_FAILURE;
260 } 292 }
261 293
262 desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); 294 tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
263 if (IS_ERR(desc.tfm)) 295 if (IS_ERR(tfm))
264 return GSS_S_FAILURE; 296 return GSS_S_FAILURE;
265 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
266 297
267 checksumlen = crypto_hash_digestsize(desc.tfm); 298 req = ahash_request_alloc(tfm, GFP_KERNEL);
299 if (!req) {
300 crypto_free_ahash(tfm);
301 return GSS_S_FAILURE;
302 }
303
304 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
305
306 checksumlen = crypto_ahash_digestsize(tfm);
268 307
269 if (cksumkey != NULL) { 308 if (cksumkey != NULL) {
270 err = crypto_hash_setkey(desc.tfm, cksumkey, 309 err = crypto_ahash_setkey(tfm, cksumkey,
271 kctx->gk5e->keylength); 310 kctx->gk5e->keylength);
272 if (err) 311 if (err)
273 goto out; 312 goto out;
274 } 313 }
275 314
276 err = crypto_hash_init(&desc); 315 err = crypto_ahash_init(req);
277 if (err) 316 if (err)
278 goto out; 317 goto out;
279 sg_init_one(sg, header, hdrlen); 318 sg_init_one(sg, header, hdrlen);
280 err = crypto_hash_update(&desc, sg, hdrlen); 319 ahash_request_set_crypt(req, sg, NULL, hdrlen);
320 err = crypto_ahash_update(req);
281 if (err) 321 if (err)
282 goto out; 322 goto out;
283 err = xdr_process_buf(body, body_offset, body->len - body_offset, 323 err = xdr_process_buf(body, body_offset, body->len - body_offset,
284 checksummer, &desc); 324 checksummer, req);
285 if (err) 325 if (err)
286 goto out; 326 goto out;
287 err = crypto_hash_final(&desc, checksumdata); 327 ahash_request_set_crypt(req, NULL, checksumdata, 0);
328 err = crypto_ahash_final(req);
288 if (err) 329 if (err)
289 goto out; 330 goto out;
290 331
@@ -307,7 +348,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
307 } 348 }
308 cksumout->len = kctx->gk5e->cksumlength; 349 cksumout->len = kctx->gk5e->cksumlength;
309out: 350out:
310 crypto_free_hash(desc.tfm); 351 ahash_request_free(req);
352 crypto_free_ahash(tfm);
311 return err ? GSS_S_FAILURE : 0; 353 return err ? GSS_S_FAILURE : 0;
312} 354}
313 355
@@ -323,7 +365,8 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
323 struct xdr_buf *body, int body_offset, u8 *cksumkey, 365 struct xdr_buf *body, int body_offset, u8 *cksumkey,
324 unsigned int usage, struct xdr_netobj *cksumout) 366 unsigned int usage, struct xdr_netobj *cksumout)
325{ 367{
326 struct hash_desc desc; 368 struct crypto_ahash *tfm;
369 struct ahash_request *req;
327 struct scatterlist sg[1]; 370 struct scatterlist sg[1];
328 int err; 371 int err;
329 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; 372 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
@@ -340,31 +383,39 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
340 return GSS_S_FAILURE; 383 return GSS_S_FAILURE;
341 } 384 }
342 385
343 desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, 386 tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
344 CRYPTO_ALG_ASYNC); 387 if (IS_ERR(tfm))
345 if (IS_ERR(desc.tfm))
346 return GSS_S_FAILURE; 388 return GSS_S_FAILURE;
347 checksumlen = crypto_hash_digestsize(desc.tfm); 389 checksumlen = crypto_ahash_digestsize(tfm);
348 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 390
391 req = ahash_request_alloc(tfm, GFP_KERNEL);
392 if (!req) {
393 crypto_free_ahash(tfm);
394 return GSS_S_FAILURE;
395 }
349 396
350 err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength); 397 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
398
399 err = crypto_ahash_setkey(tfm, cksumkey, kctx->gk5e->keylength);
351 if (err) 400 if (err)
352 goto out; 401 goto out;
353 402
354 err = crypto_hash_init(&desc); 403 err = crypto_ahash_init(req);
355 if (err) 404 if (err)
356 goto out; 405 goto out;
357 err = xdr_process_buf(body, body_offset, body->len - body_offset, 406 err = xdr_process_buf(body, body_offset, body->len - body_offset,
358 checksummer, &desc); 407 checksummer, req);
359 if (err) 408 if (err)
360 goto out; 409 goto out;
361 if (header != NULL) { 410 if (header != NULL) {
362 sg_init_one(sg, header, hdrlen); 411 sg_init_one(sg, header, hdrlen);
363 err = crypto_hash_update(&desc, sg, hdrlen); 412 ahash_request_set_crypt(req, sg, NULL, hdrlen);
413 err = crypto_ahash_update(req);
364 if (err) 414 if (err)
365 goto out; 415 goto out;
366 } 416 }
367 err = crypto_hash_final(&desc, checksumdata); 417 ahash_request_set_crypt(req, NULL, checksumdata, 0);
418 err = crypto_ahash_final(req);
368 if (err) 419 if (err)
369 goto out; 420 goto out;
370 421
@@ -381,13 +432,14 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
381 break; 432 break;
382 } 433 }
383out: 434out:
384 crypto_free_hash(desc.tfm); 435 ahash_request_free(req);
436 crypto_free_ahash(tfm);
385 return err ? GSS_S_FAILURE : 0; 437 return err ? GSS_S_FAILURE : 0;
386} 438}
387 439
388struct encryptor_desc { 440struct encryptor_desc {
389 u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; 441 u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
390 struct blkcipher_desc desc; 442 struct skcipher_request *req;
391 int pos; 443 int pos;
392 struct xdr_buf *outbuf; 444 struct xdr_buf *outbuf;
393 struct page **pages; 445 struct page **pages;
@@ -402,6 +454,7 @@ encryptor(struct scatterlist *sg, void *data)
402{ 454{
403 struct encryptor_desc *desc = data; 455 struct encryptor_desc *desc = data;
404 struct xdr_buf *outbuf = desc->outbuf; 456 struct xdr_buf *outbuf = desc->outbuf;
457 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
405 struct page *in_page; 458 struct page *in_page;
406 int thislen = desc->fraglen + sg->length; 459 int thislen = desc->fraglen + sg->length;
407 int fraglen, ret; 460 int fraglen, ret;
@@ -414,7 +467,7 @@ encryptor(struct scatterlist *sg, void *data)
414 page_pos = desc->pos - outbuf->head[0].iov_len; 467 page_pos = desc->pos - outbuf->head[0].iov_len;
415 if (page_pos >= 0 && page_pos < outbuf->page_len) { 468 if (page_pos >= 0 && page_pos < outbuf->page_len) {
416 /* pages are not in place: */ 469 /* pages are not in place: */
417 int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; 470 int i = (page_pos + outbuf->page_base) >> PAGE_SHIFT;
418 in_page = desc->pages[i]; 471 in_page = desc->pages[i];
419 } else { 472 } else {
420 in_page = sg_page(sg); 473 in_page = sg_page(sg);
@@ -427,7 +480,7 @@ encryptor(struct scatterlist *sg, void *data)
427 desc->fraglen += sg->length; 480 desc->fraglen += sg->length;
428 desc->pos += sg->length; 481 desc->pos += sg->length;
429 482
430 fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1); 483 fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
431 thislen -= fraglen; 484 thislen -= fraglen;
432 485
433 if (thislen == 0) 486 if (thislen == 0)
@@ -436,8 +489,10 @@ encryptor(struct scatterlist *sg, void *data)
436 sg_mark_end(&desc->infrags[desc->fragno - 1]); 489 sg_mark_end(&desc->infrags[desc->fragno - 1]);
437 sg_mark_end(&desc->outfrags[desc->fragno - 1]); 490 sg_mark_end(&desc->outfrags[desc->fragno - 1]);
438 491
439 ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, 492 skcipher_request_set_crypt(desc->req, desc->infrags, desc->outfrags,
440 desc->infrags, thislen); 493 thislen, desc->iv);
494
495 ret = crypto_skcipher_encrypt(desc->req);
441 if (ret) 496 if (ret)
442 return ret; 497 return ret;
443 498
@@ -459,18 +514,20 @@ encryptor(struct scatterlist *sg, void *data)
459} 514}
460 515
461int 516int
462gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, 517gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
463 int offset, struct page **pages) 518 int offset, struct page **pages)
464{ 519{
465 int ret; 520 int ret;
466 struct encryptor_desc desc; 521 struct encryptor_desc desc;
522 SKCIPHER_REQUEST_ON_STACK(req, tfm);
523
524 BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
467 525
468 BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); 526 skcipher_request_set_tfm(req, tfm);
527 skcipher_request_set_callback(req, 0, NULL, NULL);
469 528
470 memset(desc.iv, 0, sizeof(desc.iv)); 529 memset(desc.iv, 0, sizeof(desc.iv));
471 desc.desc.tfm = tfm; 530 desc.req = req;
472 desc.desc.info = desc.iv;
473 desc.desc.flags = 0;
474 desc.pos = offset; 531 desc.pos = offset;
475 desc.outbuf = buf; 532 desc.outbuf = buf;
476 desc.pages = pages; 533 desc.pages = pages;
@@ -481,12 +538,13 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
481 sg_init_table(desc.outfrags, 4); 538 sg_init_table(desc.outfrags, 4);
482 539
483 ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc); 540 ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
541 skcipher_request_zero(req);
484 return ret; 542 return ret;
485} 543}
486 544
487struct decryptor_desc { 545struct decryptor_desc {
488 u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; 546 u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
489 struct blkcipher_desc desc; 547 struct skcipher_request *req;
490 struct scatterlist frags[4]; 548 struct scatterlist frags[4];
491 int fragno; 549 int fragno;
492 int fraglen; 550 int fraglen;
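gss_encrypt_xdr_buf() and encryptor() above keep a single skcipher request alive in the encryptor_desc and reuse it for every block-aligned chunk of the xdr_buf, always passing the same desc->iv; this relies on the CBC implementation writing the last ciphertext block back into the IV buffer, so the chaining value carries from one fragment to the next, and the request is wiped only after xdr_process_buf() finishes. A reduced sketch of that persistent-request, rolling-IV idea (fragments assumed block-aligned, AES block size hard-coded for the demo):

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>

/* Sketch: encrypt several block-aligned fragments as one CBC stream by
 * reusing a single request and the same IV buffer, as the encryptor above
 * does for each xdr_buf fragment. */
static int demo_cbc_fragments(struct crypto_skcipher *tfm,
                              struct scatterlist *frags, int nfrags,
                              unsigned int fraglen)
{
        u8 iv[16] = { 0 };              /* AES block size assumed */
        SKCIPHER_REQUEST_ON_STACK(req, tfm);
        int i, ret = 0;

        skcipher_request_set_tfm(req, tfm);
        skcipher_request_set_callback(req, 0, NULL, NULL);

        for (i = 0; i < nfrags; i++) {
                /* in place; iv[] is updated with the last ciphertext block,
                 * so chaining continues into the next fragment */
                skcipher_request_set_crypt(req, &frags[i], &frags[i],
                                           fraglen, iv);
                ret = crypto_skcipher_encrypt(req);
                if (ret)
                        break;
        }
        skcipher_request_zero(req);
        return ret;
}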
@@ -497,6 +555,7 @@ decryptor(struct scatterlist *sg, void *data)
497{ 555{
498 struct decryptor_desc *desc = data; 556 struct decryptor_desc *desc = data;
499 int thislen = desc->fraglen + sg->length; 557 int thislen = desc->fraglen + sg->length;
558 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
500 int fraglen, ret; 559 int fraglen, ret;
501 560
502 /* Worst case is 4 fragments: head, end of page 1, start 561 /* Worst case is 4 fragments: head, end of page 1, start
@@ -507,7 +566,7 @@ decryptor(struct scatterlist *sg, void *data)
507 desc->fragno++; 566 desc->fragno++;
508 desc->fraglen += sg->length; 567 desc->fraglen += sg->length;
509 568
510 fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1); 569 fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
511 thislen -= fraglen; 570 thislen -= fraglen;
512 571
513 if (thislen == 0) 572 if (thislen == 0)
@@ -515,8 +574,10 @@ decryptor(struct scatterlist *sg, void *data)
515 574
516 sg_mark_end(&desc->frags[desc->fragno - 1]); 575 sg_mark_end(&desc->frags[desc->fragno - 1]);
517 576
518 ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, 577 skcipher_request_set_crypt(desc->req, desc->frags, desc->frags,
519 desc->frags, thislen); 578 thislen, desc->iv);
579
580 ret = crypto_skcipher_decrypt(desc->req);
520 if (ret) 581 if (ret)
521 return ret; 582 return ret;
522 583
@@ -535,24 +596,29 @@ decryptor(struct scatterlist *sg, void *data)
535} 596}
536 597
537int 598int
538gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, 599gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
539 int offset) 600 int offset)
540{ 601{
602 int ret;
541 struct decryptor_desc desc; 603 struct decryptor_desc desc;
604 SKCIPHER_REQUEST_ON_STACK(req, tfm);
542 605
543 /* XXXJBF: */ 606 /* XXXJBF: */
544 BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); 607 BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
608
609 skcipher_request_set_tfm(req, tfm);
610 skcipher_request_set_callback(req, 0, NULL, NULL);
545 611
546 memset(desc.iv, 0, sizeof(desc.iv)); 612 memset(desc.iv, 0, sizeof(desc.iv));
547 desc.desc.tfm = tfm; 613 desc.req = req;
548 desc.desc.info = desc.iv;
549 desc.desc.flags = 0;
550 desc.fragno = 0; 614 desc.fragno = 0;
551 desc.fraglen = 0; 615 desc.fraglen = 0;
552 616
553 sg_init_table(desc.frags, 4); 617 sg_init_table(desc.frags, 4);
554 618
555 return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); 619 ret = xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
620 skcipher_request_zero(req);
621 return ret;
556} 622}
557 623
558/* 624/*
@@ -594,12 +660,12 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
594} 660}
595 661
596static u32 662static u32
597gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf, 663gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
598 u32 offset, u8 *iv, struct page **pages, int encrypt) 664 u32 offset, u8 *iv, struct page **pages, int encrypt)
599{ 665{
600 u32 ret; 666 u32 ret;
601 struct scatterlist sg[1]; 667 struct scatterlist sg[1];
602 struct blkcipher_desc desc = { .tfm = cipher, .info = iv }; 668 SKCIPHER_REQUEST_ON_STACK(req, cipher);
603 u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2]; 669 u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];
604 struct page **save_pages; 670 struct page **save_pages;
605 u32 len = buf->len - offset; 671 u32 len = buf->len - offset;
@@ -625,10 +691,16 @@ gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
625 691
626 sg_init_one(sg, data, len); 692 sg_init_one(sg, data, len);
627 693
694 skcipher_request_set_tfm(req, cipher);
695 skcipher_request_set_callback(req, 0, NULL, NULL);
696 skcipher_request_set_crypt(req, sg, sg, len, iv);
697
628 if (encrypt) 698 if (encrypt)
629 ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); 699 ret = crypto_skcipher_encrypt(req);
630 else 700 else
631 ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len); 701 ret = crypto_skcipher_decrypt(req);
702
703 skcipher_request_zero(req);
632 704
633 if (ret) 705 if (ret)
634 goto out; 706 goto out;
@@ -647,7 +719,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
647 struct xdr_netobj hmac; 719 struct xdr_netobj hmac;
648 u8 *cksumkey; 720 u8 *cksumkey;
649 u8 *ecptr; 721 u8 *ecptr;
650 struct crypto_blkcipher *cipher, *aux_cipher; 722 struct crypto_skcipher *cipher, *aux_cipher;
651 int blocksize; 723 int blocksize;
652 struct page **save_pages; 724 struct page **save_pages;
653 int nblocks, nbytes; 725 int nblocks, nbytes;
@@ -666,7 +738,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
666 cksumkey = kctx->acceptor_integ; 738 cksumkey = kctx->acceptor_integ;
667 usage = KG_USAGE_ACCEPTOR_SEAL; 739 usage = KG_USAGE_ACCEPTOR_SEAL;
668 } 740 }
669 blocksize = crypto_blkcipher_blocksize(cipher); 741 blocksize = crypto_skcipher_blocksize(cipher);
670 742
671 /* hide the gss token header and insert the confounder */ 743 /* hide the gss token header and insert the confounder */
672 offset += GSS_KRB5_TOK_HDR_LEN; 744 offset += GSS_KRB5_TOK_HDR_LEN;
@@ -719,20 +791,24 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
719 memset(desc.iv, 0, sizeof(desc.iv)); 791 memset(desc.iv, 0, sizeof(desc.iv));
720 792
721 if (cbcbytes) { 793 if (cbcbytes) {
794 SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
795
722 desc.pos = offset + GSS_KRB5_TOK_HDR_LEN; 796 desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
723 desc.fragno = 0; 797 desc.fragno = 0;
724 desc.fraglen = 0; 798 desc.fraglen = 0;
725 desc.pages = pages; 799 desc.pages = pages;
726 desc.outbuf = buf; 800 desc.outbuf = buf;
727 desc.desc.info = desc.iv; 801 desc.req = req;
728 desc.desc.flags = 0; 802
729 desc.desc.tfm = aux_cipher; 803 skcipher_request_set_tfm(req, aux_cipher);
804 skcipher_request_set_callback(req, 0, NULL, NULL);
730 805
731 sg_init_table(desc.infrags, 4); 806 sg_init_table(desc.infrags, 4);
732 sg_init_table(desc.outfrags, 4); 807 sg_init_table(desc.outfrags, 4);
733 808
734 err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN, 809 err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
735 cbcbytes, encryptor, &desc); 810 cbcbytes, encryptor, &desc);
811 skcipher_request_zero(req);
736 if (err) 812 if (err)
737 goto out_err; 813 goto out_err;
738 } 814 }
@@ -763,7 +839,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
763 struct xdr_buf subbuf; 839 struct xdr_buf subbuf;
764 u32 ret = 0; 840 u32 ret = 0;
765 u8 *cksum_key; 841 u8 *cksum_key;
766 struct crypto_blkcipher *cipher, *aux_cipher; 842 struct crypto_skcipher *cipher, *aux_cipher;
767 struct xdr_netobj our_hmac_obj; 843 struct xdr_netobj our_hmac_obj;
768 u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN]; 844 u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
769 u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN]; 845 u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
@@ -782,7 +858,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
782 cksum_key = kctx->initiator_integ; 858 cksum_key = kctx->initiator_integ;
783 usage = KG_USAGE_INITIATOR_SEAL; 859 usage = KG_USAGE_INITIATOR_SEAL;
784 } 860 }
785 blocksize = crypto_blkcipher_blocksize(cipher); 861 blocksize = crypto_skcipher_blocksize(cipher);
786 862
787 863
788 /* create a segment skipping the header and leaving out the checksum */ 864 /* create a segment skipping the header and leaving out the checksum */
@@ -799,15 +875,19 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
799 memset(desc.iv, 0, sizeof(desc.iv)); 875 memset(desc.iv, 0, sizeof(desc.iv));
800 876
801 if (cbcbytes) { 877 if (cbcbytes) {
878 SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
879
802 desc.fragno = 0; 880 desc.fragno = 0;
803 desc.fraglen = 0; 881 desc.fraglen = 0;
804 desc.desc.info = desc.iv; 882 desc.req = req;
805 desc.desc.flags = 0; 883
806 desc.desc.tfm = aux_cipher; 884 skcipher_request_set_tfm(req, aux_cipher);
885 skcipher_request_set_callback(req, 0, NULL, NULL);
807 886
808 sg_init_table(desc.frags, 4); 887 sg_init_table(desc.frags, 4);
809 888
810 ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc); 889 ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
890 skcipher_request_zero(req);
811 if (ret) 891 if (ret)
812 goto out_err; 892 goto out_err;
813 } 893 }
@@ -850,61 +930,63 @@ out_err:
850 * Set the key of the given cipher. 930 * Set the key of the given cipher.
851 */ 931 */
852int 932int
853krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, 933krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
854 unsigned char *cksum) 934 unsigned char *cksum)
855{ 935{
856 struct crypto_hash *hmac; 936 struct crypto_shash *hmac;
857 struct hash_desc desc; 937 struct shash_desc *desc;
858 struct scatterlist sg[1];
859 u8 Kseq[GSS_KRB5_MAX_KEYLEN]; 938 u8 Kseq[GSS_KRB5_MAX_KEYLEN];
860 u32 zeroconstant = 0; 939 u32 zeroconstant = 0;
861 int err; 940 int err;
862 941
863 dprintk("%s: entered\n", __func__); 942 dprintk("%s: entered\n", __func__);
864 943
865 hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); 944 hmac = crypto_alloc_shash(kctx->gk5e->cksum_name, 0, 0);
866 if (IS_ERR(hmac)) { 945 if (IS_ERR(hmac)) {
867 dprintk("%s: error %ld, allocating hash '%s'\n", 946 dprintk("%s: error %ld, allocating hash '%s'\n",
868 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name); 947 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
869 return PTR_ERR(hmac); 948 return PTR_ERR(hmac);
870 } 949 }
871 950
872 desc.tfm = hmac; 951 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
873 desc.flags = 0; 952 GFP_KERNEL);
953 if (!desc) {
954 dprintk("%s: failed to allocate shash descriptor for '%s'\n",
955 __func__, kctx->gk5e->cksum_name);
956 crypto_free_shash(hmac);
957 return -ENOMEM;
958 }
874 959
875 err = crypto_hash_init(&desc); 960 desc->tfm = hmac;
876 if (err) 961 desc->flags = 0;
877 goto out_err;
878 962
879 /* Compute intermediate Kseq from session key */ 963 /* Compute intermediate Kseq from session key */
880 err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength); 964 err = crypto_shash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
881 if (err) 965 if (err)
882 goto out_err; 966 goto out_err;
883 967
884 sg_init_one(sg, &zeroconstant, 4); 968 err = crypto_shash_digest(desc, (u8 *)&zeroconstant, 4, Kseq);
885 err = crypto_hash_digest(&desc, sg, 4, Kseq);
886 if (err) 969 if (err)
887 goto out_err; 970 goto out_err;
888 971
889 /* Compute final Kseq from the checksum and intermediate Kseq */ 972 /* Compute final Kseq from the checksum and intermediate Kseq */
890 err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength); 973 err = crypto_shash_setkey(hmac, Kseq, kctx->gk5e->keylength);
891 if (err) 974 if (err)
892 goto out_err; 975 goto out_err;
893 976
894 sg_set_buf(sg, cksum, 8); 977 err = crypto_shash_digest(desc, cksum, 8, Kseq);
895
896 err = crypto_hash_digest(&desc, sg, 8, Kseq);
897 if (err) 978 if (err)
898 goto out_err; 979 goto out_err;
899 980
900 err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength); 981 err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
901 if (err) 982 if (err)
902 goto out_err; 983 goto out_err;
903 984
904 err = 0; 985 err = 0;
905 986
906out_err: 987out_err:
907 crypto_free_hash(hmac); 988 kzfree(desc);
989 crypto_free_shash(hmac);
908 dprintk("%s: returning %d\n", __func__, err); 990 dprintk("%s: returning %d\n", __func__, err);
909 return err; 991 return err;
910} 992}
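krb5_rc4_setup_seq_key() above switches from crypto_hash to the synchronous shash interface: the descriptor is kmalloc'ed with room for crypto_shash_descsize() bytes of algorithm state, fed linear buffers directly (no scatterlists needed), and kzfree'd when done since it may hold key-derived state. A compact sketch of that one-shot keyed digest:

#include <crypto/hash.h>
#include <linux/slab.h>

/* Sketch of the shash one-shot HMAC pattern used above: linear buffers,
 * heap-allocated descriptor sized for the algorithm, zeroed on free. */
static int demo_hmac(const char *alg, const u8 *key, unsigned int keylen,
                     const u8 *data, unsigned int len, u8 *out)
{
        struct crypto_shash *tfm;
        struct shash_desc *desc;
        int err;

        tfm = crypto_alloc_shash(alg, 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_shash_setkey(tfm, key, keylen);
        if (err)
                goto out_free_tfm;

        desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
        if (!desc) {
                err = -ENOMEM;
                goto out_free_tfm;
        }
        desc->tfm = tfm;
        desc->flags = 0;

        err = crypto_shash_digest(desc, data, len, out);
        kzfree(desc);                   /* descriptor may hold key material */
out_free_tfm:
        crypto_free_shash(tfm);
        return err;
}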
@@ -914,12 +996,11 @@ out_err:
914 * Set the key of cipher kctx->enc. 996 * Set the key of cipher kctx->enc.
915 */ 997 */
916int 998int
917krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, 999krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
918 s32 seqnum) 1000 s32 seqnum)
919{ 1001{
920 struct crypto_hash *hmac; 1002 struct crypto_shash *hmac;
921 struct hash_desc desc; 1003 struct shash_desc *desc;
922 struct scatterlist sg[1];
923 u8 Kcrypt[GSS_KRB5_MAX_KEYLEN]; 1004 u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
924 u8 zeroconstant[4] = {0}; 1005 u8 zeroconstant[4] = {0};
925 u8 seqnumarray[4]; 1006 u8 seqnumarray[4];
@@ -927,35 +1008,39 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
927 1008
928 dprintk("%s: entered, seqnum %u\n", __func__, seqnum); 1009 dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
929 1010
930 hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); 1011 hmac = crypto_alloc_shash(kctx->gk5e->cksum_name, 0, 0);
931 if (IS_ERR(hmac)) { 1012 if (IS_ERR(hmac)) {
932 dprintk("%s: error %ld, allocating hash '%s'\n", 1013 dprintk("%s: error %ld, allocating hash '%s'\n",
933 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name); 1014 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
934 return PTR_ERR(hmac); 1015 return PTR_ERR(hmac);
935 } 1016 }
936 1017
937 desc.tfm = hmac; 1018 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
938 desc.flags = 0; 1019 GFP_KERNEL);
1020 if (!desc) {
1021 dprintk("%s: failed to allocate shash descriptor for '%s'\n",
1022 __func__, kctx->gk5e->cksum_name);
1023 crypto_free_shash(hmac);
1024 return -ENOMEM;
1025 }
939 1026
940 err = crypto_hash_init(&desc); 1027 desc->tfm = hmac;
941 if (err) 1028 desc->flags = 0;
942 goto out_err;
943 1029
944 /* Compute intermediate Kcrypt from session key */ 1030 /* Compute intermediate Kcrypt from session key */
945 for (i = 0; i < kctx->gk5e->keylength; i++) 1031 for (i = 0; i < kctx->gk5e->keylength; i++)
946 Kcrypt[i] = kctx->Ksess[i] ^ 0xf0; 1032 Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
947 1033
948 err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); 1034 err = crypto_shash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
949 if (err) 1035 if (err)
950 goto out_err; 1036 goto out_err;
951 1037
952 sg_init_one(sg, zeroconstant, 4); 1038 err = crypto_shash_digest(desc, zeroconstant, 4, Kcrypt);
953 err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
954 if (err) 1039 if (err)
955 goto out_err; 1040 goto out_err;
956 1041
957 /* Compute final Kcrypt from the seqnum and intermediate Kcrypt */ 1042 /* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
958 err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); 1043 err = crypto_shash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
959 if (err) 1044 if (err)
960 goto out_err; 1045 goto out_err;
961 1046
@@ -964,20 +1049,19 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
964 seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff); 1049 seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
965 seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff); 1050 seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
966 1051
967 sg_set_buf(sg, seqnumarray, 4); 1052 err = crypto_shash_digest(desc, seqnumarray, 4, Kcrypt);
968
969 err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
970 if (err) 1053 if (err)
971 goto out_err; 1054 goto out_err;
972 1055
973 err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength); 1056 err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
974 if (err) 1057 if (err)
975 goto out_err; 1058 goto out_err;
976 1059
977 err = 0; 1060 err = 0;
978 1061
979out_err: 1062out_err:
980 crypto_free_hash(hmac); 1063 kzfree(desc);
1064 crypto_free_shash(hmac);
981 dprintk("%s: returning %d\n", __func__, err); 1065 dprintk("%s: returning %d\n", __func__, err);
982 return err; 1066 return err;
983} 1067}
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 234fa8d0fd9b..870133146026 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -54,9 +54,9 @@
54 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 54 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
55 */ 55 */
56 56
57#include <crypto/skcipher.h>
57#include <linux/err.h> 58#include <linux/err.h>
58#include <linux/types.h> 59#include <linux/types.h>
59#include <linux/crypto.h>
60#include <linux/sunrpc/gss_krb5.h> 60#include <linux/sunrpc/gss_krb5.h>
61#include <linux/sunrpc/xdr.h> 61#include <linux/sunrpc/xdr.h>
62#include <linux/lcm.h> 62#include <linux/lcm.h>
@@ -147,7 +147,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
147 size_t blocksize, keybytes, keylength, n; 147 size_t blocksize, keybytes, keylength, n;
148 unsigned char *inblockdata, *outblockdata, *rawkey; 148 unsigned char *inblockdata, *outblockdata, *rawkey;
149 struct xdr_netobj inblock, outblock; 149 struct xdr_netobj inblock, outblock;
150 struct crypto_blkcipher *cipher; 150 struct crypto_skcipher *cipher;
151 u32 ret = EINVAL; 151 u32 ret = EINVAL;
152 152
153 blocksize = gk5e->blocksize; 153 blocksize = gk5e->blocksize;
@@ -157,11 +157,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
157 if ((inkey->len != keylength) || (outkey->len != keylength)) 157 if ((inkey->len != keylength) || (outkey->len != keylength))
158 goto err_return; 158 goto err_return;
159 159
160 cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0, 160 cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0,
161 CRYPTO_ALG_ASYNC); 161 CRYPTO_ALG_ASYNC);
162 if (IS_ERR(cipher)) 162 if (IS_ERR(cipher))
163 goto err_return; 163 goto err_return;
164 if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len)) 164 if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len))
165 goto err_return; 165 goto err_return;
166 166
167 /* allocate and set up buffers */ 167 /* allocate and set up buffers */
@@ -238,7 +238,7 @@ err_free_in:
238 memset(inblockdata, 0, blocksize); 238 memset(inblockdata, 0, blocksize);
239 kfree(inblockdata); 239 kfree(inblockdata);
240err_free_cipher: 240err_free_cipher:
241 crypto_free_blkcipher(cipher); 241 crypto_free_skcipher(cipher);
242err_return: 242err_return:
243 return ret; 243 return ret;
244} 244}
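krb5_derive_key() above needs only the handle conversion: crypto_alloc_blkcipher(), crypto_blkcipher_setkey() and crypto_free_blkcipher() become their skcipher equivalents with the same "synchronous implementations only" mask. A tiny sketch of the allocate-key-free sequence; the algorithm name and key are placeholders:

#include <crypto/skcipher.h>
#include <linux/err.h>

/* Sketch of the tfm handle handling after the conversion above. */
static struct crypto_skcipher *demo_get_cipher(const char *name,
                                               const u8 *key, unsigned int klen)
{
        struct crypto_skcipher *tfm;

        /* type 0, mask CRYPTO_ALG_ASYNC: only synchronous implementations */
        tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(tfm))
                return tfm;

        if (crypto_skcipher_setkey(tfm, key, klen)) {
                crypto_free_skcipher(tfm);
                return ERR_PTR(-EINVAL);
        }
        return tfm;
}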
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 28db442a0034..65427492b1c9 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -34,6 +34,8 @@
34 * 34 *
35 */ 35 */
36 36
37#include <crypto/hash.h>
38#include <crypto/skcipher.h>
37#include <linux/err.h> 39#include <linux/err.h>
38#include <linux/module.h> 40#include <linux/module.h>
39#include <linux/init.h> 41#include <linux/init.h>
@@ -42,7 +44,6 @@
42#include <linux/sunrpc/auth.h> 44#include <linux/sunrpc/auth.h>
43#include <linux/sunrpc/gss_krb5.h> 45#include <linux/sunrpc/gss_krb5.h>
44#include <linux/sunrpc/xdr.h> 46#include <linux/sunrpc/xdr.h>
45#include <linux/crypto.h>
46#include <linux/sunrpc/gss_krb5_enctypes.h> 47#include <linux/sunrpc/gss_krb5_enctypes.h>
47 48
48#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 49#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -217,7 +218,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
217 218
218static inline const void * 219static inline const void *
219get_key(const void *p, const void *end, 220get_key(const void *p, const void *end,
220 struct krb5_ctx *ctx, struct crypto_blkcipher **res) 221 struct krb5_ctx *ctx, struct crypto_skcipher **res)
221{ 222{
222 struct xdr_netobj key; 223 struct xdr_netobj key;
223 int alg; 224 int alg;
@@ -245,7 +246,7 @@ get_key(const void *p, const void *end,
245 if (IS_ERR(p)) 246 if (IS_ERR(p))
246 goto out_err; 247 goto out_err;
247 248
248 *res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, 249 *res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
249 CRYPTO_ALG_ASYNC); 250 CRYPTO_ALG_ASYNC);
250 if (IS_ERR(*res)) { 251 if (IS_ERR(*res)) {
251 printk(KERN_WARNING "gss_kerberos_mech: unable to initialize " 252 printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
@@ -253,7 +254,7 @@ get_key(const void *p, const void *end,
253 *res = NULL; 254 *res = NULL;
254 goto out_err_free_key; 255 goto out_err_free_key;
255 } 256 }
256 if (crypto_blkcipher_setkey(*res, key.data, key.len)) { 257 if (crypto_skcipher_setkey(*res, key.data, key.len)) {
257 printk(KERN_WARNING "gss_kerberos_mech: error setting key for " 258 printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
258 "crypto algorithm %s\n", ctx->gk5e->encrypt_name); 259 "crypto algorithm %s\n", ctx->gk5e->encrypt_name);
259 goto out_err_free_tfm; 260 goto out_err_free_tfm;
@@ -263,7 +264,7 @@ get_key(const void *p, const void *end,
263 return p; 264 return p;
264 265
265out_err_free_tfm: 266out_err_free_tfm:
266 crypto_free_blkcipher(*res); 267 crypto_free_skcipher(*res);
267out_err_free_key: 268out_err_free_key:
268 kfree(key.data); 269 kfree(key.data);
269 p = ERR_PTR(-EINVAL); 270 p = ERR_PTR(-EINVAL);
@@ -335,30 +336,30 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
335 return 0; 336 return 0;
336 337
337out_err_free_key2: 338out_err_free_key2:
338 crypto_free_blkcipher(ctx->seq); 339 crypto_free_skcipher(ctx->seq);
339out_err_free_key1: 340out_err_free_key1:
340 crypto_free_blkcipher(ctx->enc); 341 crypto_free_skcipher(ctx->enc);
341out_err_free_mech: 342out_err_free_mech:
342 kfree(ctx->mech_used.data); 343 kfree(ctx->mech_used.data);
343out_err: 344out_err:
344 return PTR_ERR(p); 345 return PTR_ERR(p);
345} 346}
346 347
347static struct crypto_blkcipher * 348static struct crypto_skcipher *
348context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key) 349context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
349{ 350{
350 struct crypto_blkcipher *cp; 351 struct crypto_skcipher *cp;
351 352
352 cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC); 353 cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC);
353 if (IS_ERR(cp)) { 354 if (IS_ERR(cp)) {
354 dprintk("gss_kerberos_mech: unable to initialize " 355 dprintk("gss_kerberos_mech: unable to initialize "
355 "crypto algorithm %s\n", cname); 356 "crypto algorithm %s\n", cname);
356 return NULL; 357 return NULL;
357 } 358 }
358 if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) { 359 if (crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
359 dprintk("gss_kerberos_mech: error setting key for " 360 dprintk("gss_kerberos_mech: error setting key for "
360 "crypto algorithm %s\n", cname); 361 "crypto algorithm %s\n", cname);
361 crypto_free_blkcipher(cp); 362 crypto_free_skcipher(cp);
362 return NULL; 363 return NULL;
363 } 364 }
364 return cp; 365 return cp;
@@ -412,9 +413,9 @@ context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
412 return 0; 413 return 0;
413 414
414out_free_enc: 415out_free_enc:
415 crypto_free_blkcipher(ctx->enc); 416 crypto_free_skcipher(ctx->enc);
416out_free_seq: 417out_free_seq:
417 crypto_free_blkcipher(ctx->seq); 418 crypto_free_skcipher(ctx->seq);
418out_err: 419out_err:
419 return -EINVAL; 420 return -EINVAL;
420} 421}
@@ -427,18 +428,17 @@ out_err:
427static int 428static int
428context_derive_keys_rc4(struct krb5_ctx *ctx) 429context_derive_keys_rc4(struct krb5_ctx *ctx)
429{ 430{
430 struct crypto_hash *hmac; 431 struct crypto_shash *hmac;
431 char sigkeyconstant[] = "signaturekey"; 432 char sigkeyconstant[] = "signaturekey";
432 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ 433 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
433 struct hash_desc desc; 434 struct shash_desc *desc;
434 struct scatterlist sg[1];
435 int err; 435 int err;
436 436
437 dprintk("RPC: %s: entered\n", __func__); 437 dprintk("RPC: %s: entered\n", __func__);
438 /* 438 /*
439 * derive cksum (aka Ksign) key 439 * derive cksum (aka Ksign) key
440 */ 440 */
441 hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); 441 hmac = crypto_alloc_shash(ctx->gk5e->cksum_name, 0, 0);
442 if (IS_ERR(hmac)) { 442 if (IS_ERR(hmac)) {
443 dprintk("%s: error %ld allocating hash '%s'\n", 443 dprintk("%s: error %ld allocating hash '%s'\n",
444 __func__, PTR_ERR(hmac), ctx->gk5e->cksum_name); 444 __func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
@@ -446,37 +446,41 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
446 goto out_err; 446 goto out_err;
447 } 447 }
448 448
449 err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength); 449 err = crypto_shash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
450 if (err) 450 if (err)
451 goto out_err_free_hmac; 451 goto out_err_free_hmac;
452 452
453 sg_init_table(sg, 1);
454 sg_set_buf(sg, sigkeyconstant, slen);
455 453
456 desc.tfm = hmac; 454 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
457 desc.flags = 0; 455 GFP_KERNEL);
458 456 if (!desc) {
459 err = crypto_hash_init(&desc); 457 dprintk("%s: failed to allocate hash descriptor for '%s'\n",
460 if (err) 458 __func__, ctx->gk5e->cksum_name);
459 err = -ENOMEM;
461 goto out_err_free_hmac; 460 goto out_err_free_hmac;
461 }
462
463 desc->tfm = hmac;
464 desc->flags = 0;
462 465
463 err = crypto_hash_digest(&desc, sg, slen, ctx->cksum); 466 err = crypto_shash_digest(desc, sigkeyconstant, slen, ctx->cksum);
467 kzfree(desc);
464 if (err) 468 if (err)
465 goto out_err_free_hmac; 469 goto out_err_free_hmac;
466 /* 470 /*
467 * allocate hash, and blkciphers for data and seqnum encryption 471 * allocate hash, and skciphers for data and seqnum encryption
468 */ 472 */
469 ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, 473 ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
470 CRYPTO_ALG_ASYNC); 474 CRYPTO_ALG_ASYNC);
471 if (IS_ERR(ctx->enc)) { 475 if (IS_ERR(ctx->enc)) {
472 err = PTR_ERR(ctx->enc); 476 err = PTR_ERR(ctx->enc);
473 goto out_err_free_hmac; 477 goto out_err_free_hmac;
474 } 478 }
475 479
476 ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, 480 ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
477 CRYPTO_ALG_ASYNC); 481 CRYPTO_ALG_ASYNC);
478 if (IS_ERR(ctx->seq)) { 482 if (IS_ERR(ctx->seq)) {
479 crypto_free_blkcipher(ctx->enc); 483 crypto_free_skcipher(ctx->enc);
480 err = PTR_ERR(ctx->seq); 484 err = PTR_ERR(ctx->seq);
481 goto out_err_free_hmac; 485 goto out_err_free_hmac;
482 } 486 }
@@ -486,7 +490,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
486 err = 0; 490 err = 0;
487 491
488out_err_free_hmac: 492out_err_free_hmac:
489 crypto_free_hash(hmac); 493 crypto_free_shash(hmac);
490out_err: 494out_err:
491 dprintk("RPC: %s: returning %d\n", __func__, err); 495 dprintk("RPC: %s: returning %d\n", __func__, err);
492 return err; 496 return err;
@@ -588,7 +592,7 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
588 context_v2_alloc_cipher(ctx, "cbc(aes)", 592 context_v2_alloc_cipher(ctx, "cbc(aes)",
589 ctx->acceptor_seal); 593 ctx->acceptor_seal);
590 if (ctx->acceptor_enc_aux == NULL) { 594 if (ctx->acceptor_enc_aux == NULL) {
591 crypto_free_blkcipher(ctx->initiator_enc_aux); 595 crypto_free_skcipher(ctx->initiator_enc_aux);
592 goto out_free_acceptor_enc; 596 goto out_free_acceptor_enc;
593 } 597 }
594 } 598 }
@@ -596,9 +600,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
596 return 0; 600 return 0;
597 601
598out_free_acceptor_enc: 602out_free_acceptor_enc:
599 crypto_free_blkcipher(ctx->acceptor_enc); 603 crypto_free_skcipher(ctx->acceptor_enc);
600out_free_initiator_enc: 604out_free_initiator_enc:
601 crypto_free_blkcipher(ctx->initiator_enc); 605 crypto_free_skcipher(ctx->initiator_enc);
602out_err: 606out_err:
603 return -EINVAL; 607 return -EINVAL;
604} 608}
@@ -710,12 +714,12 @@ static void
710gss_delete_sec_context_kerberos(void *internal_ctx) { 714gss_delete_sec_context_kerberos(void *internal_ctx) {
711 struct krb5_ctx *kctx = internal_ctx; 715 struct krb5_ctx *kctx = internal_ctx;
712 716
713 crypto_free_blkcipher(kctx->seq); 717 crypto_free_skcipher(kctx->seq);
714 crypto_free_blkcipher(kctx->enc); 718 crypto_free_skcipher(kctx->enc);
715 crypto_free_blkcipher(kctx->acceptor_enc); 719 crypto_free_skcipher(kctx->acceptor_enc);
716 crypto_free_blkcipher(kctx->initiator_enc); 720 crypto_free_skcipher(kctx->initiator_enc);
717 crypto_free_blkcipher(kctx->acceptor_enc_aux); 721 crypto_free_skcipher(kctx->acceptor_enc_aux);
718 crypto_free_blkcipher(kctx->initiator_enc_aux); 722 crypto_free_skcipher(kctx->initiator_enc_aux);
719 kfree(kctx->mech_used.data); 723 kfree(kctx->mech_used.data);
720 kfree(kctx); 724 kfree(kctx);
721} 725}
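gss_krb5_mech.c above completes the picture: per-context ciphers are allocated with crypto_alloc_skcipher(name, 0, CRYPTO_ALG_ASYNC) (type 0 plus the ASYNC bit in the mask asks for a synchronous implementation), the RC4 signature-key derivation moves to a heap-allocated shash_desc, and gss_delete_sec_context_kerberos() frees every handle with crypto_free_skcipher(). A small self-contained sketch of the paired setup/teardown with reverse-order error unwinding; the struct and names are illustrative, not the kernel krb5_ctx:

#include <crypto/skcipher.h>
#include <linux/err.h>

/* Sketch: every crypto_alloc_skcipher() has a matching crypto_free_skcipher(),
 * and error paths unwind in reverse allocation order, as the mech code does. */
struct demo_ctx {
        struct crypto_skcipher *enc;
        struct crypto_skcipher *seq;
};

static int demo_ctx_init(struct demo_ctx *ctx, const char *name)
{
        ctx->enc = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(ctx->enc))
                return PTR_ERR(ctx->enc);

        ctx->seq = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(ctx->seq)) {
                int err = PTR_ERR(ctx->seq);

                crypto_free_skcipher(ctx->enc);
                return err;
        }
        return 0;
}

static void demo_ctx_free(struct demo_ctx *ctx)
{
        crypto_free_skcipher(ctx->seq);
        crypto_free_skcipher(ctx->enc);
}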
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 20d55c793eb6..c8b9082f4a9d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -31,9 +31,9 @@
31 * PERFORMANCE OF THIS SOFTWARE. 31 * PERFORMANCE OF THIS SOFTWARE.
32 */ 32 */
33 33
34#include <crypto/skcipher.h>
34#include <linux/types.h> 35#include <linux/types.h>
35#include <linux/sunrpc/gss_krb5.h> 36#include <linux/sunrpc/gss_krb5.h>
36#include <linux/crypto.h>
37 37
38#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 38#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
39# define RPCDBG_FACILITY RPCDBG_AUTH 39# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -43,13 +43,13 @@ static s32
43krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, 43krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
44 unsigned char *cksum, unsigned char *buf) 44 unsigned char *cksum, unsigned char *buf)
45{ 45{
46 struct crypto_blkcipher *cipher; 46 struct crypto_skcipher *cipher;
47 unsigned char plain[8]; 47 unsigned char plain[8];
48 s32 code; 48 s32 code;
49 49
50 dprintk("RPC: %s:\n", __func__); 50 dprintk("RPC: %s:\n", __func__);
51 cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, 51 cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
52 CRYPTO_ALG_ASYNC); 52 CRYPTO_ALG_ASYNC);
53 if (IS_ERR(cipher)) 53 if (IS_ERR(cipher))
54 return PTR_ERR(cipher); 54 return PTR_ERR(cipher);
55 55
@@ -68,12 +68,12 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
68 68
69 code = krb5_encrypt(cipher, cksum, plain, buf, 8); 69 code = krb5_encrypt(cipher, cksum, plain, buf, 8);
70out: 70out:
71 crypto_free_blkcipher(cipher); 71 crypto_free_skcipher(cipher);
72 return code; 72 return code;
73} 73}
74s32 74s32
75krb5_make_seq_num(struct krb5_ctx *kctx, 75krb5_make_seq_num(struct krb5_ctx *kctx,
76 struct crypto_blkcipher *key, 76 struct crypto_skcipher *key,
77 int direction, 77 int direction,
78 u32 seqnum, 78 u32 seqnum,
79 unsigned char *cksum, unsigned char *buf) 79 unsigned char *cksum, unsigned char *buf)
@@ -101,13 +101,13 @@ static s32
101krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, 101krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
102 unsigned char *buf, int *direction, s32 *seqnum) 102 unsigned char *buf, int *direction, s32 *seqnum)
103{ 103{
104 struct crypto_blkcipher *cipher; 104 struct crypto_skcipher *cipher;
105 unsigned char plain[8]; 105 unsigned char plain[8];
106 s32 code; 106 s32 code;
107 107
108 dprintk("RPC: %s:\n", __func__); 108 dprintk("RPC: %s:\n", __func__);
109 cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, 109 cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
110 CRYPTO_ALG_ASYNC); 110 CRYPTO_ALG_ASYNC);
111 if (IS_ERR(cipher)) 111 if (IS_ERR(cipher))
112 return PTR_ERR(cipher); 112 return PTR_ERR(cipher);
113 113
@@ -130,7 +130,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
130 *seqnum = ((plain[0] << 24) | (plain[1] << 16) | 130 *seqnum = ((plain[0] << 24) | (plain[1] << 16) |
131 (plain[2] << 8) | (plain[3])); 131 (plain[2] << 8) | (plain[3]));
132out: 132out:
133 crypto_free_blkcipher(cipher); 133 crypto_free_skcipher(cipher);
134 return code; 134 return code;
135} 135}
136 136
@@ -142,7 +142,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
142{ 142{
143 s32 code; 143 s32 code;
144 unsigned char plain[8]; 144 unsigned char plain[8];
145 struct crypto_blkcipher *key = kctx->seq; 145 struct crypto_skcipher *key = kctx->seq;
146 146
147 dprintk("RPC: krb5_get_seq_num:\n"); 147 dprintk("RPC: krb5_get_seq_num:\n");
148 148
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index ca7e92a32f84..a737c2da0837 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -28,12 +28,12 @@
28 * SUCH DAMAGES. 28 * SUCH DAMAGES.
29 */ 29 */
30 30
31#include <crypto/skcipher.h>
31#include <linux/types.h> 32#include <linux/types.h>
32#include <linux/jiffies.h> 33#include <linux/jiffies.h>
33#include <linux/sunrpc/gss_krb5.h> 34#include <linux/sunrpc/gss_krb5.h>
34#include <linux/random.h> 35#include <linux/random.h>
35#include <linux/pagemap.h> 36#include <linux/pagemap.h>
36#include <linux/crypto.h>
37 37
38#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 38#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
39# define RPCDBG_FACILITY RPCDBG_AUTH 39# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -79,9 +79,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
79 len -= buf->head[0].iov_len; 79 len -= buf->head[0].iov_len;
80 if (len <= buf->page_len) { 80 if (len <= buf->page_len) {
81 unsigned int last = (buf->page_base + len - 1) 81 unsigned int last = (buf->page_base + len - 1)
82 >>PAGE_CACHE_SHIFT; 82 >>PAGE_SHIFT;
83 unsigned int offset = (buf->page_base + len - 1) 83 unsigned int offset = (buf->page_base + len - 1)
84 & (PAGE_CACHE_SIZE - 1); 84 & (PAGE_SIZE - 1);
85 ptr = kmap_atomic(buf->pages[last]); 85 ptr = kmap_atomic(buf->pages[last]);
86 pad = *(ptr + offset); 86 pad = *(ptr + offset);
87 kunmap_atomic(ptr); 87 kunmap_atomic(ptr);
@@ -174,7 +174,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
174 174
175 now = get_seconds(); 175 now = get_seconds();
176 176
177 blocksize = crypto_blkcipher_blocksize(kctx->enc); 177 blocksize = crypto_skcipher_blocksize(kctx->enc);
178 gss_krb5_add_padding(buf, offset, blocksize); 178 gss_krb5_add_padding(buf, offset, blocksize);
179 BUG_ON((buf->len - offset) % blocksize); 179 BUG_ON((buf->len - offset) % blocksize);
180 plainlen = conflen + buf->len - offset; 180 plainlen = conflen + buf->len - offset;
@@ -239,10 +239,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
239 return GSS_S_FAILURE; 239 return GSS_S_FAILURE;
240 240
241 if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { 241 if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
242 struct crypto_blkcipher *cipher; 242 struct crypto_skcipher *cipher;
243 int err; 243 int err;
244 cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, 244 cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
245 CRYPTO_ALG_ASYNC); 245 CRYPTO_ALG_ASYNC);
246 if (IS_ERR(cipher)) 246 if (IS_ERR(cipher))
247 return GSS_S_FAILURE; 247 return GSS_S_FAILURE;
248 248
@@ -250,7 +250,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
250 250
251 err = gss_encrypt_xdr_buf(cipher, buf, 251 err = gss_encrypt_xdr_buf(cipher, buf,
252 offset + headlen - conflen, pages); 252 offset + headlen - conflen, pages);
253 crypto_free_blkcipher(cipher); 253 crypto_free_skcipher(cipher);
254 if (err) 254 if (err)
255 return GSS_S_FAILURE; 255 return GSS_S_FAILURE;
256 } else { 256 } else {
@@ -327,18 +327,18 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
327 return GSS_S_BAD_SIG; 327 return GSS_S_BAD_SIG;
328 328
329 if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { 329 if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
330 struct crypto_blkcipher *cipher; 330 struct crypto_skcipher *cipher;
331 int err; 331 int err;
332 332
333 cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, 333 cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
334 CRYPTO_ALG_ASYNC); 334 CRYPTO_ALG_ASYNC);
335 if (IS_ERR(cipher)) 335 if (IS_ERR(cipher))
336 return GSS_S_FAILURE; 336 return GSS_S_FAILURE;
337 337
338 krb5_rc4_setup_enc_key(kctx, cipher, seqnum); 338 krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
339 339
340 err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset); 340 err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
341 crypto_free_blkcipher(cipher); 341 crypto_free_skcipher(cipher);
342 if (err) 342 if (err)
343 return GSS_S_DEFECTIVE_TOKEN; 343 return GSS_S_DEFECTIVE_TOKEN;
344 } else { 344 } else {
@@ -371,7 +371,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
371 /* Copy the data back to the right position. XXX: Would probably be 371 /* Copy the data back to the right position. XXX: Would probably be
372 * better to copy and encrypt at the same time. */ 372 * better to copy and encrypt at the same time. */
373 373
374 blocksize = crypto_blkcipher_blocksize(kctx->enc); 374 blocksize = crypto_skcipher_blocksize(kctx->enc);
375 data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) + 375 data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
376 conflen; 376 conflen;
377 orig_start = buf->head[0].iov_base + offset; 377 orig_start = buf->head[0].iov_base + offset;
@@ -473,7 +473,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
473 *ptr++ = 0xff; 473 *ptr++ = 0xff;
474 be16ptr = (__be16 *)ptr; 474 be16ptr = (__be16 *)ptr;
475 475
476 blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc); 476 blocksize = crypto_skcipher_blocksize(kctx->acceptor_enc);
477 *be16ptr++ = 0; 477 *be16ptr++ = 0;
478 /* "inner" token header always uses 0 for RRC */ 478 /* "inner" token header always uses 0 for RRC */
479 *be16ptr++ = 0; 479 *be16ptr++ = 0;
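gss_krb5_wrap.c above only swaps the block-size accessor (crypto_skcipher_blocksize()) and the PAGE_CACHE_* names; the wrap logic itself is unchanged: the v1 token pads the plaintext out to the cipher block size before encrypting, and unwrap reads a trailing pad byte back to strip it. A tiny arithmetic sketch of the conventional pad rule (pad length 1..blocksize, every pad byte holding the pad length); this states the usual GSS/Kerberos convention, not a copy of the kernel helper:

#include <stdio.h>

int main(void)
{
        unsigned int blocksize = 8;     /* e.g. a DES3 block */
        unsigned int len = 29;          /* payload bytes before padding */
        unsigned int pad = blocksize - (len % blocksize);   /* 1..blocksize */

        printf("pad %u bytes of value 0x%02x, padded length %u\n",
               pad, pad, len + pad);
        return 0;
}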
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index c2a2b584a056..8d9eb4d5ddd8 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -113,8 +113,8 @@ const struct rpc_authops authnull_ops = {
113 113
114static 114static
115struct rpc_auth null_auth = { 115struct rpc_auth null_auth = {
116 .au_cslack = 4, 116 .au_cslack = NUL_CALLSLACK,
117 .au_rslack = 2, 117 .au_rslack = NUL_REPLYSLACK,
118 .au_ops = &authnull_ops, 118 .au_ops = &authnull_ops,
119 .au_flavor = RPC_AUTH_NULL, 119 .au_flavor = RPC_AUTH_NULL,
120 .au_count = ATOMIC_INIT(0), 120 .au_count = ATOMIC_INIT(0),
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 548240dd15fc..0d3dd364c22f 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -23,8 +23,6 @@ struct unx_cred {
23}; 23};
24#define uc_uid uc_base.cr_uid 24#define uc_uid uc_base.cr_uid
25 25
26#define UNX_WRITESLACK (21 + XDR_QUADLEN(UNX_MAXNODENAME))
27
28#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 26#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
29# define RPCDBG_FACILITY RPCDBG_AUTH 27# define RPCDBG_FACILITY RPCDBG_AUTH
30#endif 28#endif
@@ -228,8 +226,8 @@ const struct rpc_authops authunix_ops = {
228 226
229static 227static
230struct rpc_auth unix_auth = { 228struct rpc_auth unix_auth = {
231 .au_cslack = UNX_WRITESLACK, 229 .au_cslack = UNX_CALLSLACK,
232 .au_rslack = 2, /* assume AUTH_NULL verf */ 230 .au_rslack = NUL_REPLYSLACK,
233 .au_ops = &authunix_ops, 231 .au_ops = &authunix_ops,
234 .au_flavor = RPC_AUTH_UNIX, 232 .au_flavor = RPC_AUTH_UNIX,
235 .au_count = ATOMIC_INIT(0), 233 .au_count = ATOMIC_INIT(0),
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 273bc3a35425..553bf95f7003 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -881,7 +881,7 @@ static ssize_t cache_downcall(struct address_space *mapping,
881 char *kaddr; 881 char *kaddr;
882 ssize_t ret = -ENOMEM; 882 ssize_t ret = -ENOMEM;
883 883
884 if (count >= PAGE_CACHE_SIZE) 884 if (count >= PAGE_SIZE)
885 goto out_slow; 885 goto out_slow;
886 886
887 page = find_or_create_page(mapping, 0, GFP_KERNEL); 887 page = find_or_create_page(mapping, 0, GFP_KERNEL);
@@ -892,7 +892,7 @@ static ssize_t cache_downcall(struct address_space *mapping,
892 ret = cache_do_downcall(kaddr, buf, count, cd); 892 ret = cache_do_downcall(kaddr, buf, count, cd);
893 kunmap(page); 893 kunmap(page);
894 unlock_page(page); 894 unlock_page(page);
895 page_cache_release(page); 895 put_page(page);
896 return ret; 896 return ret;
897out_slow: 897out_slow:
898 return cache_slow_downcall(buf, count, cd); 898 return cache_slow_downcall(buf, count, cd);
@@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
1182 } 1182 }
1183 1183
1184 crq->q.reader = 0; 1184 crq->q.reader = 0;
1185 crq->item = cache_get(h);
1186 crq->buf = buf; 1185 crq->buf = buf;
1187 crq->len = 0; 1186 crq->len = 0;
1188 crq->readers = 0; 1187 crq->readers = 0;
1189 spin_lock(&queue_lock); 1188 spin_lock(&queue_lock);
1190 if (test_bit(CACHE_PENDING, &h->flags)) 1189 if (test_bit(CACHE_PENDING, &h->flags)) {
1190 crq->item = cache_get(h);
1191 list_add_tail(&crq->q.list, &detail->queue); 1191 list_add_tail(&crq->q.list, &detail->queue);
1192 else 1192 } else
1193 /* Lost a race, no longer PENDING, so don't enqueue */ 1193 /* Lost a race, no longer PENDING, so don't enqueue */
1194 ret = -EAGAIN; 1194 ret = -EAGAIN;
1195 spin_unlock(&queue_lock); 1195 spin_unlock(&queue_lock);
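
The sunrpc_cache_pipe_upcall() hunk above moves the cache_get() call inside the test_bit(CACHE_PENDING) check, so the extra reference on the cache_head is taken only on the path that actually queues the request; when the race is lost, the function backs out without holding a reference it would then have to drop. A minimal userspace sketch of the same take-the-reference-only-when-enqueued shape, using a mutex-protected counter rather than the kernel's cache_get()/cache_put() (struct and function names here are invented for illustration):

    #include <pthread.h>
    #include <stdio.h>

    struct item {
        int refcount;               /* protected by lock */
        int pending;                /* is an upcall still wanted? */
        pthread_mutex_t lock;
    };

    /* Queue a request for 'it'. Returns 0 on success, -1 if the race was lost.
     * The reference is taken only on the path that actually enqueues, so the
     * failure path has nothing to undo. */
    static int queue_request(struct item *it)
    {
        int ret = -1;

        pthread_mutex_lock(&it->lock);
        if (it->pending) {
            it->refcount++;         /* the reference travels with the queued request */
            /* ...list_add_tail() equivalent would go here... */
            ret = 0;
        }
        pthread_mutex_unlock(&it->lock);
        return ret;
    }

    int main(void)
    {
        struct item it = { .refcount = 1, .pending = 1,
                           .lock = PTHREAD_MUTEX_INITIALIZER };

        if (queue_request(&it) == 0)
            printf("queued, refcount now %d\n", it.refcount);

        it.pending = 0;             /* simulate losing the race */
        if (queue_request(&it) < 0)
            printf("lost the race, refcount still %d\n", it.refcount);
        return 0;
    }
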
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b7f21044f4d8..7e0c9bf22df8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -354,6 +354,7 @@ static void rpc_free_clid(struct rpc_clnt *clnt)
354} 354}
355 355
356static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, 356static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
357 struct rpc_xprt_switch *xps,
357 struct rpc_xprt *xprt, 358 struct rpc_xprt *xprt,
358 struct rpc_clnt *parent) 359 struct rpc_clnt *parent)
359{ 360{
@@ -411,6 +412,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
411 } 412 }
412 413
413 rpc_clnt_set_transport(clnt, xprt, timeout); 414 rpc_clnt_set_transport(clnt, xprt, timeout);
415 xprt_iter_init(&clnt->cl_xpi, xps);
416 xprt_switch_put(xps);
414 417
415 clnt->cl_rtt = &clnt->cl_rtt_default; 418 clnt->cl_rtt = &clnt->cl_rtt_default;
416 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); 419 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
@@ -438,6 +441,7 @@ out_no_clid:
438out_err: 441out_err:
439 rpciod_down(); 442 rpciod_down();
440out_no_rpciod: 443out_no_rpciod:
444 xprt_switch_put(xps);
441 xprt_put(xprt); 445 xprt_put(xprt);
442 return ERR_PTR(err); 446 return ERR_PTR(err);
443} 447}
@@ -446,8 +450,13 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
446 struct rpc_xprt *xprt) 450 struct rpc_xprt *xprt)
447{ 451{
448 struct rpc_clnt *clnt = NULL; 452 struct rpc_clnt *clnt = NULL;
453 struct rpc_xprt_switch *xps;
449 454
450 clnt = rpc_new_client(args, xprt, NULL); 455 xps = xprt_switch_alloc(xprt, GFP_KERNEL);
456 if (xps == NULL)
457 return ERR_PTR(-ENOMEM);
458
459 clnt = rpc_new_client(args, xps, xprt, NULL);
451 if (IS_ERR(clnt)) 460 if (IS_ERR(clnt))
452 return clnt; 461 return clnt;
453 462
@@ -564,6 +573,7 @@ EXPORT_SYMBOL_GPL(rpc_create);
564static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, 573static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
565 struct rpc_clnt *clnt) 574 struct rpc_clnt *clnt)
566{ 575{
576 struct rpc_xprt_switch *xps;
567 struct rpc_xprt *xprt; 577 struct rpc_xprt *xprt;
568 struct rpc_clnt *new; 578 struct rpc_clnt *new;
569 int err; 579 int err;
@@ -571,13 +581,17 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
571 err = -ENOMEM; 581 err = -ENOMEM;
572 rcu_read_lock(); 582 rcu_read_lock();
573 xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); 583 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
584 xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
574 rcu_read_unlock(); 585 rcu_read_unlock();
575 if (xprt == NULL) 586 if (xprt == NULL || xps == NULL) {
587 xprt_put(xprt);
588 xprt_switch_put(xps);
576 goto out_err; 589 goto out_err;
590 }
577 args->servername = xprt->servername; 591 args->servername = xprt->servername;
578 args->nodename = clnt->cl_nodename; 592 args->nodename = clnt->cl_nodename;
579 593
580 new = rpc_new_client(args, xprt, clnt); 594 new = rpc_new_client(args, xps, xprt, clnt);
581 if (IS_ERR(new)) { 595 if (IS_ERR(new)) {
582 err = PTR_ERR(new); 596 err = PTR_ERR(new);
583 goto out_err; 597 goto out_err;
@@ -657,6 +671,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
657{ 671{
658 const struct rpc_timeout *old_timeo; 672 const struct rpc_timeout *old_timeo;
659 rpc_authflavor_t pseudoflavor; 673 rpc_authflavor_t pseudoflavor;
674 struct rpc_xprt_switch *xps, *oldxps;
660 struct rpc_xprt *xprt, *old; 675 struct rpc_xprt *xprt, *old;
661 struct rpc_clnt *parent; 676 struct rpc_clnt *parent;
662 int err; 677 int err;
@@ -668,10 +683,17 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
668 return PTR_ERR(xprt); 683 return PTR_ERR(xprt);
669 } 684 }
670 685
686 xps = xprt_switch_alloc(xprt, GFP_KERNEL);
687 if (xps == NULL) {
688 xprt_put(xprt);
689 return -ENOMEM;
690 }
691
671 pseudoflavor = clnt->cl_auth->au_flavor; 692 pseudoflavor = clnt->cl_auth->au_flavor;
672 693
673 old_timeo = clnt->cl_timeout; 694 old_timeo = clnt->cl_timeout;
674 old = rpc_clnt_set_transport(clnt, xprt, timeout); 695 old = rpc_clnt_set_transport(clnt, xprt, timeout);
696 oldxps = xprt_iter_xchg_switch(&clnt->cl_xpi, xps);
675 697
676 rpc_unregister_client(clnt); 698 rpc_unregister_client(clnt);
677 __rpc_clnt_remove_pipedir(clnt); 699 __rpc_clnt_remove_pipedir(clnt);
@@ -697,20 +719,74 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
697 synchronize_rcu(); 719 synchronize_rcu();
698 if (parent != clnt) 720 if (parent != clnt)
699 rpc_release_client(parent); 721 rpc_release_client(parent);
722 xprt_switch_put(oldxps);
700 xprt_put(old); 723 xprt_put(old);
701 dprintk("RPC: replaced xprt for clnt %p\n", clnt); 724 dprintk("RPC: replaced xprt for clnt %p\n", clnt);
702 return 0; 725 return 0;
703 726
704out_revert: 727out_revert:
728 xps = xprt_iter_xchg_switch(&clnt->cl_xpi, oldxps);
705 rpc_clnt_set_transport(clnt, old, old_timeo); 729 rpc_clnt_set_transport(clnt, old, old_timeo);
706 clnt->cl_parent = parent; 730 clnt->cl_parent = parent;
707 rpc_client_register(clnt, pseudoflavor, NULL); 731 rpc_client_register(clnt, pseudoflavor, NULL);
732 xprt_switch_put(xps);
708 xprt_put(xprt); 733 xprt_put(xprt);
709 dprintk("RPC: failed to switch xprt for clnt %p\n", clnt); 734 dprintk("RPC: failed to switch xprt for clnt %p\n", clnt);
710 return err; 735 return err;
711} 736}
712EXPORT_SYMBOL_GPL(rpc_switch_client_transport); 737EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
713 738
739static
740int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
741{
742 struct rpc_xprt_switch *xps;
743
744 rcu_read_lock();
745 xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
746 rcu_read_unlock();
747 if (xps == NULL)
748 return -EAGAIN;
749 xprt_iter_init_listall(xpi, xps);
750 xprt_switch_put(xps);
751 return 0;
752}
753
754/**
755 * rpc_clnt_iterate_for_each_xprt - Apply a function to all transports
756 * @clnt: pointer to client
757 * @fn: function to apply
758 * @data: void pointer to function data
759 *
760 * Iterates through the list of RPC transports currently attached to the
761 * client and applies the function fn(clnt, xprt, data).
762 *
763 * On error, the iteration stops, and the function returns the error value.
764 */
765int rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt,
766 int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *),
767 void *data)
768{
769 struct rpc_xprt_iter xpi;
770 int ret;
771
772 ret = rpc_clnt_xprt_iter_init(clnt, &xpi);
773 if (ret)
774 return ret;
775 for (;;) {
776 struct rpc_xprt *xprt = xprt_iter_get_next(&xpi);
777
778 if (!xprt)
779 break;
780 ret = fn(clnt, xprt, data);
781 xprt_put(xprt);
782 if (ret < 0)
783 break;
784 }
785 xprt_iter_destroy(&xpi);
786 return ret;
787}
788EXPORT_SYMBOL_GPL(rpc_clnt_iterate_for_each_xprt);
789
714/* 790/*
715 * Kill all tasks for the given client. 791 * Kill all tasks for the given client.
716 * XXX: kill their descendants as well? 792 * XXX: kill their descendants as well?
@@ -783,6 +859,7 @@ rpc_free_client(struct rpc_clnt *clnt)
783 rpc_free_iostats(clnt->cl_metrics); 859 rpc_free_iostats(clnt->cl_metrics);
784 clnt->cl_metrics = NULL; 860 clnt->cl_metrics = NULL;
785 xprt_put(rcu_dereference_raw(clnt->cl_xprt)); 861 xprt_put(rcu_dereference_raw(clnt->cl_xprt));
862 xprt_iter_destroy(&clnt->cl_xpi);
786 rpciod_down(); 863 rpciod_down();
787 rpc_free_clid(clnt); 864 rpc_free_clid(clnt);
788 kfree(clnt); 865 kfree(clnt);
@@ -868,6 +945,7 @@ EXPORT_SYMBOL_GPL(rpc_bind_new_program);
868void rpc_task_release_client(struct rpc_task *task) 945void rpc_task_release_client(struct rpc_task *task)
869{ 946{
870 struct rpc_clnt *clnt = task->tk_client; 947 struct rpc_clnt *clnt = task->tk_client;
948 struct rpc_xprt *xprt = task->tk_xprt;
871 949
872 if (clnt != NULL) { 950 if (clnt != NULL) {
873 /* Remove from client task list */ 951 /* Remove from client task list */
@@ -878,13 +956,22 @@ void rpc_task_release_client(struct rpc_task *task)
878 956
879 rpc_release_client(clnt); 957 rpc_release_client(clnt);
880 } 958 }
959
960 if (xprt != NULL) {
961 task->tk_xprt = NULL;
962
963 xprt_put(xprt);
964 }
881} 965}
882 966
883static 967static
884void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) 968void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
885{ 969{
970
886 if (clnt != NULL) { 971 if (clnt != NULL) {
887 rpc_task_release_client(task); 972 rpc_task_release_client(task);
973 if (task->tk_xprt == NULL)
974 task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
888 task->tk_client = clnt; 975 task->tk_client = clnt;
889 atomic_inc(&clnt->cl_count); 976 atomic_inc(&clnt->cl_count);
890 if (clnt->cl_softrtry) 977 if (clnt->cl_softrtry)
@@ -900,14 +987,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
900 } 987 }
901} 988}
902 989
903void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
904{
905 rpc_task_release_client(task);
906 rpc_task_set_client(task, clnt);
907}
908EXPORT_SYMBOL_GPL(rpc_task_reset_client);
909
910
911static void 990static void
912rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) 991rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
913{ 992{
@@ -2104,11 +2183,9 @@ call_timeout(struct rpc_task *task)
2104 } 2183 }
2105 if (RPC_IS_SOFT(task)) { 2184 if (RPC_IS_SOFT(task)) {
2106 if (clnt->cl_chatty) { 2185 if (clnt->cl_chatty) {
2107 rcu_read_lock();
2108 printk(KERN_NOTICE "%s: server %s not responding, timed out\n", 2186 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
2109 clnt->cl_program->name, 2187 clnt->cl_program->name,
2110 rcu_dereference(clnt->cl_xprt)->servername); 2188 task->tk_xprt->servername);
2111 rcu_read_unlock();
2112 } 2189 }
2113 if (task->tk_flags & RPC_TASK_TIMEOUT) 2190 if (task->tk_flags & RPC_TASK_TIMEOUT)
2114 rpc_exit(task, -ETIMEDOUT); 2191 rpc_exit(task, -ETIMEDOUT);
@@ -2120,11 +2197,9 @@ call_timeout(struct rpc_task *task)
2120 if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { 2197 if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
2121 task->tk_flags |= RPC_CALL_MAJORSEEN; 2198 task->tk_flags |= RPC_CALL_MAJORSEEN;
2122 if (clnt->cl_chatty) { 2199 if (clnt->cl_chatty) {
2123 rcu_read_lock();
2124 printk(KERN_NOTICE "%s: server %s not responding, still trying\n", 2200 printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
2125 clnt->cl_program->name, 2201 clnt->cl_program->name,
2126 rcu_dereference(clnt->cl_xprt)->servername); 2202 task->tk_xprt->servername);
2127 rcu_read_unlock();
2128 } 2203 }
2129 } 2204 }
2130 rpc_force_rebind(clnt); 2205 rpc_force_rebind(clnt);
@@ -2154,11 +2229,9 @@ call_decode(struct rpc_task *task)
2154 2229
2155 if (task->tk_flags & RPC_CALL_MAJORSEEN) { 2230 if (task->tk_flags & RPC_CALL_MAJORSEEN) {
2156 if (clnt->cl_chatty) { 2231 if (clnt->cl_chatty) {
2157 rcu_read_lock();
2158 printk(KERN_NOTICE "%s: server %s OK\n", 2232 printk(KERN_NOTICE "%s: server %s OK\n",
2159 clnt->cl_program->name, 2233 clnt->cl_program->name,
2160 rcu_dereference(clnt->cl_xprt)->servername); 2234 task->tk_xprt->servername);
2161 rcu_read_unlock();
2162 } 2235 }
2163 task->tk_flags &= ~RPC_CALL_MAJORSEEN; 2236 task->tk_flags &= ~RPC_CALL_MAJORSEEN;
2164 } 2237 }
@@ -2312,11 +2385,9 @@ rpc_verify_header(struct rpc_task *task)
2312 task->tk_action = call_bind; 2385 task->tk_action = call_bind;
2313 goto out_retry; 2386 goto out_retry;
2314 case RPC_AUTH_TOOWEAK: 2387 case RPC_AUTH_TOOWEAK:
2315 rcu_read_lock();
2316 printk(KERN_NOTICE "RPC: server %s requires stronger " 2388 printk(KERN_NOTICE "RPC: server %s requires stronger "
2317 "authentication.\n", 2389 "authentication.\n",
2318 rcu_dereference(clnt->cl_xprt)->servername); 2390 task->tk_xprt->servername);
2319 rcu_read_unlock();
2320 break; 2391 break;
2321 default: 2392 default:
2322 dprintk("RPC: %5u %s: unknown auth error: %x\n", 2393 dprintk("RPC: %5u %s: unknown auth error: %x\n",
@@ -2341,27 +2412,27 @@ rpc_verify_header(struct rpc_task *task)
2341 case RPC_SUCCESS: 2412 case RPC_SUCCESS:
2342 return p; 2413 return p;
2343 case RPC_PROG_UNAVAIL: 2414 case RPC_PROG_UNAVAIL:
2344 dprintk_rcu("RPC: %5u %s: program %u is unsupported " 2415 dprintk("RPC: %5u %s: program %u is unsupported "
2345 "by server %s\n", task->tk_pid, __func__, 2416 "by server %s\n", task->tk_pid, __func__,
2346 (unsigned int)clnt->cl_prog, 2417 (unsigned int)clnt->cl_prog,
2347 rcu_dereference(clnt->cl_xprt)->servername); 2418 task->tk_xprt->servername);
2348 error = -EPFNOSUPPORT; 2419 error = -EPFNOSUPPORT;
2349 goto out_err; 2420 goto out_err;
2350 case RPC_PROG_MISMATCH: 2421 case RPC_PROG_MISMATCH:
2351 dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported " 2422 dprintk("RPC: %5u %s: program %u, version %u unsupported "
2352 "by server %s\n", task->tk_pid, __func__, 2423 "by server %s\n", task->tk_pid, __func__,
2353 (unsigned int)clnt->cl_prog, 2424 (unsigned int)clnt->cl_prog,
2354 (unsigned int)clnt->cl_vers, 2425 (unsigned int)clnt->cl_vers,
2355 rcu_dereference(clnt->cl_xprt)->servername); 2426 task->tk_xprt->servername);
2356 error = -EPROTONOSUPPORT; 2427 error = -EPROTONOSUPPORT;
2357 goto out_err; 2428 goto out_err;
2358 case RPC_PROC_UNAVAIL: 2429 case RPC_PROC_UNAVAIL:
2359 dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, " 2430 dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
2360 "version %u on server %s\n", 2431 "version %u on server %s\n",
2361 task->tk_pid, __func__, 2432 task->tk_pid, __func__,
2362 rpc_proc_name(task), 2433 rpc_proc_name(task),
2363 clnt->cl_prog, clnt->cl_vers, 2434 clnt->cl_prog, clnt->cl_vers,
2364 rcu_dereference(clnt->cl_xprt)->servername); 2435 task->tk_xprt->servername);
2365 error = -EOPNOTSUPP; 2436 error = -EOPNOTSUPP;
2366 goto out_err; 2437 goto out_err;
2367 case RPC_GARBAGE_ARGS: 2438 case RPC_GARBAGE_ARGS:
@@ -2421,7 +2492,10 @@ static int rpc_ping(struct rpc_clnt *clnt)
2421 return err; 2492 return err;
2422} 2493}
2423 2494
2424struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) 2495static
2496struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
2497 struct rpc_xprt *xprt, struct rpc_cred *cred, int flags,
2498 const struct rpc_call_ops *ops, void *data)
2425{ 2499{
2426 struct rpc_message msg = { 2500 struct rpc_message msg = {
2427 .rpc_proc = &rpcproc_null, 2501 .rpc_proc = &rpcproc_null,
@@ -2429,14 +2503,140 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
2429 }; 2503 };
2430 struct rpc_task_setup task_setup_data = { 2504 struct rpc_task_setup task_setup_data = {
2431 .rpc_client = clnt, 2505 .rpc_client = clnt,
2506 .rpc_xprt = xprt,
2432 .rpc_message = &msg, 2507 .rpc_message = &msg,
2433 .callback_ops = &rpc_default_ops, 2508 .callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
2509 .callback_data = data,
2434 .flags = flags, 2510 .flags = flags,
2435 }; 2511 };
2512
2436 return rpc_run_task(&task_setup_data); 2513 return rpc_run_task(&task_setup_data);
2437} 2514}
2515
2516struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
2517{
2518 return rpc_call_null_helper(clnt, NULL, cred, flags, NULL, NULL);
2519}
2438EXPORT_SYMBOL_GPL(rpc_call_null); 2520EXPORT_SYMBOL_GPL(rpc_call_null);
2439 2521
2522struct rpc_cb_add_xprt_calldata {
2523 struct rpc_xprt_switch *xps;
2524 struct rpc_xprt *xprt;
2525};
2526
2527static void rpc_cb_add_xprt_done(struct rpc_task *task, void *calldata)
2528{
2529 struct rpc_cb_add_xprt_calldata *data = calldata;
2530
2531 if (task->tk_status == 0)
2532 rpc_xprt_switch_add_xprt(data->xps, data->xprt);
2533}
2534
2535static void rpc_cb_add_xprt_release(void *calldata)
2536{
2537 struct rpc_cb_add_xprt_calldata *data = calldata;
2538
2539 xprt_put(data->xprt);
2540 xprt_switch_put(data->xps);
2541 kfree(data);
2542}
2543
 2544static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
2545 .rpc_call_done = rpc_cb_add_xprt_done,
2546 .rpc_release = rpc_cb_add_xprt_release,
2547};
2548
2549/**
2550 * rpc_clnt_test_and_add_xprt - Test and add a new transport to a rpc_clnt
2551 * @clnt: pointer to struct rpc_clnt
 2552 * @xps: pointer to struct rpc_xprt_switch
 2553 * @xprt: pointer to struct rpc_xprt
2554 * @dummy: unused
2555 */
2556int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
2557 struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
2558 void *dummy)
2559{
2560 struct rpc_cb_add_xprt_calldata *data;
2561 struct rpc_cred *cred;
2562 struct rpc_task *task;
2563
2564 data = kmalloc(sizeof(*data), GFP_NOFS);
2565 if (!data)
2566 return -ENOMEM;
2567 data->xps = xprt_switch_get(xps);
2568 data->xprt = xprt_get(xprt);
2569
2570 cred = authnull_ops.lookup_cred(NULL, NULL, 0);
2571 task = rpc_call_null_helper(clnt, xprt, cred,
2572 RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC,
2573 &rpc_cb_add_xprt_call_ops, data);
2574 put_rpccred(cred);
2575 if (IS_ERR(task))
2576 return PTR_ERR(task);
2577 rpc_put_task(task);
2578 return 1;
2579}
2580EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
2581
2582/**
2583 * rpc_clnt_add_xprt - Add a new transport to a rpc_clnt
2584 * @clnt: pointer to struct rpc_clnt
2585 * @xprtargs: pointer to struct xprt_create
2586 * @setup: callback to test and/or set up the connection
2587 * @data: pointer to setup function data
2588 *
2589 * Creates a new transport using the parameters set in args and
2590 * adds it to clnt.
 2591 * If a setup callback is supplied, it is used to test that connectivity
 2592 * succeeds before the new transport is added.
2593 *
2594 */
2595int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
2596 struct xprt_create *xprtargs,
2597 int (*setup)(struct rpc_clnt *,
2598 struct rpc_xprt_switch *,
2599 struct rpc_xprt *,
2600 void *),
2601 void *data)
2602{
2603 struct rpc_xprt_switch *xps;
2604 struct rpc_xprt *xprt;
2605 unsigned char resvport;
2606 int ret = 0;
2607
2608 rcu_read_lock();
2609 xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
2610 xprt = xprt_iter_xprt(&clnt->cl_xpi);
2611 if (xps == NULL || xprt == NULL) {
2612 rcu_read_unlock();
2613 return -EAGAIN;
2614 }
2615 resvport = xprt->resvport;
2616 rcu_read_unlock();
2617
2618 xprt = xprt_create_transport(xprtargs);
2619 if (IS_ERR(xprt)) {
2620 ret = PTR_ERR(xprt);
2621 goto out_put_switch;
2622 }
2623 xprt->resvport = resvport;
2624
2625 rpc_xprt_switch_set_roundrobin(xps);
2626 if (setup) {
2627 ret = setup(clnt, xps, xprt, data);
2628 if (ret != 0)
2629 goto out_put_xprt;
2630 }
2631 rpc_xprt_switch_add_xprt(xps, xprt);
2632out_put_xprt:
2633 xprt_put(xprt);
2634out_put_switch:
2635 xprt_switch_put(xps);
2636 return ret;
2637}
2638EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
2639
2440#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 2640#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
2441static void rpc_show_header(void) 2641static void rpc_show_header(void)
2442{ 2642{
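
rpc_clnt_add_xprt() above uses the usual goto-based unwinding: each failure jumps to the label that releases exactly what has been acquired so far (out_put_xprt, then out_put_switch), and the success path falls through the same labels because rpc_xprt_switch_add_xprt() takes its own reference on the transport. A self-contained sketch of that unwinding shape, with stand-in resources in place of the xprt and the switch (names invented):

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-ins for xprt_switch_get() / xprt_create_transport() etc. */
    static void *acquire(const char *what) { printf("get %s\n", what); return malloc(1); }
    static void release(const char *what, void *p) { printf("put %s\n", what); free(p); }

    static int add_transport(int setup_should_fail)
    {
        void *xps, *xprt;
        int ret = 0;

        xps = acquire("switch");
        if (!xps)
            return -1;

        xprt = acquire("transport");
        if (!xprt) {
            ret = -1;
            goto out_put_switch;
        }

        if (setup_should_fail) {        /* the setup() callback vetoed the transport */
            ret = -1;
            goto out_put_xprt;
        }
        printf("transport added\n");    /* the list would take its own reference here,
                                           so the local ones are dropped on every path */
    out_put_xprt:
        release("transport", xprt);
    out_put_switch:
        release("switch", xps);
        return ret;
    }

    int main(void)
    {
        add_transport(0);
        add_transport(1);
        return 0;
    }
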
@@ -2483,57 +2683,39 @@ void rpc_show_tasks(struct net *net)
2483#endif 2683#endif
2484 2684
2485#if IS_ENABLED(CONFIG_SUNRPC_SWAP) 2685#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
2686static int
2687rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt,
2688 struct rpc_xprt *xprt,
2689 void *dummy)
2690{
2691 return xprt_enable_swap(xprt);
2692}
2693
2486int 2694int
2487rpc_clnt_swap_activate(struct rpc_clnt *clnt) 2695rpc_clnt_swap_activate(struct rpc_clnt *clnt)
2488{ 2696{
2489 int ret = 0; 2697 if (atomic_inc_return(&clnt->cl_swapper) == 1)
2490 struct rpc_xprt *xprt; 2698 return rpc_clnt_iterate_for_each_xprt(clnt,
2491 2699 rpc_clnt_swap_activate_callback, NULL);
2492 if (atomic_inc_return(&clnt->cl_swapper) == 1) { 2700 return 0;
2493retry:
2494 rcu_read_lock();
2495 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
2496 rcu_read_unlock();
2497 if (!xprt) {
2498 /*
2499 * If we didn't get a reference, then we likely are
2500 * racing with a migration event. Wait for a grace
2501 * period and try again.
2502 */
2503 synchronize_rcu();
2504 goto retry;
2505 }
2506
2507 ret = xprt_enable_swap(xprt);
2508 xprt_put(xprt);
2509 }
2510 return ret;
2511} 2701}
2512EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate); 2702EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
2513 2703
2704static int
2705rpc_clnt_swap_deactivate_callback(struct rpc_clnt *clnt,
2706 struct rpc_xprt *xprt,
2707 void *dummy)
2708{
2709 xprt_disable_swap(xprt);
2710 return 0;
2711}
2712
2514void 2713void
2515rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) 2714rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
2516{ 2715{
2517 struct rpc_xprt *xprt; 2716 if (atomic_dec_if_positive(&clnt->cl_swapper) == 0)
2518 2717 rpc_clnt_iterate_for_each_xprt(clnt,
2519 if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) { 2718 rpc_clnt_swap_deactivate_callback, NULL);
2520retry:
2521 rcu_read_lock();
2522 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
2523 rcu_read_unlock();
2524 if (!xprt) {
2525 /*
2526 * If we didn't get a reference, then we likely are
2527 * racing with a migration event. Wait for a grace
2528 * period and try again.
2529 */
2530 synchronize_rcu();
2531 goto retry;
2532 }
2533
2534 xprt_disable_swap(xprt);
2535 xprt_put(xprt);
2536 }
2537} 2719}
2538EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); 2720EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
2539#endif /* CONFIG_SUNRPC_SWAP */ 2721#endif /* CONFIG_SUNRPC_SWAP */
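
The new rpc_clnt_iterate_for_each_xprt() is what lets rpc_clnt_swap_activate() and rpc_clnt_swap_deactivate() above drop their open-coded RCU retry loops: the iterator hands back one referenced transport at a time, the callback is applied to it, and iteration stops at the first negative return. A compact userspace sketch of that apply-to-each-with-early-exit shape (types and callback names are invented; the kernel version also takes and drops a reference around each call):

    #include <stdio.h>

    struct transport { const char *name; int enable_ok; };

    /* Apply fn() to every transport; stop and report the first error. */
    static int for_each_transport(struct transport *xprts, int n,
                                  int (*fn)(struct transport *, void *), void *data)
    {
        int i, ret = 0;

        for (i = 0; i < n; i++) {
            ret = fn(&xprts[i], data);
            if (ret < 0)
                break;
        }
        return ret;
    }

    static int enable_swap_cb(struct transport *t, void *unused)
    {
        (void)unused;
        printf("enable swap on %s\n", t->name);
        return t->enable_ok ? 0 : -1;
    }

    int main(void)
    {
        struct transport xprts[] = {
            { "tcp0", 1 }, { "tcp1", 1 }, { "rdma0", 0 }, { "tcp2", 1 },
        };

        if (for_each_transport(xprts, 4, enable_swap_cb, NULL) < 0)
            printf("iteration stopped early on error\n");
        return 0;
    }
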
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 31789ef3e614..fc48eca21fd2 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1390,8 +1390,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1390 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); 1390 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1391 int err; 1391 int err;
1392 1392
1393 sb->s_blocksize = PAGE_CACHE_SIZE; 1393 sb->s_blocksize = PAGE_SIZE;
1394 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 1394 sb->s_blocksize_bits = PAGE_SHIFT;
1395 sb->s_magic = RPCAUTH_GSSMAGIC; 1395 sb->s_magic = RPCAUTH_GSSMAGIC;
1396 sb->s_op = &s_ops; 1396 sb->s_op = &s_ops;
1397 sb->s_d_op = &simple_dentry_operations; 1397 sb->s_d_op = &simple_dentry_operations;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index cf5770d8f49a..5b30603596d0 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -648,10 +648,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
648static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) 648static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
649{ 649{
650 struct rpc_clnt *parent = clnt->cl_parent; 650 struct rpc_clnt *parent = clnt->cl_parent;
651 struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt); 651 struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
652 652
653 while (parent != clnt) { 653 while (parent != clnt) {
654 if (rcu_dereference(parent->cl_xprt) != xprt) 654 if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps)
655 break; 655 break;
656 if (clnt->cl_autobind) 656 if (clnt->cl_autobind)
657 break; 657 break;
@@ -683,11 +683,9 @@ void rpcb_getport_async(struct rpc_task *task)
683 int status; 683 int status;
684 684
685 rcu_read_lock(); 685 rcu_read_lock();
686 do { 686 clnt = rpcb_find_transport_owner(task->tk_client);
687 clnt = rpcb_find_transport_owner(task->tk_client);
688 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
689 } while (xprt == NULL);
690 rcu_read_unlock(); 687 rcu_read_unlock();
688 xprt = xprt_get(task->tk_xprt);
691 689
692 dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", 690 dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
693 task->tk_pid, __func__, 691 task->tk_pid, __func__,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 73ad57a59989..fcfd48d263f6 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -909,6 +909,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
909 /* Initialize workqueue for async tasks */ 909 /* Initialize workqueue for async tasks */
910 task->tk_workqueue = task_setup_data->workqueue; 910 task->tk_workqueue = task_setup_data->workqueue;
911 911
912 task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
913
912 if (task->tk_ops->rpc_call_prepare != NULL) 914 if (task->tk_ops->rpc_call_prepare != NULL)
913 task->tk_action = rpc_prepare_task; 915 task->tk_action = rpc_prepare_task;
914 916
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 2df87f78e518..de70c78025d7 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -96,8 +96,8 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct
96 if (base || xdr->page_base) { 96 if (base || xdr->page_base) {
97 pglen -= base; 97 pglen -= base;
98 base += xdr->page_base; 98 base += xdr->page_base;
99 ppage += base >> PAGE_CACHE_SHIFT; 99 ppage += base >> PAGE_SHIFT;
100 base &= ~PAGE_CACHE_MASK; 100 base &= ~PAGE_MASK;
101 } 101 }
102 do { 102 do {
103 char *kaddr; 103 char *kaddr;
@@ -113,7 +113,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct
113 } 113 }
114 } 114 }
115 115
116 len = PAGE_CACHE_SIZE; 116 len = PAGE_SIZE;
117 kaddr = kmap_atomic(*ppage); 117 kaddr = kmap_atomic(*ppage);
118 if (base) { 118 if (base) {
119 len -= base; 119 len -= base;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 4439ac4c1b53..6bdb3865212d 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(xdr_inline_pages);
164 * Note: the addresses pgto_base and pgfrom_base are both calculated in 164 * Note: the addresses pgto_base and pgfrom_base are both calculated in
165 * the same way: 165 * the same way:
166 * if a memory area starts at byte 'base' in page 'pages[i]', 166 * if a memory area starts at byte 'base' in page 'pages[i]',
167 * then its address is given as (i << PAGE_CACHE_SHIFT) + base 167 * then its address is given as (i << PAGE_SHIFT) + base
168 * Also note: pgfrom_base must be < pgto_base, but the memory areas 168 * Also note: pgfrom_base must be < pgto_base, but the memory areas
169 * they point to may overlap. 169 * they point to may overlap.
170 */ 170 */
@@ -181,20 +181,20 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
181 pgto_base += len; 181 pgto_base += len;
182 pgfrom_base += len; 182 pgfrom_base += len;
183 183
184 pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT); 184 pgto = pages + (pgto_base >> PAGE_SHIFT);
185 pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT); 185 pgfrom = pages + (pgfrom_base >> PAGE_SHIFT);
186 186
187 pgto_base &= ~PAGE_CACHE_MASK; 187 pgto_base &= ~PAGE_MASK;
188 pgfrom_base &= ~PAGE_CACHE_MASK; 188 pgfrom_base &= ~PAGE_MASK;
189 189
190 do { 190 do {
191 /* Are any pointers crossing a page boundary? */ 191 /* Are any pointers crossing a page boundary? */
192 if (pgto_base == 0) { 192 if (pgto_base == 0) {
193 pgto_base = PAGE_CACHE_SIZE; 193 pgto_base = PAGE_SIZE;
194 pgto--; 194 pgto--;
195 } 195 }
196 if (pgfrom_base == 0) { 196 if (pgfrom_base == 0) {
197 pgfrom_base = PAGE_CACHE_SIZE; 197 pgfrom_base = PAGE_SIZE;
198 pgfrom--; 198 pgfrom--;
199 } 199 }
200 200
@@ -236,11 +236,11 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
236 char *vto; 236 char *vto;
237 size_t copy; 237 size_t copy;
238 238
239 pgto = pages + (pgbase >> PAGE_CACHE_SHIFT); 239 pgto = pages + (pgbase >> PAGE_SHIFT);
240 pgbase &= ~PAGE_CACHE_MASK; 240 pgbase &= ~PAGE_MASK;
241 241
242 for (;;) { 242 for (;;) {
243 copy = PAGE_CACHE_SIZE - pgbase; 243 copy = PAGE_SIZE - pgbase;
244 if (copy > len) 244 if (copy > len)
245 copy = len; 245 copy = len;
246 246
@@ -253,7 +253,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
253 break; 253 break;
254 254
255 pgbase += copy; 255 pgbase += copy;
256 if (pgbase == PAGE_CACHE_SIZE) { 256 if (pgbase == PAGE_SIZE) {
257 flush_dcache_page(*pgto); 257 flush_dcache_page(*pgto);
258 pgbase = 0; 258 pgbase = 0;
259 pgto++; 259 pgto++;
@@ -280,11 +280,11 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
280 char *vfrom; 280 char *vfrom;
281 size_t copy; 281 size_t copy;
282 282
283 pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT); 283 pgfrom = pages + (pgbase >> PAGE_SHIFT);
284 pgbase &= ~PAGE_CACHE_MASK; 284 pgbase &= ~PAGE_MASK;
285 285
286 do { 286 do {
287 copy = PAGE_CACHE_SIZE - pgbase; 287 copy = PAGE_SIZE - pgbase;
288 if (copy > len) 288 if (copy > len)
289 copy = len; 289 copy = len;
290 290
@@ -293,7 +293,7 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
293 kunmap_atomic(vfrom); 293 kunmap_atomic(vfrom);
294 294
295 pgbase += copy; 295 pgbase += copy;
296 if (pgbase == PAGE_CACHE_SIZE) { 296 if (pgbase == PAGE_SIZE) {
297 pgbase = 0; 297 pgbase = 0;
298 pgfrom++; 298 pgfrom++;
299 } 299 }
@@ -1038,8 +1038,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
1038 if (base < buf->page_len) { 1038 if (base < buf->page_len) {
1039 subbuf->page_len = min(buf->page_len - base, len); 1039 subbuf->page_len = min(buf->page_len - base, len);
1040 base += buf->page_base; 1040 base += buf->page_base;
1041 subbuf->page_base = base & ~PAGE_CACHE_MASK; 1041 subbuf->page_base = base & ~PAGE_MASK;
1042 subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT]; 1042 subbuf->pages = &buf->pages[base >> PAGE_SHIFT];
1043 len -= subbuf->page_len; 1043 len -= subbuf->page_len;
1044 base = 0; 1044 base = 0;
1045 } else { 1045 } else {
@@ -1297,9 +1297,9 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base,
1297 todo -= avail_here; 1297 todo -= avail_here;
1298 1298
1299 base += buf->page_base; 1299 base += buf->page_base;
1300 ppages = buf->pages + (base >> PAGE_CACHE_SHIFT); 1300 ppages = buf->pages + (base >> PAGE_SHIFT);
1301 base &= ~PAGE_CACHE_MASK; 1301 base &= ~PAGE_MASK;
1302 avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base, 1302 avail_page = min_t(unsigned int, PAGE_SIZE - base,
1303 avail_here); 1303 avail_here);
1304 c = kmap(*ppages) + base; 1304 c = kmap(*ppages) + base;
1305 1305
@@ -1383,7 +1383,7 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base,
1383 } 1383 }
1384 1384
1385 avail_page = min(avail_here, 1385 avail_page = min(avail_here,
1386 (unsigned int) PAGE_CACHE_SIZE); 1386 (unsigned int) PAGE_SIZE);
1387 } 1387 }
1388 base = buf->page_len; /* align to start of tail */ 1388 base = buf->page_len; /* align to start of tail */
1389 } 1389 }
@@ -1479,9 +1479,9 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
1479 if (page_len > len) 1479 if (page_len > len)
1480 page_len = len; 1480 page_len = len;
1481 len -= page_len; 1481 len -= page_len;
1482 page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); 1482 page_offset = (offset + buf->page_base) & (PAGE_SIZE - 1);
1483 i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; 1483 i = (offset + buf->page_base) >> PAGE_SHIFT;
1484 thislen = PAGE_CACHE_SIZE - page_offset; 1484 thislen = PAGE_SIZE - page_offset;
1485 do { 1485 do {
1486 if (thislen > page_len) 1486 if (thislen > page_len)
1487 thislen = page_len; 1487 thislen = page_len;
@@ -1492,7 +1492,7 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
1492 page_len -= thislen; 1492 page_len -= thislen;
1493 i++; 1493 i++;
1494 page_offset = 0; 1494 page_offset = 0;
1495 thislen = PAGE_CACHE_SIZE; 1495 thislen = PAGE_SIZE;
1496 } while (page_len != 0); 1496 } while (page_len != 0);
1497 offset = 0; 1497 offset = 0;
1498 } 1498 }
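
The xdr.c comment above spells out the addressing convention: a byte at offset 'base' into a page array lives in pages[base >> PAGE_SHIFT] at offset (base & ~PAGE_MASK). Since PAGE_CACHE_SIZE has long been defined as PAGE_SIZE, the PAGE_CACHE_* to PAGE_* substitutions in this file are purely mechanical. A tiny standalone demo of the split-and-reassemble arithmetic, assuming 4 KiB pages (macro names are local to the demo):

    #include <stdio.h>
    #include <stddef.h>

    #define MY_PAGE_SHIFT 12u                        /* assume 4 KiB pages for the demo */
    #define MY_PAGE_SIZE  (1u << MY_PAGE_SHIFT)
    #define MY_PAGE_MASK  (~(MY_PAGE_SIZE - 1))

    int main(void)
    {
        size_t base = 9000;                          /* byte offset into a page array */

        size_t page_index = base >> MY_PAGE_SHIFT;   /* which page: pages[i] */
        size_t in_page    = base & ~MY_PAGE_MASK;    /* offset within that page */

        /* Reassemble: (i << PAGE_SHIFT) + offset gives back the original position. */
        printf("base=%zu -> page %zu, offset %zu, rebuilt %zu\n",
               base, page_index, in_page,
               (page_index << MY_PAGE_SHIFT) + in_page);
        return 0;
    }
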
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 37edea6fa92d..216a1385718a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -48,6 +48,7 @@
48#include <linux/sunrpc/clnt.h> 48#include <linux/sunrpc/clnt.h>
49#include <linux/sunrpc/metrics.h> 49#include <linux/sunrpc/metrics.h>
50#include <linux/sunrpc/bc_xprt.h> 50#include <linux/sunrpc/bc_xprt.h>
51#include <linux/rcupdate.h>
51 52
52#include <trace/events/sunrpc.h> 53#include <trace/events/sunrpc.h>
53 54
@@ -1166,7 +1167,7 @@ void xprt_free(struct rpc_xprt *xprt)
1166{ 1167{
1167 put_net(xprt->xprt_net); 1168 put_net(xprt->xprt_net);
1168 xprt_free_all_slots(xprt); 1169 xprt_free_all_slots(xprt);
1169 kfree(xprt); 1170 kfree_rcu(xprt, rcu);
1170} 1171}
1171EXPORT_SYMBOL_GPL(xprt_free); 1172EXPORT_SYMBOL_GPL(xprt_free);
1172 1173
@@ -1180,7 +1181,7 @@ EXPORT_SYMBOL_GPL(xprt_free);
1180 */ 1181 */
1181void xprt_reserve(struct rpc_task *task) 1182void xprt_reserve(struct rpc_task *task)
1182{ 1183{
1183 struct rpc_xprt *xprt; 1184 struct rpc_xprt *xprt = task->tk_xprt;
1184 1185
1185 task->tk_status = 0; 1186 task->tk_status = 0;
1186 if (task->tk_rqstp != NULL) 1187 if (task->tk_rqstp != NULL)
@@ -1188,11 +1189,8 @@ void xprt_reserve(struct rpc_task *task)
1188 1189
1189 task->tk_timeout = 0; 1190 task->tk_timeout = 0;
1190 task->tk_status = -EAGAIN; 1191 task->tk_status = -EAGAIN;
1191 rcu_read_lock();
1192 xprt = rcu_dereference(task->tk_client->cl_xprt);
1193 if (!xprt_throttle_congested(xprt, task)) 1192 if (!xprt_throttle_congested(xprt, task))
1194 xprt->ops->alloc_slot(xprt, task); 1193 xprt->ops->alloc_slot(xprt, task);
1195 rcu_read_unlock();
1196} 1194}
1197 1195
1198/** 1196/**
@@ -1206,7 +1204,7 @@ void xprt_reserve(struct rpc_task *task)
1206 */ 1204 */
1207void xprt_retry_reserve(struct rpc_task *task) 1205void xprt_retry_reserve(struct rpc_task *task)
1208{ 1206{
1209 struct rpc_xprt *xprt; 1207 struct rpc_xprt *xprt = task->tk_xprt;
1210 1208
1211 task->tk_status = 0; 1209 task->tk_status = 0;
1212 if (task->tk_rqstp != NULL) 1210 if (task->tk_rqstp != NULL)
@@ -1214,10 +1212,7 @@ void xprt_retry_reserve(struct rpc_task *task)
1214 1212
1215 task->tk_timeout = 0; 1213 task->tk_timeout = 0;
1216 task->tk_status = -EAGAIN; 1214 task->tk_status = -EAGAIN;
1217 rcu_read_lock();
1218 xprt = rcu_dereference(task->tk_client->cl_xprt);
1219 xprt->ops->alloc_slot(xprt, task); 1215 xprt->ops->alloc_slot(xprt, task);
1220 rcu_read_unlock();
1221} 1216}
1222 1217
1223static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) 1218static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -1264,11 +1259,9 @@ void xprt_release(struct rpc_task *task)
1264 1259
1265 if (req == NULL) { 1260 if (req == NULL) {
1266 if (task->tk_client) { 1261 if (task->tk_client) {
1267 rcu_read_lock(); 1262 xprt = task->tk_xprt;
1268 xprt = rcu_dereference(task->tk_client->cl_xprt);
1269 if (xprt->snd_task == task) 1263 if (xprt->snd_task == task)
1270 xprt_release_write(xprt, task); 1264 xprt_release_write(xprt, task);
1271 rcu_read_unlock();
1272 } 1265 }
1273 return; 1266 return;
1274 } 1267 }
@@ -1307,7 +1300,7 @@ void xprt_release(struct rpc_task *task)
1307 1300
1308static void xprt_init(struct rpc_xprt *xprt, struct net *net) 1301static void xprt_init(struct rpc_xprt *xprt, struct net *net)
1309{ 1302{
1310 atomic_set(&xprt->count, 1); 1303 kref_init(&xprt->kref);
1311 1304
1312 spin_lock_init(&xprt->transport_lock); 1305 spin_lock_init(&xprt->transport_lock);
1313 spin_lock_init(&xprt->reserve_lock); 1306 spin_lock_init(&xprt->reserve_lock);
@@ -1318,6 +1311,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
1318 spin_lock_init(&xprt->bc_pa_lock); 1311 spin_lock_init(&xprt->bc_pa_lock);
1319 INIT_LIST_HEAD(&xprt->bc_pa_list); 1312 INIT_LIST_HEAD(&xprt->bc_pa_list);
1320#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1313#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1314 INIT_LIST_HEAD(&xprt->xprt_switch);
1321 1315
1322 xprt->last_used = jiffies; 1316 xprt->last_used = jiffies;
1323 xprt->cwnd = RPC_INITCWND; 1317 xprt->cwnd = RPC_INITCWND;
@@ -1415,6 +1409,24 @@ static void xprt_destroy(struct rpc_xprt *xprt)
1415 xprt->ops->destroy(xprt); 1409 xprt->ops->destroy(xprt);
1416} 1410}
1417 1411
1412static void xprt_destroy_kref(struct kref *kref)
1413{
1414 xprt_destroy(container_of(kref, struct rpc_xprt, kref));
1415}
1416
1417/**
1418 * xprt_get - return a reference to an RPC transport.
1419 * @xprt: pointer to the transport
1420 *
1421 */
1422struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
1423{
1424 if (xprt != NULL && kref_get_unless_zero(&xprt->kref))
1425 return xprt;
1426 return NULL;
1427}
1428EXPORT_SYMBOL_GPL(xprt_get);
1429
1418/** 1430/**
1419 * xprt_put - release a reference to an RPC transport. 1431 * xprt_put - release a reference to an RPC transport.
1420 * @xprt: pointer to the transport 1432 * @xprt: pointer to the transport
@@ -1422,7 +1434,7 @@ static void xprt_destroy(struct rpc_xprt *xprt)
1422 */ 1434 */
1423void xprt_put(struct rpc_xprt *xprt) 1435void xprt_put(struct rpc_xprt *xprt)
1424{ 1436{
1425 if (atomic_dec_and_test(&xprt->count)) 1437 if (xprt != NULL)
1426 xprt_destroy(xprt); 1438 kref_put(&xprt->kref, xprt_destroy_kref);
1427} 1439}
1428EXPORT_SYMBOL_GPL(xprt_put); 1440EXPORT_SYMBOL_GPL(xprt_put);
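
The xprt.c hunks above replace the bare atomic count with a struct kref: xprt_get() now uses kref_get_unless_zero(), so a new reference can only be obtained while the object is still live, and the final xprt_put() frees the transport via kfree_rcu(). A standalone sketch of the get-unless-zero/put pattern using C11 atomics in place of the kernel's kref helpers (illustrative only, not the kernel API):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct obj {
        atomic_int refcount;
    };

    /* Take a reference only if the count has not already dropped to zero. */
    static struct obj *obj_get(struct obj *o)
    {
        int old = atomic_load(&o->refcount);

        while (old != 0) {
            if (atomic_compare_exchange_weak(&o->refcount, &old, old + 1))
                return o;           /* got a reference */
            /* 'old' was reloaded by the failed exchange; retry */
        }
        return NULL;                /* object is already on its way to being freed */
    }

    static void obj_put(struct obj *o)
    {
        if (atomic_fetch_sub(&o->refcount, 1) == 1) {
            printf("last reference dropped, freeing\n");
            free(o);                /* the kernel defers this with kfree_rcu() */
        }
    }

    int main(void)
    {
        struct obj *o = malloc(sizeof(*o));

        if (!o)
            return 1;
        atomic_init(&o->refcount, 1);
        if (obj_get(o))
            printf("second reference taken\n");
        obj_put(o);                 /* drop the extra reference */
        obj_put(o);                 /* drop the original; frees the object */
        return 0;
    }
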
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
new file mode 100644
index 000000000000..e7fd76975d86
--- /dev/null
+++ b/net/sunrpc/xprtmultipath.c
@@ -0,0 +1,475 @@
1/*
2 * Multipath support for RPC
3 *
4 * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved.
5 *
6 * Trond Myklebust <trond.myklebust@primarydata.com>
7 *
8 */
9#include <linux/types.h>
10#include <linux/kref.h>
11#include <linux/list.h>
12#include <linux/rcupdate.h>
13#include <linux/rculist.h>
14#include <linux/slab.h>
15#include <asm/cmpxchg.h>
16#include <linux/spinlock.h>
17#include <linux/sunrpc/xprt.h>
18#include <linux/sunrpc/xprtmultipath.h>
19
20typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head,
21 const struct rpc_xprt *cur);
22
23static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
24static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin;
25static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall;
26
27static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
28 struct rpc_xprt *xprt)
29{
30 if (unlikely(xprt_get(xprt) == NULL))
31 return;
32 list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list);
33 smp_wmb();
34 if (xps->xps_nxprts == 0)
35 xps->xps_net = xprt->xprt_net;
36 xps->xps_nxprts++;
37}
38
39/**
40 * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch
41 * @xps: pointer to struct rpc_xprt_switch
42 * @xprt: pointer to struct rpc_xprt
43 *
44 * Adds xprt to the end of the list of struct rpc_xprt in xps.
45 */
46void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
47 struct rpc_xprt *xprt)
48{
49 if (xprt == NULL)
50 return;
51 spin_lock(&xps->xps_lock);
52 if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
53 xprt_switch_add_xprt_locked(xps, xprt);
54 spin_unlock(&xps->xps_lock);
55}
56
57static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
58 struct rpc_xprt *xprt)
59{
60 if (unlikely(xprt == NULL))
61 return;
62 xps->xps_nxprts--;
63 if (xps->xps_nxprts == 0)
64 xps->xps_net = NULL;
65 smp_wmb();
66 list_del_rcu(&xprt->xprt_switch);
67}
68
69/**
70 * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch
71 * @xps: pointer to struct rpc_xprt_switch
72 * @xprt: pointer to struct rpc_xprt
73 *
74 * Removes xprt from the list of struct rpc_xprt in xps.
75 */
76void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
77 struct rpc_xprt *xprt)
78{
79 spin_lock(&xps->xps_lock);
80 xprt_switch_remove_xprt_locked(xps, xprt);
81 spin_unlock(&xps->xps_lock);
82 xprt_put(xprt);
83}
84
85/**
86 * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch
87 * @xprt: pointer to struct rpc_xprt
88 * @gfp_flags: allocation flags
89 *
90 * On success, returns an initialised struct rpc_xprt_switch, containing
91 * the entry xprt. Returns NULL on failure.
92 */
93struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
94 gfp_t gfp_flags)
95{
96 struct rpc_xprt_switch *xps;
97
98 xps = kmalloc(sizeof(*xps), gfp_flags);
99 if (xps != NULL) {
100 spin_lock_init(&xps->xps_lock);
101 kref_init(&xps->xps_kref);
102 xps->xps_nxprts = 0;
103 INIT_LIST_HEAD(&xps->xps_xprt_list);
104 xps->xps_iter_ops = &rpc_xprt_iter_singular;
105 xprt_switch_add_xprt_locked(xps, xprt);
106 }
107
108 return xps;
109}
110
111static void xprt_switch_free_entries(struct rpc_xprt_switch *xps)
112{
113 spin_lock(&xps->xps_lock);
114 while (!list_empty(&xps->xps_xprt_list)) {
115 struct rpc_xprt *xprt;
116
117 xprt = list_first_entry(&xps->xps_xprt_list,
118 struct rpc_xprt, xprt_switch);
119 xprt_switch_remove_xprt_locked(xps, xprt);
120 spin_unlock(&xps->xps_lock);
121 xprt_put(xprt);
122 spin_lock(&xps->xps_lock);
123 }
124 spin_unlock(&xps->xps_lock);
125}
126
127static void xprt_switch_free(struct kref *kref)
128{
129 struct rpc_xprt_switch *xps = container_of(kref,
130 struct rpc_xprt_switch, xps_kref);
131
132 xprt_switch_free_entries(xps);
133 kfree_rcu(xps, xps_rcu);
134}
135
136/**
137 * xprt_switch_get - Return a reference to a rpc_xprt_switch
138 * @xps: pointer to struct rpc_xprt_switch
139 *
140 * Returns a reference to xps unless the refcount is already zero.
141 */
142struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps)
143{
144 if (xps != NULL && kref_get_unless_zero(&xps->xps_kref))
145 return xps;
146 return NULL;
147}
148
149/**
150 * xprt_switch_put - Release a reference to a rpc_xprt_switch
151 * @xps: pointer to struct rpc_xprt_switch
152 *
153 * Release the reference to xps, and free it once the refcount is zero.
154 */
155void xprt_switch_put(struct rpc_xprt_switch *xps)
156{
157 if (xps != NULL)
158 kref_put(&xps->xps_kref, xprt_switch_free);
159}
160
161/**
162 * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch
163 * @xps: pointer to struct rpc_xprt_switch
164 *
165 * Sets a round-robin default policy for iterators acting on xps.
166 */
167void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps)
168{
169 if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin)
170 WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin);
171}
172
173static
174const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi)
175{
176 if (xpi->xpi_ops != NULL)
177 return xpi->xpi_ops;
178 return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops;
179}
180
181static
182void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi)
183{
184}
185
186static
187void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
188{
189 WRITE_ONCE(xpi->xpi_cursor, NULL);
190}
191
192static
193struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
194{
195 return list_first_or_null_rcu(head, struct rpc_xprt, xprt_switch);
196}
197
198static
199struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi)
200{
201 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
202
203 if (xps == NULL)
204 return NULL;
205 return xprt_switch_find_first_entry(&xps->xps_xprt_list);
206}
207
208static
209struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
210 const struct rpc_xprt *cur)
211{
212 struct rpc_xprt *pos;
213
214 list_for_each_entry_rcu(pos, head, xprt_switch) {
215 if (cur == pos)
216 return pos;
217 }
218 return NULL;
219}
220
221static
222struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
223{
224 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
225 struct list_head *head;
226
227 if (xps == NULL)
228 return NULL;
229 head = &xps->xps_xprt_list;
230 if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2)
231 return xprt_switch_find_first_entry(head);
232 return xprt_switch_find_current_entry(head, xpi->xpi_cursor);
233}
234
235static
236struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
237 const struct rpc_xprt *cur)
238{
239 struct rpc_xprt *pos, *prev = NULL;
240
241 list_for_each_entry_rcu(pos, head, xprt_switch) {
242 if (cur == prev)
243 return pos;
244 prev = pos;
245 }
246 return NULL;
247}
248
249static
250struct rpc_xprt *xprt_switch_set_next_cursor(struct list_head *head,
251 struct rpc_xprt **cursor,
252 xprt_switch_find_xprt_t find_next)
253{
254 struct rpc_xprt *cur, *pos, *old;
255
256 cur = READ_ONCE(*cursor);
257 for (;;) {
258 old = cur;
259 pos = find_next(head, old);
260 if (pos == NULL)
261 break;
262 cur = cmpxchg_relaxed(cursor, old, pos);
263 if (cur == old)
264 break;
265 }
266 return pos;
267}
268
269static
270struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi,
271 xprt_switch_find_xprt_t find_next)
272{
273 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
274 struct list_head *head;
275
276 if (xps == NULL)
277 return NULL;
278 head = &xps->xps_xprt_list;
279 if (xps->xps_nxprts < 2)
280 return xprt_switch_find_first_entry(head);
281 return xprt_switch_set_next_cursor(head, &xpi->xpi_cursor, find_next);
282}
283
284static
285struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
286 const struct rpc_xprt *cur)
287{
288 struct rpc_xprt *ret;
289
290 ret = xprt_switch_find_next_entry(head, cur);
291 if (ret != NULL)
292 return ret;
293 return xprt_switch_find_first_entry(head);
294}
295
296static
297struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
298{
299 return xprt_iter_next_entry_multiple(xpi,
300 xprt_switch_find_next_entry_roundrobin);
301}
302
303static
304struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
305{
306 return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry);
307}
308
309/*
310 * xprt_iter_rewind - Resets the xprt iterator
311 * @xpi: pointer to rpc_xprt_iter
312 *
313 * Resets xpi to ensure that it points to the first entry in the list
314 * of transports.
315 */
316static
317void xprt_iter_rewind(struct rpc_xprt_iter *xpi)
318{
319 rcu_read_lock();
320 xprt_iter_ops(xpi)->xpi_rewind(xpi);
321 rcu_read_unlock();
322}
323
324static void __xprt_iter_init(struct rpc_xprt_iter *xpi,
325 struct rpc_xprt_switch *xps,
326 const struct rpc_xprt_iter_ops *ops)
327{
328 rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps));
329 xpi->xpi_cursor = NULL;
330 xpi->xpi_ops = ops;
331}
332
333/**
334 * xprt_iter_init - Initialise an xprt iterator
335 * @xpi: pointer to rpc_xprt_iter
336 * @xps: pointer to rpc_xprt_switch
337 *
338 * Initialises the iterator to use the default iterator ops
339 * as set in xps. This function is mainly intended for internal
340 * use in the rpc_client.
341 */
342void xprt_iter_init(struct rpc_xprt_iter *xpi,
343 struct rpc_xprt_switch *xps)
344{
345 __xprt_iter_init(xpi, xps, NULL);
346}
347
348/**
349 * xprt_iter_init_listall - Initialise an xprt iterator
350 * @xpi: pointer to rpc_xprt_iter
351 * @xps: pointer to rpc_xprt_switch
352 *
353 * Initialises the iterator to iterate once through the entire list
354 * of entries in xps.
355 */
356void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
357 struct rpc_xprt_switch *xps)
358{
359 __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall);
360}
361
362/**
363 * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
364 * @xpi: pointer to rpc_xprt_iter
 365 * @newswitch: pointer to a new rpc_xprt_switch or NULL
366 *
367 * Swaps out the existing xpi->xpi_xpswitch with a new value.
368 */
369struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi,
370 struct rpc_xprt_switch *newswitch)
371{
372 struct rpc_xprt_switch __rcu *oldswitch;
373
374 /* Atomically swap out the old xpswitch */
375 oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch));
376 if (newswitch != NULL)
377 xprt_iter_rewind(xpi);
378 return rcu_dereference_protected(oldswitch, true);
379}
380
381/**
382 * xprt_iter_destroy - Destroys the xprt iterator
 383 * @xpi: pointer to rpc_xprt_iter
384 */
385void xprt_iter_destroy(struct rpc_xprt_iter *xpi)
386{
387 xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL));
388}
389
390/**
391 * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor
392 * @xpi: pointer to rpc_xprt_iter
393 *
394 * Returns a pointer to the struct rpc_xprt that is currently
395 * pointed to by the cursor.
396 * Caller must be holding rcu_read_lock().
397 */
398struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi)
399{
400 WARN_ON_ONCE(!rcu_read_lock_held());
401 return xprt_iter_ops(xpi)->xpi_xprt(xpi);
402}
403
404static
405struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi,
406 struct rpc_xprt *(*fn)(struct rpc_xprt_iter *))
407{
408 struct rpc_xprt *ret;
409
410 do {
411 ret = fn(xpi);
412 if (ret == NULL)
413 break;
414 ret = xprt_get(ret);
415 } while (ret == NULL);
416 return ret;
417}
418
419/**
420 * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor
421 * @xpi: pointer to rpc_xprt_iter
422 *
423 * Returns a reference to the struct rpc_xprt that is currently
424 * pointed to by the cursor.
425 */
426struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi)
427{
428 struct rpc_xprt *xprt;
429
430 rcu_read_lock();
431 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt);
432 rcu_read_unlock();
433 return xprt;
434}
435
436/**
437 * xprt_iter_get_next - Returns the next rpc_xprt following the cursor
438 * @xpi: pointer to rpc_xprt_iter
439 *
440 * Returns a reference to the struct rpc_xprt that immediately follows the
441 * entry pointed to by the cursor.
442 */
443struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi)
444{
445 struct rpc_xprt *xprt;
446
447 rcu_read_lock();
448 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next);
449 rcu_read_unlock();
450 return xprt;
451}
452
453/* Policy for always returning the first entry in the rpc_xprt_switch */
454static
455const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = {
456 .xpi_rewind = xprt_iter_no_rewind,
457 .xpi_xprt = xprt_iter_first_entry,
458 .xpi_next = xprt_iter_first_entry,
459};
460
461/* Policy for round-robin iteration of entries in the rpc_xprt_switch */
462static
463const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = {
464 .xpi_rewind = xprt_iter_default_rewind,
465 .xpi_xprt = xprt_iter_current_entry,
466 .xpi_next = xprt_iter_next_entry_roundrobin,
467};
468
469/* Policy for once-through iteration of entries in the rpc_xprt_switch */
470static
471const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = {
472 .xpi_rewind = xprt_iter_default_rewind,
473 .xpi_xprt = xprt_iter_current_entry,
474 .xpi_next = xprt_iter_next_entry_all,
475};
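
xprt_switch_set_next_cursor() in the new xprtmultipath.c advances a shared cursor with cmpxchg_relaxed(), and the round-robin policy wraps back to the first entry once the end of the list is reached, so callers spread work across the attached transports without taking a lock. A small sketch of the same wrap-around cursor over a fixed array, using a C11 compare-exchange in place of cmpxchg_relaxed() (names are invented; the kernel walks an RCU list rather than an array):

    #include <stdatomic.h>
    #include <stdio.h>

    #define NXPRT 3

    static const char *xprts[NXPRT] = { "tcp0", "tcp1", "rdma0" };
    static atomic_int cursor = 0;       /* index of the entry handed out last */

    /* Return the entry after the current cursor, wrapping at the end, and try
     * to publish the new cursor position with a compare-exchange. */
    static const char *next_round_robin(void)
    {
        int old = atomic_load(&cursor);

        for (;;) {
            int next = (old + 1) % NXPRT;          /* round-robin: wrap to the head */

            if (atomic_compare_exchange_weak(&cursor, &old, next))
                return xprts[next];
            /* another caller moved the cursor first; 'old' was reloaded, retry */
        }
    }

    int main(void)
    {
        for (int i = 0; i < 7; i++)
            printf("request %d -> %s\n", i, next_round_robin());
        return 0;
    }
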
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index c14f3a4bff68..b289e106540b 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -80,13 +80,13 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
80 if (!r) 80 if (!r)
81 goto out; 81 goto out;
82 82
83 r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES * 83 r->fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
84 sizeof(u64), GFP_KERNEL); 84 sizeof(u64), GFP_KERNEL);
85 if (!r->r.fmr.physaddrs) 85 if (!r->fmr.physaddrs)
86 goto out_free; 86 goto out_free;
87 87
88 r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); 88 r->fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
89 if (IS_ERR(r->r.fmr.fmr)) 89 if (IS_ERR(r->fmr.fmr))
90 goto out_fmr_err; 90 goto out_fmr_err;
91 91
92 list_add(&r->mw_list, &buf->rb_mws); 92 list_add(&r->mw_list, &buf->rb_mws);
@@ -95,9 +95,9 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
95 return 0; 95 return 0;
96 96
97out_fmr_err: 97out_fmr_err:
98 rc = PTR_ERR(r->r.fmr.fmr); 98 rc = PTR_ERR(r->fmr.fmr);
99 dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); 99 dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
100 kfree(r->r.fmr.physaddrs); 100 kfree(r->fmr.physaddrs);
101out_free: 101out_free:
102 kfree(r); 102 kfree(r);
103out: 103out:
@@ -109,7 +109,7 @@ __fmr_unmap(struct rpcrdma_mw *r)
109{ 109{
110 LIST_HEAD(l); 110 LIST_HEAD(l);
111 111
112 list_add(&r->r.fmr.fmr->list, &l); 112 list_add(&r->fmr.fmr->list, &l);
113 return ib_unmap_fmr(&l); 113 return ib_unmap_fmr(&l);
114} 114}
115 115
@@ -148,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
148 nsegs = RPCRDMA_MAX_FMR_SGES; 148 nsegs = RPCRDMA_MAX_FMR_SGES;
149 for (i = 0; i < nsegs;) { 149 for (i = 0; i < nsegs;) {
150 rpcrdma_map_one(device, seg, direction); 150 rpcrdma_map_one(device, seg, direction);
151 mw->r.fmr.physaddrs[i] = seg->mr_dma; 151 mw->fmr.physaddrs[i] = seg->mr_dma;
152 len += seg->mr_len; 152 len += seg->mr_len;
153 ++seg; 153 ++seg;
154 ++i; 154 ++i;
@@ -158,13 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
158 break; 158 break;
159 } 159 }
160 160
161 rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs, 161 rc = ib_map_phys_fmr(mw->fmr.fmr, mw->fmr.physaddrs,
162 i, seg1->mr_dma); 162 i, seg1->mr_dma);
163 if (rc) 163 if (rc)
164 goto out_maperr; 164 goto out_maperr;
165 165
166 seg1->rl_mw = mw; 166 seg1->rl_mw = mw;
167 seg1->mr_rkey = mw->r.fmr.fmr->rkey; 167 seg1->mr_rkey = mw->fmr.fmr->rkey;
168 seg1->mr_base = seg1->mr_dma + pageoff; 168 seg1->mr_base = seg1->mr_dma + pageoff;
169 seg1->mr_nsegs = i; 169 seg1->mr_nsegs = i;
170 seg1->mr_len = len; 170 seg1->mr_len = len;
@@ -219,7 +219,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
219 seg = &req->rl_segments[i]; 219 seg = &req->rl_segments[i];
220 mw = seg->rl_mw; 220 mw = seg->rl_mw;
221 221
222 list_add(&mw->r.fmr.fmr->list, &unmap_list); 222 list_add(&mw->fmr.fmr->list, &unmap_list);
223 223
224 i += seg->mr_nsegs; 224 i += seg->mr_nsegs;
225 } 225 }
@@ -281,9 +281,9 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
281 while (!list_empty(&buf->rb_all)) { 281 while (!list_empty(&buf->rb_all)) {
282 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 282 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
283 list_del(&r->mw_all); 283 list_del(&r->mw_all);
284 kfree(r->r.fmr.physaddrs); 284 kfree(r->fmr.physaddrs);
285 285
286 rc = ib_dealloc_fmr(r->r.fmr.fmr); 286 rc = ib_dealloc_fmr(r->fmr.fmr);
287 if (rc) 287 if (rc)
288 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", 288 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
289 __func__, rc); 289 __func__, rc);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index e16567389e28..c250924a9fd3 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -109,20 +109,20 @@ static void
109__frwr_recovery_worker(struct work_struct *work) 109__frwr_recovery_worker(struct work_struct *work)
110{ 110{
111 struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, 111 struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
112 r.frmr.fr_work); 112 frmr.fr_work);
113 struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt; 113 struct rpcrdma_xprt *r_xprt = r->frmr.fr_xprt;
114 unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; 114 unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
115 struct ib_pd *pd = r_xprt->rx_ia.ri_pd; 115 struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
116 116
117 if (ib_dereg_mr(r->r.frmr.fr_mr)) 117 if (ib_dereg_mr(r->frmr.fr_mr))
118 goto out_fail; 118 goto out_fail;
119 119
120 r->r.frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); 120 r->frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
121 if (IS_ERR(r->r.frmr.fr_mr)) 121 if (IS_ERR(r->frmr.fr_mr))
122 goto out_fail; 122 goto out_fail;
123 123
124 dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); 124 dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
125 r->r.frmr.fr_state = FRMR_IS_INVALID; 125 r->frmr.fr_state = FRMR_IS_INVALID;
126 rpcrdma_put_mw(r_xprt, r); 126 rpcrdma_put_mw(r_xprt, r);
127 return; 127 return;
128 128
@@ -137,15 +137,15 @@ out_fail:
137static void 137static void
138__frwr_queue_recovery(struct rpcrdma_mw *r) 138__frwr_queue_recovery(struct rpcrdma_mw *r)
139{ 139{
140 INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker); 140 INIT_WORK(&r->frmr.fr_work, __frwr_recovery_worker);
141 queue_work(frwr_recovery_wq, &r->r.frmr.fr_work); 141 queue_work(frwr_recovery_wq, &r->frmr.fr_work);
142} 142}
143 143
144static int 144static int
145__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, 145__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
146 unsigned int depth) 146 unsigned int depth)
147{ 147{
148 struct rpcrdma_frmr *f = &r->r.frmr; 148 struct rpcrdma_frmr *f = &r->frmr;
149 int rc; 149 int rc;
150 150
151 f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); 151 f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
@@ -158,6 +158,8 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
158 158
159 sg_init_table(f->sg, depth); 159 sg_init_table(f->sg, depth);
160 160
161 init_completion(&f->fr_linv_done);
162
161 return 0; 163 return 0;
162 164
163out_mr_err: 165out_mr_err:
@@ -179,11 +181,11 @@ __frwr_release(struct rpcrdma_mw *r)
179{ 181{
180 int rc; 182 int rc;
181 183
182 rc = ib_dereg_mr(r->r.frmr.fr_mr); 184 rc = ib_dereg_mr(r->frmr.fr_mr);
183 if (rc) 185 if (rc)
184 dprintk("RPC: %s: ib_dereg_mr status %i\n", 186 dprintk("RPC: %s: ib_dereg_mr status %i\n",
185 __func__, rc); 187 __func__, rc);
186 kfree(r->r.frmr.sg); 188 kfree(r->frmr.sg);
187} 189}
188 190
189static int 191static int
@@ -244,39 +246,76 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
 }
 
-/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
- * to be reset.
- *
- * WARNING: Only wr_id and status are reliable at this point
- */
-static void
-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
-{
-	if (likely(wc->status == IB_WC_SUCCESS))
-		return;
-
-	/* WARNING: Only wr_id and status are reliable at this point */
-	r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-	if (wc->status == IB_WC_WR_FLUSH_ERR)
-		dprintk("RPC: %s: frmr %p flushed\n", __func__, r);
-	else
-		pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
-			__func__, r, ib_wc_status_msg(wc->status), wc->status);
-
-	r->r.frmr.fr_state = FRMR_IS_STALE;
-}
-
-static void
-frwr_sendcompletion(struct ib_wc *wc)
-{
-	struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-	struct rpcrdma_frmr *f = &r->r.frmr;
-
-	if (unlikely(wc->status != IB_WC_SUCCESS))
-		__frwr_sendcompletion_flush(wc, r);
-
-	if (f->fr_waiter)
-		complete(&f->fr_linv_done);
+static void
+__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
+			    const char *wr)
+{
+	frmr->fr_state = FRMR_IS_STALE;
+	if (wc->status != IB_WC_WR_FLUSH_ERR)
+		pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
+		       wr, ib_wc_status_msg(wc->status),
+		       wc->status, wc->vendor_err);
+}
+
+/**
+ * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ */
+static void
+frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct rpcrdma_frmr *frmr;
+	struct ib_cqe *cqe;
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	if (wc->status != IB_WC_SUCCESS) {
+		cqe = wc->wr_cqe;
+		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+		__frwr_sendcompletion_flush(wc, frmr, "fastreg");
+	}
+}
+
+/**
+ * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ */
+static void
+frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct rpcrdma_frmr *frmr;
+	struct ib_cqe *cqe;
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	if (wc->status != IB_WC_SUCCESS) {
+		cqe = wc->wr_cqe;
+		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+		__frwr_sendcompletion_flush(wc, frmr, "localinv");
+	}
+}
+
+/**
+ * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ * Awaken anyone waiting for an MR to finish being fenced.
+ */
+static void
+frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct rpcrdma_frmr *frmr;
+	struct ib_cqe *cqe;
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	cqe = wc->wr_cqe;
+	frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+	if (wc->status != IB_WC_SUCCESS)
+		__frwr_sendcompletion_flush(wc, frmr, "localinv");
+	complete_all(&frmr->fr_linv_done);
 }
 
 static int
@@ -313,8 +352,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
313 352
314 list_add(&r->mw_list, &buf->rb_mws); 353 list_add(&r->mw_list, &buf->rb_mws);
315 list_add(&r->mw_all, &buf->rb_all); 354 list_add(&r->mw_all, &buf->rb_all);
316 r->mw_sendcompletion = frwr_sendcompletion; 355 r->frmr.fr_xprt = r_xprt;
317 r->r.frmr.fr_xprt = r_xprt;
318 } 356 }
319 357
320 return 0; 358 return 0;
@@ -347,10 +385,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
347 mw = rpcrdma_get_mw(r_xprt); 385 mw = rpcrdma_get_mw(r_xprt);
348 if (!mw) 386 if (!mw)
349 return -ENOMEM; 387 return -ENOMEM;
350 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); 388 } while (mw->frmr.fr_state != FRMR_IS_INVALID);
351 frmr = &mw->r.frmr; 389 frmr = &mw->frmr;
352 frmr->fr_state = FRMR_IS_VALID; 390 frmr->fr_state = FRMR_IS_VALID;
353 frmr->fr_waiter = false;
354 mr = frmr->fr_mr; 391 mr = frmr->fr_mr;
355 reg_wr = &frmr->fr_regwr; 392 reg_wr = &frmr->fr_regwr;
356 393
@@ -400,7 +437,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
400 437
401 reg_wr->wr.next = NULL; 438 reg_wr->wr.next = NULL;
402 reg_wr->wr.opcode = IB_WR_REG_MR; 439 reg_wr->wr.opcode = IB_WR_REG_MR;
403 reg_wr->wr.wr_id = (uintptr_t)mw; 440 frmr->fr_cqe.done = frwr_wc_fastreg;
441 reg_wr->wr.wr_cqe = &frmr->fr_cqe;
404 reg_wr->wr.num_sge = 0; 442 reg_wr->wr.num_sge = 0;
405 reg_wr->wr.send_flags = 0; 443 reg_wr->wr.send_flags = 0;
406 reg_wr->mr = mr; 444 reg_wr->mr = mr;
@@ -434,15 +472,15 @@ static struct ib_send_wr *
434__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg) 472__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
435{ 473{
436 struct rpcrdma_mw *mw = seg->rl_mw; 474 struct rpcrdma_mw *mw = seg->rl_mw;
437 struct rpcrdma_frmr *f = &mw->r.frmr; 475 struct rpcrdma_frmr *f = &mw->frmr;
438 struct ib_send_wr *invalidate_wr; 476 struct ib_send_wr *invalidate_wr;
439 477
440 f->fr_waiter = false;
441 f->fr_state = FRMR_IS_INVALID; 478 f->fr_state = FRMR_IS_INVALID;
442 invalidate_wr = &f->fr_invwr; 479 invalidate_wr = &f->fr_invwr;
443 480
444 memset(invalidate_wr, 0, sizeof(*invalidate_wr)); 481 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
445 invalidate_wr->wr_id = (unsigned long)(void *)mw; 482 f->fr_cqe.done = frwr_wc_localinv;
483 invalidate_wr->wr_cqe = &f->fr_cqe;
446 invalidate_wr->opcode = IB_WR_LOCAL_INV; 484 invalidate_wr->opcode = IB_WR_LOCAL_INV;
447 invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey; 485 invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
448 486
@@ -455,7 +493,7 @@ __frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
455{ 493{
456 struct ib_device *device = r_xprt->rx_ia.ri_device; 494 struct ib_device *device = r_xprt->rx_ia.ri_device;
457 struct rpcrdma_mw *mw = seg->rl_mw; 495 struct rpcrdma_mw *mw = seg->rl_mw;
458 struct rpcrdma_frmr *f = &mw->r.frmr; 496 struct rpcrdma_frmr *f = &mw->frmr;
459 497
460 seg->rl_mw = NULL; 498 seg->rl_mw = NULL;
461 499
@@ -504,15 +542,15 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
504 542
505 i += seg->mr_nsegs; 543 i += seg->mr_nsegs;
506 } 544 }
507 f = &seg->rl_mw->r.frmr; 545 f = &seg->rl_mw->frmr;
508 546
509 /* Strong send queue ordering guarantees that when the 547 /* Strong send queue ordering guarantees that when the
510 * last WR in the chain completes, all WRs in the chain 548 * last WR in the chain completes, all WRs in the chain
511 * are complete. 549 * are complete.
512 */ 550 */
513 f->fr_invwr.send_flags = IB_SEND_SIGNALED; 551 f->fr_invwr.send_flags = IB_SEND_SIGNALED;
514 f->fr_waiter = true; 552 f->fr_cqe.done = frwr_wc_localinv_wake;
515 init_completion(&f->fr_linv_done); 553 reinit_completion(&f->fr_linv_done);
516 INIT_CQCOUNT(&r_xprt->rx_ep); 554 INIT_CQCOUNT(&r_xprt->rx_ep);
517 555
518 /* Transport disconnect drains the receive CQ before it 556 /* Transport disconnect drains the receive CQ before it
@@ -520,14 +558,18 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
520 * unless ri_id->qp is a valid pointer. 558 * unless ri_id->qp is a valid pointer.
521 */ 559 */
522 rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); 560 rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
523 if (rc) 561 if (rc) {
524 pr_warn("%s: ib_post_send failed %i\n", __func__, rc); 562 pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
563 rdma_disconnect(ia->ri_id);
564 goto unmap;
565 }
525 566
526 wait_for_completion(&f->fr_linv_done); 567 wait_for_completion(&f->fr_linv_done);
527 568
528 /* ORDER: Now DMA unmap all of the req's MRs, and return 569 /* ORDER: Now DMA unmap all of the req's MRs, and return
529 * them to the free MW list. 570 * them to the free MW list.
530 */ 571 */
572unmap:
531 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { 573 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
532 seg = &req->rl_segments[i]; 574 seg = &req->rl_segments[i];
533 575
@@ -549,7 +591,7 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
549 struct rpcrdma_mr_seg *seg1 = seg; 591 struct rpcrdma_mr_seg *seg1 = seg;
550 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 592 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
551 struct rpcrdma_mw *mw = seg1->rl_mw; 593 struct rpcrdma_mw *mw = seg1->rl_mw;
552 struct rpcrdma_frmr *frmr = &mw->r.frmr; 594 struct rpcrdma_frmr *frmr = &mw->frmr;
553 struct ib_send_wr *invalidate_wr, *bad_wr; 595 struct ib_send_wr *invalidate_wr, *bad_wr;
554 int rc, nsegs = seg->mr_nsegs; 596 int rc, nsegs = seg->mr_nsegs;
555 597
@@ -557,10 +599,11 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
557 599
558 seg1->rl_mw = NULL; 600 seg1->rl_mw = NULL;
559 frmr->fr_state = FRMR_IS_INVALID; 601 frmr->fr_state = FRMR_IS_INVALID;
560 invalidate_wr = &mw->r.frmr.fr_invwr; 602 invalidate_wr = &mw->frmr.fr_invwr;
561 603
562 memset(invalidate_wr, 0, sizeof(*invalidate_wr)); 604 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
563 invalidate_wr->wr_id = (uintptr_t)mw; 605 frmr->fr_cqe.done = frwr_wc_localinv;
606 invalidate_wr->wr_cqe = &frmr->fr_cqe;
564 invalidate_wr->opcode = IB_WR_LOCAL_INV; 607 invalidate_wr->opcode = IB_WR_LOCAL_INV;
565 invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey; 608 invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
566 DECR_CQCOUNT(&r_xprt->rx_ep); 609 DECR_CQCOUNT(&r_xprt->rx_ep);
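
Editor's note on the frwr_ops.c hunks above: the patch replaces the old wr_id-casting send-completion handler with per-WR struct ib_cqe callbacks that recover their owning frmr via container_of(). Below is a minimal, self-contained user-space sketch of that dispatch shape only; the mock_cq, mock_wc, mock_cqe and mock_frmr types are invented stand-ins for illustration, not the kernel verbs API.

/* Sketch of the wr_cqe completion pattern adopted above: the work
 * request carries a pointer to a struct with a .done callback, and
 * the handler recovers its container with container_of().
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct mock_cq;				/* stands in for struct ib_cq */
struct mock_wc;				/* stands in for struct ib_wc */

struct mock_cqe {			/* stands in for struct ib_cqe */
	void (*done)(struct mock_cq *cq, struct mock_wc *wc);
};

struct mock_wc {
	int status;			/* 0 == success */
	struct mock_cqe *wr_cqe;
};

struct mock_frmr {			/* stands in for struct rpcrdma_frmr */
	int fr_state;
	struct mock_cqe fr_cqe;
};

/* Completion handler: recover the frmr that owns this cqe. */
static void demo_wc_localinv(struct mock_cq *cq, struct mock_wc *wc)
{
	struct mock_frmr *frmr =
		container_of(wc->wr_cqe, struct mock_frmr, fr_cqe);

	if (wc->status != 0)
		frmr->fr_state = -1;	/* mark stale, as the patch does */
	printf("completion for frmr %p, state %d\n",
	       (void *)frmr, frmr->fr_state);
}

int main(void)
{
	struct mock_frmr frmr = { .fr_state = 0 };
	struct mock_wc wc = { .status = 0 };

	/* Post path: point the WR's cqe at the handler... */
	frmr.fr_cqe.done = demo_wc_localinv;
	wc.wr_cqe = &frmr.fr_cqe;

	/* ...and the CQ poller simply dispatches through it. */
	wc.wr_cqe->done(NULL, &wc);
	return 0;
}
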
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index dbb302ecf590..481b9b6f4a15 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -68,7 +68,6 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
68 rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing)); 68 rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing));
69 seg->mr_rkey = ia->ri_dma_mr->rkey; 69 seg->mr_rkey = ia->ri_dma_mr->rkey;
70 seg->mr_base = seg->mr_dma; 70 seg->mr_base = seg->mr_dma;
71 seg->mr_nsegs = 1;
72 return 1; 71 return 1;
73} 72}
74 73
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 0f28f2d743ed..888823bb6dae 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -132,6 +132,33 @@ rpcrdma_tail_pullup(struct xdr_buf *buf)
132 return tlen; 132 return tlen;
133} 133}
134 134
135/* Split "vec" on page boundaries into segments. FMR registers pages,
136 * not a byte range. Other modes coalesce these segments into a single
137 * MR when they can.
138 */
139static int
140rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
141 int n, int nsegs)
142{
143 size_t page_offset;
144 u32 remaining;
145 char *base;
146
147 base = vec->iov_base;
148 page_offset = offset_in_page(base);
149 remaining = vec->iov_len;
150 while (remaining && n < nsegs) {
151 seg[n].mr_page = NULL;
152 seg[n].mr_offset = base;
153 seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
154 remaining -= seg[n].mr_len;
155 base += seg[n].mr_len;
156 ++n;
157 page_offset = 0;
158 }
159 return n;
160}
161
135/* 162/*
136 * Chunk assembly from upper layer xdr_buf. 163 * Chunk assembly from upper layer xdr_buf.
137 * 164 *
@@ -150,11 +177,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
150 int page_base; 177 int page_base;
151 struct page **ppages; 178 struct page **ppages;
152 179
153 if (pos == 0 && xdrbuf->head[0].iov_len) { 180 if (pos == 0) {
154 seg[n].mr_page = NULL; 181 n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n, nsegs);
155 seg[n].mr_offset = xdrbuf->head[0].iov_base; 182 if (n == nsegs)
156 seg[n].mr_len = xdrbuf->head[0].iov_len; 183 return -EIO;
157 ++n;
158 } 184 }
159 185
160 len = xdrbuf->page_len; 186 len = xdrbuf->page_len;
@@ -192,13 +218,9 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
192 * xdr pad bytes, saving the server an RDMA operation. */ 218 * xdr pad bytes, saving the server an RDMA operation. */
193 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) 219 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
194 return n; 220 return n;
221 n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n, nsegs);
195 if (n == nsegs) 222 if (n == nsegs)
196 /* Tail remains, but we're out of segments */
197 return -EIO; 223 return -EIO;
198 seg[n].mr_page = NULL;
199 seg[n].mr_offset = xdrbuf->tail[0].iov_base;
200 seg[n].mr_len = xdrbuf->tail[0].iov_len;
201 ++n;
202 } 224 }
203 225
204 return n; 226 return n;
@@ -773,20 +795,17 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
773 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; 795 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
774 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 796 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
775 __be32 *iptr; 797 __be32 *iptr;
776 int rdmalen, status; 798 int rdmalen, status, rmerr;
777 unsigned long cwnd; 799 unsigned long cwnd;
778 u32 credits;
779 800
780 dprintk("RPC: %s: incoming rep %p\n", __func__, rep); 801 dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
781 802
782 if (rep->rr_len == RPCRDMA_BAD_LEN) 803 if (rep->rr_len == RPCRDMA_BAD_LEN)
783 goto out_badstatus; 804 goto out_badstatus;
784 if (rep->rr_len < RPCRDMA_HDRLEN_MIN) 805 if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
785 goto out_shortreply; 806 goto out_shortreply;
786 807
787 headerp = rdmab_to_msg(rep->rr_rdmabuf); 808 headerp = rdmab_to_msg(rep->rr_rdmabuf);
788 if (headerp->rm_vers != rpcrdma_version)
789 goto out_badversion;
790#if defined(CONFIG_SUNRPC_BACKCHANNEL) 809#if defined(CONFIG_SUNRPC_BACKCHANNEL)
791 if (rpcrdma_is_bcall(headerp)) 810 if (rpcrdma_is_bcall(headerp))
792 goto out_bcall; 811 goto out_bcall;
@@ -809,15 +828,16 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
809 */ 828 */
810 list_del_init(&rqst->rq_list); 829 list_del_init(&rqst->rq_list);
811 spin_unlock_bh(&xprt->transport_lock); 830 spin_unlock_bh(&xprt->transport_lock);
812 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" 831 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
813 " RPC request 0x%p xid 0x%08x\n", 832 __func__, rep, req, be32_to_cpu(headerp->rm_xid));
814 __func__, rep, req, rqst,
815 be32_to_cpu(headerp->rm_xid));
816 833
817 /* from here on, the reply is no longer an orphan */ 834 /* from here on, the reply is no longer an orphan */
818 req->rl_reply = rep; 835 req->rl_reply = rep;
819 xprt->reestablish_timeout = 0; 836 xprt->reestablish_timeout = 0;
820 837
838 if (headerp->rm_vers != rpcrdma_version)
839 goto out_badversion;
840
821 /* check for expected message types */ 841 /* check for expected message types */
822 /* The order of some of these tests is important. */ 842 /* The order of some of these tests is important. */
823 switch (headerp->rm_type) { 843 switch (headerp->rm_type) {
@@ -878,6 +898,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
878 status = rdmalen; 898 status = rdmalen;
879 break; 899 break;
880 900
901 case rdma_error:
902 goto out_rdmaerr;
903
881badheader: 904badheader:
882 default: 905 default:
883 dprintk("%s: invalid rpcrdma reply header (type %d):" 906 dprintk("%s: invalid rpcrdma reply header (type %d):"
@@ -893,6 +916,7 @@ badheader:
893 break; 916 break;
894 } 917 }
895 918
919out:
896 /* Invalidate and flush the data payloads before waking the 920 /* Invalidate and flush the data payloads before waking the
897 * waiting application. This guarantees the memory region is 921 * waiting application. This guarantees the memory region is
898 * properly fenced from the server before the application 922 * properly fenced from the server before the application
@@ -903,15 +927,9 @@ badheader:
903 if (req->rl_nchunks) 927 if (req->rl_nchunks)
904 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req); 928 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
905 929
906 credits = be32_to_cpu(headerp->rm_credit);
907 if (credits == 0)
908 credits = 1; /* don't deadlock */
909 else if (credits > r_xprt->rx_buf.rb_max_requests)
910 credits = r_xprt->rx_buf.rb_max_requests;
911
912 spin_lock_bh(&xprt->transport_lock); 930 spin_lock_bh(&xprt->transport_lock);
913 cwnd = xprt->cwnd; 931 cwnd = xprt->cwnd;
914 xprt->cwnd = credits << RPC_CWNDSHIFT; 932 xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
915 if (xprt->cwnd > cwnd) 933 if (xprt->cwnd > cwnd)
916 xprt_release_rqst_cong(rqst->rq_task); 934 xprt_release_rqst_cong(rqst->rq_task);
917 935
@@ -935,13 +953,43 @@ out_bcall:
 	return;
 #endif
 
-out_shortreply:
-	dprintk("RPC: %s: short/invalid reply\n", __func__);
-	goto repost;
-
+/* If the incoming reply terminated a pending RPC, the next
+ * RPC call will post a replacement receive buffer as it is
+ * being marshaled.
+ */
 out_badversion:
 	dprintk("RPC: %s: invalid version %d\n",
 		__func__, be32_to_cpu(headerp->rm_vers));
+	status = -EIO;
+	r_xprt->rx_stats.bad_reply_count++;
+	goto out;
+
+out_rdmaerr:
+	rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err);
+	switch (rmerr) {
+	case ERR_VERS:
+		pr_err("%s: server reports header version error (%u-%u)\n",
+		       __func__,
+		       be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low),
+		       be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high));
+		break;
+	case ERR_CHUNK:
+		pr_err("%s: server reports header decoding error\n",
+		       __func__);
+		break;
+	default:
+		pr_err("%s: server reports unknown error %d\n",
+		       __func__, rmerr);
+	}
+	status = -EREMOTEIO;
+	r_xprt->rx_stats.bad_reply_count++;
+	goto out;
+
+/* If no pending RPC transaction was matched, post a replacement
+ * receive buffer before returning.
+ */
+out_shortreply:
+	dprintk("RPC: %s: short/invalid reply\n", __func__);
 	goto repost;
 
 out_nomatch:
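
Editor's note on the rpc_rdma.c hunks above: the new rpcrdma_convert_kvec() helper cuts a kvec at page boundaries because FMR registers whole pages rather than arbitrary byte ranges. A rough stand-alone sketch of that loop follows, assuming a 4 KiB page size and a simplified mock_seg type purely for illustration.

/* Split a buffer at page boundaries, as rpcrdma_convert_kvec() does. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define PAGE_SIZE		4096u
#define offset_in_page(p)	((uintptr_t)(p) & (PAGE_SIZE - 1))

struct mock_seg {			/* stands in for rpcrdma_mr_seg */
	char	*mr_offset;
	uint32_t mr_len;
};

static int convert_kvec(char *base, uint32_t remaining,
			struct mock_seg *seg, int n, int nsegs)
{
	size_t page_offset = offset_in_page(base);

	while (remaining && n < nsegs) {
		seg[n].mr_offset = base;
		seg[n].mr_len = PAGE_SIZE - page_offset;
		if (seg[n].mr_len > remaining)
			seg[n].mr_len = remaining;
		remaining -= seg[n].mr_len;
		base += seg[n].mr_len;
		n++;
		page_offset = 0;	/* later segments start page-aligned */
	}
	return n;
}

int main(void)
{
	static char buf[3 * PAGE_SIZE];
	struct mock_seg seg[8];
	/* Start 100 bytes into a page: expect a short first segment. */
	int n = convert_kvec(buf + 100, 2 * PAGE_SIZE, seg, 0, 8);

	for (int i = 0; i < n; i++)
		printf("seg %d: len %u\n", i, seg[i].mr_len);
	return 0;
}
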
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 65a7c232a345..a2a7519b0f23 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -107,26 +107,18 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
107 int ret; 107 int ret;
108 108
109 vec = svc_rdma_get_req_map(rdma); 109 vec = svc_rdma_get_req_map(rdma);
110 ret = svc_rdma_map_xdr(rdma, sndbuf, vec); 110 ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
111 if (ret) 111 if (ret)
112 goto out_err; 112 goto out_err;
113 113
114 /* Post a recv buffer to handle the reply for this request. */ 114 ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
115 ret = svc_rdma_post_recv(rdma, GFP_NOIO); 115 if (ret)
116 if (ret) {
117 pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
118 ret);
119 pr_err("svcrdma: closing transport %p.\n", rdma);
120 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
121 ret = -ENOTCONN;
122 goto out_err; 116 goto out_err;
123 }
124 117
125 ctxt = svc_rdma_get_context(rdma); 118 ctxt = svc_rdma_get_context(rdma);
126 ctxt->pages[0] = virt_to_page(rqst->rq_buffer); 119 ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
127 ctxt->count = 1; 120 ctxt->count = 1;
128 121
129 ctxt->wr_op = IB_WR_SEND;
130 ctxt->direction = DMA_TO_DEVICE; 122 ctxt->direction = DMA_TO_DEVICE;
131 ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; 123 ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
132 ctxt->sge[0].length = sndbuf->len; 124 ctxt->sge[0].length = sndbuf->len;
@@ -140,7 +132,8 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
140 atomic_inc(&rdma->sc_dma_used); 132 atomic_inc(&rdma->sc_dma_used);
141 133
142 memset(&send_wr, 0, sizeof(send_wr)); 134 memset(&send_wr, 0, sizeof(send_wr));
143 send_wr.wr_id = (unsigned long)ctxt; 135 ctxt->cqe.done = svc_rdma_wc_send;
136 send_wr.wr_cqe = &ctxt->cqe;
144 send_wr.sg_list = ctxt->sge; 137 send_wr.sg_list = ctxt->sge;
145 send_wr.num_sge = 1; 138 send_wr.num_sge = 1;
146 send_wr.opcode = IB_WR_SEND; 139 send_wr.opcode = IB_WR_SEND;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index e2fca7617242..765bca47c74d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -145,29 +145,44 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
145 return (__be32 *)&ary->wc_array[nchunks]; 145 return (__be32 *)&ary->wc_array[nchunks];
146} 146}
147 147
148int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, 148int svc_rdma_xdr_decode_req(struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp)
149 struct svc_rqst *rqstp)
150{ 149{
151 struct rpcrdma_msg *rmsgp = NULL;
152 __be32 *va, *vaend; 150 __be32 *va, *vaend;
151 unsigned int len;
153 u32 hdr_len; 152 u32 hdr_len;
154 153
155 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
156
157 /* Verify that there's enough bytes for header + something */ 154 /* Verify that there's enough bytes for header + something */
158 if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { 155 if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_ERR) {
159 dprintk("svcrdma: header too short = %d\n", 156 dprintk("svcrdma: header too short = %d\n",
160 rqstp->rq_arg.len); 157 rqstp->rq_arg.len);
161 return -EINVAL; 158 return -EINVAL;
162 } 159 }
163 160
164 if (rmsgp->rm_vers != rpcrdma_version) 161 if (rmsgp->rm_vers != rpcrdma_version) {
165 return -ENOSYS; 162 dprintk("%s: bad version %u\n", __func__,
166 163 be32_to_cpu(rmsgp->rm_vers));
167 /* Pull in the extra for the padded case and bump our pointer */ 164 return -EPROTONOSUPPORT;
168 if (rmsgp->rm_type == rdma_msgp) { 165 }
169 int hdrlen;
170 166
167 switch (be32_to_cpu(rmsgp->rm_type)) {
168 case RDMA_MSG:
169 case RDMA_NOMSG:
170 break;
171
172 case RDMA_DONE:
173 /* Just drop it */
174 dprintk("svcrdma: dropping RDMA_DONE message\n");
175 return 0;
176
177 case RDMA_ERROR:
178 /* Possible if this is a backchannel reply.
179 * XXX: We should cancel this XID, though.
180 */
181 dprintk("svcrdma: dropping RDMA_ERROR message\n");
182 return 0;
183
184 case RDMA_MSGP:
185 /* Pull in the extra for the padded case, bump our pointer */
171 rmsgp->rm_body.rm_padded.rm_align = 186 rmsgp->rm_body.rm_padded.rm_align =
172 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align); 187 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
173 rmsgp->rm_body.rm_padded.rm_thresh = 188 rmsgp->rm_body.rm_padded.rm_thresh =
@@ -175,11 +190,15 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
175 190
176 va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; 191 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
177 rqstp->rq_arg.head[0].iov_base = va; 192 rqstp->rq_arg.head[0].iov_base = va;
178 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); 193 len = (u32)((unsigned long)va - (unsigned long)rmsgp);
179 rqstp->rq_arg.head[0].iov_len -= hdrlen; 194 rqstp->rq_arg.head[0].iov_len -= len;
180 if (hdrlen > rqstp->rq_arg.len) 195 if (len > rqstp->rq_arg.len)
181 return -EINVAL; 196 return -EINVAL;
182 return hdrlen; 197 return len;
198 default:
199 dprintk("svcrdma: bad rdma procedure (%u)\n",
200 be32_to_cpu(rmsgp->rm_type));
201 return -EINVAL;
183 } 202 }
184 203
185 /* The chunk list may contain either a read chunk list or a write 204 /* The chunk list may contain either a read chunk list or a write
@@ -188,20 +207,25 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
188 va = &rmsgp->rm_body.rm_chunks[0]; 207 va = &rmsgp->rm_body.rm_chunks[0];
189 vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); 208 vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
190 va = decode_read_list(va, vaend); 209 va = decode_read_list(va, vaend);
191 if (!va) 210 if (!va) {
211 dprintk("svcrdma: failed to decode read list\n");
192 return -EINVAL; 212 return -EINVAL;
213 }
193 va = decode_write_list(va, vaend); 214 va = decode_write_list(va, vaend);
194 if (!va) 215 if (!va) {
216 dprintk("svcrdma: failed to decode write list\n");
195 return -EINVAL; 217 return -EINVAL;
218 }
196 va = decode_reply_array(va, vaend); 219 va = decode_reply_array(va, vaend);
197 if (!va) 220 if (!va) {
221 dprintk("svcrdma: failed to decode reply chunk\n");
198 return -EINVAL; 222 return -EINVAL;
223 }
199 224
200 rqstp->rq_arg.head[0].iov_base = va; 225 rqstp->rq_arg.head[0].iov_base = va;
201 hdr_len = (unsigned long)va - (unsigned long)rmsgp; 226 hdr_len = (unsigned long)va - (unsigned long)rmsgp;
202 rqstp->rq_arg.head[0].iov_len -= hdr_len; 227 rqstp->rq_arg.head[0].iov_len -= hdr_len;
203 228
204 *rdma_req = rmsgp;
205 return hdr_len; 229 return hdr_len;
206} 230}
207 231
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c8b8a8b4181e..3b24a646eb46 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -180,9 +180,9 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
180 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 180 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
181 181
182 memset(&read_wr, 0, sizeof(read_wr)); 182 memset(&read_wr, 0, sizeof(read_wr));
183 read_wr.wr.wr_id = (unsigned long)ctxt; 183 ctxt->cqe.done = svc_rdma_wc_read;
184 read_wr.wr.wr_cqe = &ctxt->cqe;
184 read_wr.wr.opcode = IB_WR_RDMA_READ; 185 read_wr.wr.opcode = IB_WR_RDMA_READ;
185 ctxt->wr_op = read_wr.wr.opcode;
186 read_wr.wr.send_flags = IB_SEND_SIGNALED; 186 read_wr.wr.send_flags = IB_SEND_SIGNALED;
187 read_wr.rkey = rs_handle; 187 read_wr.rkey = rs_handle;
188 read_wr.remote_addr = rs_offset; 188 read_wr.remote_addr = rs_offset;
@@ -299,8 +299,9 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
299 ctxt->read_hdr = head; 299 ctxt->read_hdr = head;
300 300
301 /* Prepare REG WR */ 301 /* Prepare REG WR */
302 ctxt->reg_cqe.done = svc_rdma_wc_reg;
303 reg_wr.wr.wr_cqe = &ctxt->reg_cqe;
302 reg_wr.wr.opcode = IB_WR_REG_MR; 304 reg_wr.wr.opcode = IB_WR_REG_MR;
303 reg_wr.wr.wr_id = 0;
304 reg_wr.wr.send_flags = IB_SEND_SIGNALED; 305 reg_wr.wr.send_flags = IB_SEND_SIGNALED;
305 reg_wr.wr.num_sge = 0; 306 reg_wr.wr.num_sge = 0;
306 reg_wr.mr = frmr->mr; 307 reg_wr.mr = frmr->mr;
@@ -310,6 +311,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
310 311
311 /* Prepare RDMA_READ */ 312 /* Prepare RDMA_READ */
312 memset(&read_wr, 0, sizeof(read_wr)); 313 memset(&read_wr, 0, sizeof(read_wr));
314 ctxt->cqe.done = svc_rdma_wc_read;
315 read_wr.wr.wr_cqe = &ctxt->cqe;
313 read_wr.wr.send_flags = IB_SEND_SIGNALED; 316 read_wr.wr.send_flags = IB_SEND_SIGNALED;
314 read_wr.rkey = rs_handle; 317 read_wr.rkey = rs_handle;
315 read_wr.remote_addr = rs_offset; 318 read_wr.remote_addr = rs_offset;
@@ -317,19 +320,18 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
317 read_wr.wr.num_sge = 1; 320 read_wr.wr.num_sge = 1;
318 if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { 321 if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
319 read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; 322 read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
320 read_wr.wr.wr_id = (unsigned long)ctxt;
321 read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; 323 read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
322 } else { 324 } else {
323 read_wr.wr.opcode = IB_WR_RDMA_READ; 325 read_wr.wr.opcode = IB_WR_RDMA_READ;
324 read_wr.wr.next = &inv_wr; 326 read_wr.wr.next = &inv_wr;
325 /* Prepare invalidate */ 327 /* Prepare invalidate */
326 memset(&inv_wr, 0, sizeof(inv_wr)); 328 memset(&inv_wr, 0, sizeof(inv_wr));
327 inv_wr.wr_id = (unsigned long)ctxt; 329 ctxt->inv_cqe.done = svc_rdma_wc_inv;
330 inv_wr.wr_cqe = &ctxt->inv_cqe;
328 inv_wr.opcode = IB_WR_LOCAL_INV; 331 inv_wr.opcode = IB_WR_LOCAL_INV;
329 inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; 332 inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
330 inv_wr.ex.invalidate_rkey = frmr->mr->lkey; 333 inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
331 } 334 }
332 ctxt->wr_op = read_wr.wr.opcode;
333 335
334 /* Post the chain */ 336 /* Post the chain */
335 ret = svc_rdma_send(xprt, &reg_wr.wr); 337 ret = svc_rdma_send(xprt, &reg_wr.wr);
@@ -612,7 +614,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
612 struct svc_rdma_op_ctxt *ctxt = NULL; 614 struct svc_rdma_op_ctxt *ctxt = NULL;
613 struct rpcrdma_msg *rmsgp; 615 struct rpcrdma_msg *rmsgp;
614 int ret = 0; 616 int ret = 0;
615 int len;
616 617
617 dprintk("svcrdma: rqstp=%p\n", rqstp); 618 dprintk("svcrdma: rqstp=%p\n", rqstp);
618 619
@@ -642,8 +643,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
642 * transport list 643 * transport list
643 */ 644 */
644 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) 645 if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
645 goto close_out; 646 goto defer;
646
647 goto out; 647 goto out;
648 } 648 }
649 dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", 649 dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -654,15 +654,13 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
 
 	/* Decode the RDMA header. */
-	len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
-	rqstp->rq_xprt_hlen = len;
-
-	/* If the request is invalid, reply with an error */
-	if (len < 0) {
-		if (len == -ENOSYS)
-			svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
-		goto close_out;
-	}
+	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+	ret = svc_rdma_xdr_decode_req(rmsgp, rqstp);
+	if (ret < 0)
+		goto out_err;
+	if (ret == 0)
+		goto out_drop;
+	rqstp->rq_xprt_hlen = ret;
 
 	if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
 		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
@@ -698,26 +696,16 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
698 svc_xprt_copy_addrs(rqstp, xprt); 696 svc_xprt_copy_addrs(rqstp, xprt);
699 return ret; 697 return ret;
700 698
701 close_out: 699out_err:
702 if (ctxt) 700 svc_rdma_send_error(rdma_xprt, rmsgp, ret);
703 svc_rdma_put_context(ctxt, 1); 701 svc_rdma_put_context(ctxt, 0);
704 dprintk("svcrdma: transport %p is closing\n", xprt); 702 return 0;
705 /* 703
706 * Set the close bit and enqueue it. svc_recv will see the
707 * close bit and call svc_xprt_delete
708 */
709 set_bit(XPT_CLOSE, &xprt->xpt_flags);
710defer: 704defer:
711 return 0; 705 return 0;
712 706
707out_drop:
708 svc_rdma_put_context(ctxt, 1);
713repost: 709repost:
714 ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL); 710 return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL);
715 if (ret) {
716 pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
717 ret);
718 pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
719 set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
720 ret = -ENOTCONN;
721 }
722 return ret;
723} 711}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index df57f3ce6cd2..4f1b1c4f45f9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -50,9 +50,15 @@
50 50
51#define RPCDBG_FACILITY RPCDBG_SVCXPRT 51#define RPCDBG_FACILITY RPCDBG_SVCXPRT
52 52
53static u32 xdr_padsize(u32 len)
54{
55 return (len & 3) ? (4 - (len & 3)) : 0;
56}
57
53int svc_rdma_map_xdr(struct svcxprt_rdma *xprt, 58int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
54 struct xdr_buf *xdr, 59 struct xdr_buf *xdr,
55 struct svc_rdma_req_map *vec) 60 struct svc_rdma_req_map *vec,
61 bool write_chunk_present)
56{ 62{
57 int sge_no; 63 int sge_no;
58 u32 sge_bytes; 64 u32 sge_bytes;
@@ -92,9 +98,20 @@ int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
92 98
93 /* Tail SGE */ 99 /* Tail SGE */
94 if (xdr->tail[0].iov_len) { 100 if (xdr->tail[0].iov_len) {
95 vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; 101 unsigned char *base = xdr->tail[0].iov_base;
96 vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; 102 size_t len = xdr->tail[0].iov_len;
97 sge_no++; 103 u32 xdr_pad = xdr_padsize(xdr->page_len);
104
105 if (write_chunk_present && xdr_pad) {
106 base += xdr_pad;
107 len -= xdr_pad;
108 }
109
110 if (len) {
111 vec->sge[sge_no].iov_base = base;
112 vec->sge[sge_no].iov_len = len;
113 sge_no++;
114 }
98 } 115 }
99 116
100 dprintk("svcrdma: %s: sge_no %d page_no %d " 117 dprintk("svcrdma: %s: sge_no %d page_no %d "
@@ -166,10 +183,10 @@ svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
166 * reply array is present 183 * reply array is present
167 */ 184 */
168static struct rpcrdma_write_array * 185static struct rpcrdma_write_array *
169svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) 186svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
187 struct rpcrdma_write_array *wr_ary)
170{ 188{
171 struct rpcrdma_read_chunk *rch; 189 struct rpcrdma_read_chunk *rch;
172 struct rpcrdma_write_array *wr_ary;
173 struct rpcrdma_write_array *rp_ary; 190 struct rpcrdma_write_array *rp_ary;
174 191
175 /* XXX: Need to fix when reply chunk may occur with read list 192 /* XXX: Need to fix when reply chunk may occur with read list
@@ -191,7 +208,6 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
191 goto found_it; 208 goto found_it;
192 } 209 }
193 210
194 wr_ary = svc_rdma_get_write_array(rmsgp);
195 if (wr_ary) { 211 if (wr_ary) {
196 int chunk = be32_to_cpu(wr_ary->wc_nchunks); 212 int chunk = be32_to_cpu(wr_ary->wc_nchunks);
197 213
@@ -281,8 +297,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
281 297
282 /* Prepare WRITE WR */ 298 /* Prepare WRITE WR */
283 memset(&write_wr, 0, sizeof write_wr); 299 memset(&write_wr, 0, sizeof write_wr);
284 ctxt->wr_op = IB_WR_RDMA_WRITE; 300 ctxt->cqe.done = svc_rdma_wc_write;
285 write_wr.wr.wr_id = (unsigned long)ctxt; 301 write_wr.wr.wr_cqe = &ctxt->cqe;
286 write_wr.wr.sg_list = &sge[0]; 302 write_wr.wr.sg_list = &sge[0];
287 write_wr.wr.num_sge = sge_no; 303 write_wr.wr.num_sge = sge_no;
288 write_wr.wr.opcode = IB_WR_RDMA_WRITE; 304 write_wr.wr.opcode = IB_WR_RDMA_WRITE;
@@ -298,41 +314,37 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
298 err: 314 err:
299 svc_rdma_unmap_dma(ctxt); 315 svc_rdma_unmap_dma(ctxt);
300 svc_rdma_put_context(ctxt, 0); 316 svc_rdma_put_context(ctxt, 0);
301 /* Fatal error, close transport */
302 return -EIO; 317 return -EIO;
303} 318}
304 319
320noinline
305static int send_write_chunks(struct svcxprt_rdma *xprt, 321static int send_write_chunks(struct svcxprt_rdma *xprt,
306 struct rpcrdma_msg *rdma_argp, 322 struct rpcrdma_write_array *wr_ary,
307 struct rpcrdma_msg *rdma_resp, 323 struct rpcrdma_msg *rdma_resp,
308 struct svc_rqst *rqstp, 324 struct svc_rqst *rqstp,
309 struct svc_rdma_req_map *vec) 325 struct svc_rdma_req_map *vec)
310{ 326{
311 u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 327 u32 xfer_len = rqstp->rq_res.page_len;
312 int write_len; 328 int write_len;
313 u32 xdr_off; 329 u32 xdr_off;
314 int chunk_off; 330 int chunk_off;
315 int chunk_no; 331 int chunk_no;
316 int nchunks; 332 int nchunks;
317 struct rpcrdma_write_array *arg_ary;
318 struct rpcrdma_write_array *res_ary; 333 struct rpcrdma_write_array *res_ary;
319 int ret; 334 int ret;
320 335
321 arg_ary = svc_rdma_get_write_array(rdma_argp);
322 if (!arg_ary)
323 return 0;
324 res_ary = (struct rpcrdma_write_array *) 336 res_ary = (struct rpcrdma_write_array *)
325 &rdma_resp->rm_body.rm_chunks[1]; 337 &rdma_resp->rm_body.rm_chunks[1];
326 338
327 /* Write chunks start at the pagelist */ 339 /* Write chunks start at the pagelist */
328 nchunks = be32_to_cpu(arg_ary->wc_nchunks); 340 nchunks = be32_to_cpu(wr_ary->wc_nchunks);
329 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; 341 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
330 xfer_len && chunk_no < nchunks; 342 xfer_len && chunk_no < nchunks;
331 chunk_no++) { 343 chunk_no++) {
332 struct rpcrdma_segment *arg_ch; 344 struct rpcrdma_segment *arg_ch;
333 u64 rs_offset; 345 u64 rs_offset;
334 346
335 arg_ch = &arg_ary->wc_array[chunk_no].wc_target; 347 arg_ch = &wr_ary->wc_array[chunk_no].wc_target;
336 write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length)); 348 write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
337 349
338 /* Prepare the response chunk given the length actually 350 /* Prepare the response chunk given the length actually
@@ -350,11 +362,8 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
350 xdr_off, 362 xdr_off,
351 write_len, 363 write_len,
352 vec); 364 vec);
353 if (ret <= 0) { 365 if (ret <= 0)
354 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 366 goto out_err;
355 ret);
356 return -EIO;
357 }
358 chunk_off += ret; 367 chunk_off += ret;
359 xdr_off += ret; 368 xdr_off += ret;
360 xfer_len -= ret; 369 xfer_len -= ret;
@@ -364,11 +373,16 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
364 /* Update the req with the number of chunks actually used */ 373 /* Update the req with the number of chunks actually used */
365 svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no); 374 svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
366 375
367 return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 376 return rqstp->rq_res.page_len;
377
378out_err:
379 pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret);
380 return -EIO;
368} 381}
369 382
383noinline
370static int send_reply_chunks(struct svcxprt_rdma *xprt, 384static int send_reply_chunks(struct svcxprt_rdma *xprt,
371 struct rpcrdma_msg *rdma_argp, 385 struct rpcrdma_write_array *rp_ary,
372 struct rpcrdma_msg *rdma_resp, 386 struct rpcrdma_msg *rdma_resp,
373 struct svc_rqst *rqstp, 387 struct svc_rqst *rqstp,
374 struct svc_rdma_req_map *vec) 388 struct svc_rdma_req_map *vec)
@@ -380,25 +394,21 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
380 int chunk_off; 394 int chunk_off;
381 int nchunks; 395 int nchunks;
382 struct rpcrdma_segment *ch; 396 struct rpcrdma_segment *ch;
383 struct rpcrdma_write_array *arg_ary;
384 struct rpcrdma_write_array *res_ary; 397 struct rpcrdma_write_array *res_ary;
385 int ret; 398 int ret;
386 399
387 arg_ary = svc_rdma_get_reply_array(rdma_argp);
388 if (!arg_ary)
389 return 0;
390 /* XXX: need to fix when reply lists occur with read-list and or 400 /* XXX: need to fix when reply lists occur with read-list and or
391 * write-list */ 401 * write-list */
392 res_ary = (struct rpcrdma_write_array *) 402 res_ary = (struct rpcrdma_write_array *)
393 &rdma_resp->rm_body.rm_chunks[2]; 403 &rdma_resp->rm_body.rm_chunks[2];
394 404
395 /* xdr offset starts at RPC message */ 405 /* xdr offset starts at RPC message */
396 nchunks = be32_to_cpu(arg_ary->wc_nchunks); 406 nchunks = be32_to_cpu(rp_ary->wc_nchunks);
397 for (xdr_off = 0, chunk_no = 0; 407 for (xdr_off = 0, chunk_no = 0;
398 xfer_len && chunk_no < nchunks; 408 xfer_len && chunk_no < nchunks;
399 chunk_no++) { 409 chunk_no++) {
400 u64 rs_offset; 410 u64 rs_offset;
401 ch = &arg_ary->wc_array[chunk_no].wc_target; 411 ch = &rp_ary->wc_array[chunk_no].wc_target;
402 write_len = min(xfer_len, be32_to_cpu(ch->rs_length)); 412 write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
403 413
404 /* Prepare the reply chunk given the length actually 414 /* Prepare the reply chunk given the length actually
@@ -415,11 +425,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
415 xdr_off, 425 xdr_off,
416 write_len, 426 write_len,
417 vec); 427 vec);
418 if (ret <= 0) { 428 if (ret <= 0)
419 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 429 goto out_err;
420 ret);
421 return -EIO;
422 }
423 chunk_off += ret; 430 chunk_off += ret;
424 xdr_off += ret; 431 xdr_off += ret;
425 xfer_len -= ret; 432 xfer_len -= ret;
@@ -430,6 +437,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
430 svc_rdma_xdr_encode_reply_array(res_ary, chunk_no); 437 svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
431 438
432 return rqstp->rq_res.len; 439 return rqstp->rq_res.len;
440
441out_err:
442 pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret);
443 return -EIO;
433} 444}
434 445
435/* This function prepares the portion of the RPCRDMA message to be 446/* This function prepares the portion of the RPCRDMA message to be
@@ -464,13 +475,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
464 int pages; 475 int pages;
465 int ret; 476 int ret;
466 477
467 /* Post a recv buffer to handle another request. */ 478 ret = svc_rdma_repost_recv(rdma, GFP_KERNEL);
468 ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
469 if (ret) { 479 if (ret) {
470 printk(KERN_INFO
471 "svcrdma: could not post a receive buffer, err=%d."
472 "Closing transport %p.\n", ret, rdma);
473 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
474 svc_rdma_put_context(ctxt, 0); 480 svc_rdma_put_context(ctxt, 0);
475 return -ENOTCONN; 481 return -ENOTCONN;
476 } 482 }
@@ -543,8 +549,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
543 goto err; 549 goto err;
544 } 550 }
545 memset(&send_wr, 0, sizeof send_wr); 551 memset(&send_wr, 0, sizeof send_wr);
546 ctxt->wr_op = IB_WR_SEND; 552 ctxt->cqe.done = svc_rdma_wc_send;
547 send_wr.wr_id = (unsigned long)ctxt; 553 send_wr.wr_cqe = &ctxt->cqe;
548 send_wr.sg_list = ctxt->sge; 554 send_wr.sg_list = ctxt->sge;
549 send_wr.num_sge = sge_no; 555 send_wr.num_sge = sge_no;
550 send_wr.opcode = IB_WR_SEND; 556 send_wr.opcode = IB_WR_SEND;
@@ -559,6 +565,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
559 err: 565 err:
560 svc_rdma_unmap_dma(ctxt); 566 svc_rdma_unmap_dma(ctxt);
561 svc_rdma_put_context(ctxt, 1); 567 svc_rdma_put_context(ctxt, 1);
568 pr_err("svcrdma: failed to send reply, rc=%d\n", ret);
562 return -EIO; 569 return -EIO;
563} 570}
564 571
@@ -573,7 +580,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
573 container_of(xprt, struct svcxprt_rdma, sc_xprt); 580 container_of(xprt, struct svcxprt_rdma, sc_xprt);
574 struct rpcrdma_msg *rdma_argp; 581 struct rpcrdma_msg *rdma_argp;
575 struct rpcrdma_msg *rdma_resp; 582 struct rpcrdma_msg *rdma_resp;
576 struct rpcrdma_write_array *reply_ary; 583 struct rpcrdma_write_array *wr_ary, *rp_ary;
577 enum rpcrdma_proc reply_type; 584 enum rpcrdma_proc reply_type;
578 int ret; 585 int ret;
579 int inline_bytes; 586 int inline_bytes;
@@ -587,12 +594,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
587 * places this at the start of page 0. 594 * places this at the start of page 0.
588 */ 595 */
589 rdma_argp = page_address(rqstp->rq_pages[0]); 596 rdma_argp = page_address(rqstp->rq_pages[0]);
597 wr_ary = svc_rdma_get_write_array(rdma_argp);
598 rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
590 599
591 /* Build an req vec for the XDR */ 600 /* Build an req vec for the XDR */
592 ctxt = svc_rdma_get_context(rdma); 601 ctxt = svc_rdma_get_context(rdma);
593 ctxt->direction = DMA_TO_DEVICE; 602 ctxt->direction = DMA_TO_DEVICE;
594 vec = svc_rdma_get_req_map(rdma); 603 vec = svc_rdma_get_req_map(rdma);
595 ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec); 604 ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
596 if (ret) 605 if (ret)
597 goto err0; 606 goto err0;
598 inline_bytes = rqstp->rq_res.len; 607 inline_bytes = rqstp->rq_res.len;
@@ -603,8 +612,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
603 if (!res_page) 612 if (!res_page)
604 goto err0; 613 goto err0;
605 rdma_resp = page_address(res_page); 614 rdma_resp = page_address(res_page);
606 reply_ary = svc_rdma_get_reply_array(rdma_argp); 615 if (rp_ary)
607 if (reply_ary)
608 reply_type = RDMA_NOMSG; 616 reply_type = RDMA_NOMSG;
609 else 617 else
610 reply_type = RDMA_MSG; 618 reply_type = RDMA_MSG;
@@ -612,27 +620,26 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 				rdma_resp, reply_type);
 
 	/* Send any write-chunk data and build resp write-list */
-	ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
-				rqstp, vec);
-	if (ret < 0) {
-		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
-		       ret);
-		goto err1;
-	}
-	inline_bytes -= ret;
+	if (wr_ary) {
+		ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec);
+		if (ret < 0)
+			goto err1;
+		inline_bytes -= ret + xdr_padsize(ret);
+	}
 
 	/* Send any reply-list data and update resp reply-list */
-	ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
-				rqstp, vec);
-	if (ret < 0) {
-		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
-		       ret);
-		goto err1;
-	}
-	inline_bytes -= ret;
+	if (rp_ary) {
+		ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec);
+		if (ret < 0)
+			goto err1;
+		inline_bytes -= ret;
+	}
 
 	ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
 			 inline_bytes);
+	if (ret < 0)
+		goto err1;
+
 	svc_rdma_put_req_map(rdma, vec);
 	dprintk("svcrdma: send_reply returns %d\n", ret);
 	return ret;
@@ -642,5 +649,68 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
642 err0: 649 err0:
643 svc_rdma_put_req_map(rdma, vec); 650 svc_rdma_put_req_map(rdma, vec);
644 svc_rdma_put_context(ctxt, 0); 651 svc_rdma_put_context(ctxt, 0);
645 return ret; 652 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
653 return -ENOTCONN;
654}
655
656void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
657 int status)
658{
659 struct ib_send_wr err_wr;
660 struct page *p;
661 struct svc_rdma_op_ctxt *ctxt;
662 enum rpcrdma_errcode err;
663 __be32 *va;
664 int length;
665 int ret;
666
667 ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
668 if (ret)
669 return;
670
671 p = alloc_page(GFP_KERNEL);
672 if (!p)
673 return;
674 va = page_address(p);
675
676 /* XDR encode an error reply */
677 err = ERR_CHUNK;
678 if (status == -EPROTONOSUPPORT)
679 err = ERR_VERS;
680 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
681
682 ctxt = svc_rdma_get_context(xprt);
683 ctxt->direction = DMA_TO_DEVICE;
684 ctxt->count = 1;
685 ctxt->pages[0] = p;
686
687 /* Prepare SGE for local address */
688 ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
689 ctxt->sge[0].length = length;
690 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
691 p, 0, length, DMA_TO_DEVICE);
692 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
693 dprintk("svcrdma: Error mapping buffer for protocol error\n");
694 svc_rdma_put_context(ctxt, 1);
695 return;
696 }
697 atomic_inc(&xprt->sc_dma_used);
698
699 /* Prepare SEND WR */
700 memset(&err_wr, 0, sizeof(err_wr));
701 ctxt->cqe.done = svc_rdma_wc_send;
702 err_wr.wr_cqe = &ctxt->cqe;
703 err_wr.sg_list = ctxt->sge;
704 err_wr.num_sge = 1;
705 err_wr.opcode = IB_WR_SEND;
706 err_wr.send_flags = IB_SEND_SIGNALED;
707
708 /* Post It */
709 ret = svc_rdma_send(xprt, &err_wr);
710 if (ret) {
711 dprintk("svcrdma: Error %d posting send for protocol error\n",
712 ret);
713 svc_rdma_unmap_dma(ctxt);
714 svc_rdma_put_context(ctxt, 1);
715 }
646} 716}
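
Editor's note on the svc_rdma_sendto.c hunks above: the new xdr_padsize() helper computes the 0-3 pad bytes XDR appends after a region, so the send path can subtract (or skip) the write-chunk pad that now lives in the tail iovec. A trivial stand-alone check of that arithmetic, for illustration only:

#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as the xdr_padsize() added in the patch. */
static uint32_t xdr_padsize(uint32_t len)
{
	return (len & 3) ? (4 - (len & 3)) : 0;
}

int main(void)
{
	for (uint32_t len = 0; len <= 8; len++)
		printf("len %u -> pad %u (padded total %u)\n",
		       len, xdr_padsize(len), len + xdr_padsize(len));
	return 0;
}
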
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5763825d09bf..90668969d559 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -63,17 +63,10 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
63 int flags); 63 int flags);
64static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 64static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
65static void svc_rdma_release_rqst(struct svc_rqst *); 65static void svc_rdma_release_rqst(struct svc_rqst *);
66static void dto_tasklet_func(unsigned long data);
67static void svc_rdma_detach(struct svc_xprt *xprt); 66static void svc_rdma_detach(struct svc_xprt *xprt);
68static void svc_rdma_free(struct svc_xprt *xprt); 67static void svc_rdma_free(struct svc_xprt *xprt);
69static int svc_rdma_has_wspace(struct svc_xprt *xprt); 68static int svc_rdma_has_wspace(struct svc_xprt *xprt);
70static int svc_rdma_secure_port(struct svc_rqst *); 69static int svc_rdma_secure_port(struct svc_rqst *);
71static void rq_cq_reap(struct svcxprt_rdma *xprt);
72static void sq_cq_reap(struct svcxprt_rdma *xprt);
73
74static DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL);
75static DEFINE_SPINLOCK(dto_lock);
76static LIST_HEAD(dto_xprt_q);
77 70
78static struct svc_xprt_ops svc_rdma_ops = { 71static struct svc_xprt_ops svc_rdma_ops = {
79 .xpo_create = svc_rdma_create, 72 .xpo_create = svc_rdma_create,
@@ -352,15 +345,6 @@ static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
352 } 345 }
353} 346}
354 347
355/* ib_cq event handler */
356static void cq_event_handler(struct ib_event *event, void *context)
357{
358 struct svc_xprt *xprt = context;
359 dprintk("svcrdma: received CQ event %s (%d), context=%p\n",
360 ib_event_msg(event->event), event->event, context);
361 set_bit(XPT_CLOSE, &xprt->xpt_flags);
362}
363
364/* QP event handler */ 348/* QP event handler */
365static void qp_event_handler(struct ib_event *event, void *context) 349static void qp_event_handler(struct ib_event *event, void *context)
366{ 350{
@@ -392,251 +376,171 @@ static void qp_event_handler(struct ib_event *event, void *context)
392 } 376 }
393} 377}
394 378
395/* 379/**
396 * Data Transfer Operation Tasklet 380 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
381 * @cq: completion queue
382 * @wc: completed WR
397 * 383 *
398 * Walks a list of transports with I/O pending, removing entries as
399 * they are added to the server's I/O pending list. Two bits indicate
400 * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave
401 * spinlock that serializes access to the transport list with the RQ
402 * and SQ interrupt handlers.
403 */ 384 */
404static void dto_tasklet_func(unsigned long data) 385static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
405{ 386{
406 struct svcxprt_rdma *xprt; 387 struct svcxprt_rdma *xprt = cq->cq_context;
407 unsigned long flags; 388 struct ib_cqe *cqe = wc->wr_cqe;
389 struct svc_rdma_op_ctxt *ctxt;
408 390
409 spin_lock_irqsave(&dto_lock, flags); 391 /* WARNING: Only wc->wr_cqe and wc->status are reliable */
410 while (!list_empty(&dto_xprt_q)) { 392 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
411 xprt = list_entry(dto_xprt_q.next, 393 ctxt->wc_status = wc->status;
412 struct svcxprt_rdma, sc_dto_q); 394 svc_rdma_unmap_dma(ctxt);
413 list_del_init(&xprt->sc_dto_q);
414 spin_unlock_irqrestore(&dto_lock, flags);
415 395
416 rq_cq_reap(xprt); 396 if (wc->status != IB_WC_SUCCESS)
417 sq_cq_reap(xprt); 397 goto flushed;
418 398
419 svc_xprt_put(&xprt->sc_xprt); 399 /* All wc fields are now known to be valid */
420 spin_lock_irqsave(&dto_lock, flags); 400 ctxt->byte_len = wc->byte_len;
421 } 401 spin_lock(&xprt->sc_rq_dto_lock);
422 spin_unlock_irqrestore(&dto_lock, flags); 402 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
403 spin_unlock(&xprt->sc_rq_dto_lock);
404
405 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
406 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
407 goto out;
408 svc_xprt_enqueue(&xprt->sc_xprt);
409 goto out;
410
411flushed:
412 if (wc->status != IB_WC_WR_FLUSH_ERR)
413 pr_warn("svcrdma: receive: %s (%u/0x%x)\n",
414 ib_wc_status_msg(wc->status),
415 wc->status, wc->vendor_err);
416 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
417 svc_rdma_put_context(ctxt, 1);
418
419out:
420 svc_xprt_put(&xprt->sc_xprt);
423} 421}
424 422
425/* 423static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
426 * Receive Queue Completion Handler 424 struct ib_wc *wc,
427 * 425 const char *opname)
428 * Since an RQ completion handler is called on interrupt context, we
429 * need to defer the handling of the I/O to a tasklet
430 */
431static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
432{ 426{
433 struct svcxprt_rdma *xprt = cq_context; 427 if (wc->status != IB_WC_SUCCESS)
434 unsigned long flags; 428 goto err;
435
436 /* Guard against unconditional flush call for destroyed QP */
437 if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
438 return;
439 429
440 /* 430out:
441 * Set the bit regardless of whether or not it's on the list 431 atomic_dec(&xprt->sc_sq_count);
442 * because it may be on the list already due to an SQ 432 wake_up(&xprt->sc_send_wait);
443 * completion. 433 return;
444 */ 434
445 set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); 435err:
436 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
437 if (wc->status != IB_WC_WR_FLUSH_ERR)
438 pr_err("svcrdma: %s: %s (%u/0x%x)\n",
439 opname, ib_wc_status_msg(wc->status),
440 wc->status, wc->vendor_err);
441 goto out;
442}
446 443
447 /* 444static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
448 * If this transport is not already on the DTO transport queue, 445 const char *opname)
449 * add it 446{
450 */ 447 struct svcxprt_rdma *xprt = cq->cq_context;
451 spin_lock_irqsave(&dto_lock, flags);
452 if (list_empty(&xprt->sc_dto_q)) {
453 svc_xprt_get(&xprt->sc_xprt);
454 list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
455 }
456 spin_unlock_irqrestore(&dto_lock, flags);
457 448
458 /* Tasklet does all the work to avoid irqsave locks. */ 449 svc_rdma_send_wc_common(xprt, wc, opname);
459 tasklet_schedule(&dto_tasklet); 450 svc_xprt_put(&xprt->sc_xprt);
460} 451}
461 452
462/* 453/**
463 * rq_cq_reap - Process the RQ CQ. 454 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
464 * 455 * @cq: completion queue
465 * Take all completing WC off the CQE and enqueue the associated DTO 456 * @wc: completed WR
466 * context on the dto_q for the transport.
467 * 457 *
468 * Note that caller must hold a transport reference.
469 */ 458 */
470static void rq_cq_reap(struct svcxprt_rdma *xprt) 459void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
471{ 460{
472 int ret; 461 struct ib_cqe *cqe = wc->wr_cqe;
473 struct ib_wc wc; 462 struct svc_rdma_op_ctxt *ctxt;
474 struct svc_rdma_op_ctxt *ctxt = NULL;
475 463
476 if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) 464 svc_rdma_send_wc_common_put(cq, wc, "send");
477 return;
478 465
479 ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); 466 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
480 atomic_inc(&rdma_stat_rq_poll); 467 svc_rdma_unmap_dma(ctxt);
468 svc_rdma_put_context(ctxt, 1);
469}
481 470
482 while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { 471/**
483 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; 472 * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC
484 ctxt->wc_status = wc.status; 473 * @cq: completion queue
485 ctxt->byte_len = wc.byte_len; 474 * @wc: completed WR
486 svc_rdma_unmap_dma(ctxt); 475 *
487 if (wc.status != IB_WC_SUCCESS) { 476 */
488 /* Close the transport */ 477void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc)
489 dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); 478{
490 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 479 struct ib_cqe *cqe = wc->wr_cqe;
491 svc_rdma_put_context(ctxt, 1); 480 struct svc_rdma_op_ctxt *ctxt;
492 svc_xprt_put(&xprt->sc_xprt);
493 continue;
494 }
495 spin_lock_bh(&xprt->sc_rq_dto_lock);
496 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
497 spin_unlock_bh(&xprt->sc_rq_dto_lock);
498 svc_xprt_put(&xprt->sc_xprt);
499 }
500 481
501 if (ctxt) 482 svc_rdma_send_wc_common_put(cq, wc, "write");
502 atomic_inc(&rdma_stat_rq_prod);
503 483
504 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 484 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
505 /* 485 svc_rdma_unmap_dma(ctxt);
506 * If data arrived before established event, 486 svc_rdma_put_context(ctxt, 0);
507 * don't enqueue. This defers RPC I/O until the
508 * RDMA connection is complete.
509 */
510 if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
511 svc_xprt_enqueue(&xprt->sc_xprt);
512} 487}
513 488
514/* 489/**
515 * Process a completion context 490 * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
491 * @cq: completion queue
492 * @wc: completed WR
493 *
516 */ 494 */
517static void process_context(struct svcxprt_rdma *xprt, 495void svc_rdma_wc_reg(struct ib_cq *cq, struct ib_wc *wc)
518 struct svc_rdma_op_ctxt *ctxt)
519{ 496{
520 struct svc_rdma_op_ctxt *read_hdr; 497 svc_rdma_send_wc_common_put(cq, wc, "fastreg");
521 int free_pages = 0; 498}
522
523 svc_rdma_unmap_dma(ctxt);
524 499
525 switch (ctxt->wr_op) { 500/**
526 case IB_WR_SEND: 501 * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC
527 free_pages = 1; 502 * @cq: completion queue
528 break; 503 * @wc: completed WR
504 *
505 */
506void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
507{
508 struct svcxprt_rdma *xprt = cq->cq_context;
509 struct ib_cqe *cqe = wc->wr_cqe;
510 struct svc_rdma_op_ctxt *ctxt;
529 511
530 case IB_WR_RDMA_WRITE: 512 svc_rdma_send_wc_common(xprt, wc, "read");
531 break;
532 513
533 case IB_WR_RDMA_READ: 514 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
534 case IB_WR_RDMA_READ_WITH_INV: 515 svc_rdma_unmap_dma(ctxt);
535 svc_rdma_put_frmr(xprt, ctxt->frmr); 516 svc_rdma_put_frmr(xprt, ctxt->frmr);
536 517
537 if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) 518 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
538 break; 519 struct svc_rdma_op_ctxt *read_hdr;
539 520
540 read_hdr = ctxt->read_hdr; 521 read_hdr = ctxt->read_hdr;
541 svc_rdma_put_context(ctxt, 0); 522 spin_lock(&xprt->sc_rq_dto_lock);
542
543 spin_lock_bh(&xprt->sc_rq_dto_lock);
544 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
545 list_add_tail(&read_hdr->dto_q, 523 list_add_tail(&read_hdr->dto_q,
546 &xprt->sc_read_complete_q); 524 &xprt->sc_read_complete_q);
547 spin_unlock_bh(&xprt->sc_rq_dto_lock); 525 spin_unlock(&xprt->sc_rq_dto_lock);
548 svc_xprt_enqueue(&xprt->sc_xprt);
549 return;
550 526
551 default: 527 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
552 dprintk("svcrdma: unexpected completion opcode=%d\n", 528 svc_xprt_enqueue(&xprt->sc_xprt);
553 ctxt->wr_op);
554 break;
555 } 529 }
556 530
557 svc_rdma_put_context(ctxt, free_pages); 531 svc_rdma_put_context(ctxt, 0);
532 svc_xprt_put(&xprt->sc_xprt);
558} 533}
559 534
560/* 535/**
561 * Send Queue Completion Handler - potentially called on interrupt context. 536 * svc_rdma_wc_inv - Invoked by RDMA provider for each polled LOCAL_INV WC
537 * @cq: completion queue
538 * @wc: completed WR
562 * 539 *
563 * Note that caller must hold a transport reference.
564 */ 540 */
565static void sq_cq_reap(struct svcxprt_rdma *xprt) 541void svc_rdma_wc_inv(struct ib_cq *cq, struct ib_wc *wc)
566{
567 struct svc_rdma_op_ctxt *ctxt = NULL;
568 struct ib_wc wc_a[6];
569 struct ib_wc *wc;
570 struct ib_cq *cq = xprt->sc_sq_cq;
571 int ret;
572
573 memset(wc_a, 0, sizeof(wc_a));
574
575 if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
576 return;
577
578 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
579 atomic_inc(&rdma_stat_sq_poll);
580 while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
581 int i;
582
583 for (i = 0; i < ret; i++) {
584 wc = &wc_a[i];
585 if (wc->status != IB_WC_SUCCESS) {
586 dprintk("svcrdma: sq wc err status %s (%d)\n",
587 ib_wc_status_msg(wc->status),
588 wc->status);
589
590 /* Close the transport */
591 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
592 }
593
594 /* Decrement used SQ WR count */
595 atomic_dec(&xprt->sc_sq_count);
596 wake_up(&xprt->sc_send_wait);
597
598 ctxt = (struct svc_rdma_op_ctxt *)
599 (unsigned long)wc->wr_id;
600 if (ctxt)
601 process_context(xprt, ctxt);
602
603 svc_xprt_put(&xprt->sc_xprt);
604 }
605 }
606
607 if (ctxt)
608 atomic_inc(&rdma_stat_sq_prod);
609}
610
611static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
612{ 542{
613 struct svcxprt_rdma *xprt = cq_context; 543 svc_rdma_send_wc_common_put(cq, wc, "localInv");
614 unsigned long flags;
615
616 /* Guard against unconditional flush call for destroyed QP */
617 if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
618 return;
619
620 /*
621 * Set the bit regardless of whether or not it's on the list
622 * because it may be on the list already due to an RQ
623 * completion.
624 */
625 set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
626
627 /*
628 * If this transport is not already on the DTO transport queue,
629 * add it
630 */
631 spin_lock_irqsave(&dto_lock, flags);
632 if (list_empty(&xprt->sc_dto_q)) {
633 svc_xprt_get(&xprt->sc_xprt);
634 list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
635 }
636 spin_unlock_irqrestore(&dto_lock, flags);
637
638 /* Tasklet does all the work to avoid irqsave locks. */
639 tasklet_schedule(&dto_tasklet);
640} 544}
641 545
642static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, 546static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
@@ -681,6 +585,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
681 ctxt = svc_rdma_get_context(xprt); 585 ctxt = svc_rdma_get_context(xprt);
682 buflen = 0; 586 buflen = 0;
683 ctxt->direction = DMA_FROM_DEVICE; 587 ctxt->direction = DMA_FROM_DEVICE;
588 ctxt->cqe.done = svc_rdma_wc_receive;
684 for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { 589 for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
685 if (sge_no >= xprt->sc_max_sge) { 590 if (sge_no >= xprt->sc_max_sge) {
686 pr_err("svcrdma: Too many sges (%d)\n", sge_no); 591 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
@@ -705,7 +610,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
705 recv_wr.next = NULL; 610 recv_wr.next = NULL;
706 recv_wr.sg_list = &ctxt->sge[0]; 611 recv_wr.sg_list = &ctxt->sge[0];
707 recv_wr.num_sge = ctxt->count; 612 recv_wr.num_sge = ctxt->count;
708 recv_wr.wr_id = (u64)(unsigned long)ctxt; 613 recv_wr.wr_cqe = &ctxt->cqe;
709 614
710 svc_xprt_get(&xprt->sc_xprt); 615 svc_xprt_get(&xprt->sc_xprt);
711 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); 616 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
@@ -722,6 +627,21 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
722 return -ENOMEM; 627 return -ENOMEM;
723} 628}
724 629
630int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
631{
632 int ret = 0;
633
634 ret = svc_rdma_post_recv(xprt, flags);
635 if (ret) {
636 pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
637 ret);
638 pr_err("svcrdma: closing transport %p.\n", xprt);
639 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
640 ret = -ENOTCONN;
641 }
642 return ret;
643}
644
725/* 645/*
726 * This function handles the CONNECT_REQUEST event on a listening 646 * This function handles the CONNECT_REQUEST event on a listening
727 * endpoint. It is passed the cma_id for the _new_ connection. The context in 647 * endpoint. It is passed the cma_id for the _new_ connection. The context in
@@ -1011,7 +931,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1011 struct svcxprt_rdma *listen_rdma; 931 struct svcxprt_rdma *listen_rdma;
1012 struct svcxprt_rdma *newxprt = NULL; 932 struct svcxprt_rdma *newxprt = NULL;
1013 struct rdma_conn_param conn_param; 933 struct rdma_conn_param conn_param;
1014 struct ib_cq_init_attr cq_attr = {};
1015 struct ib_qp_init_attr qp_attr; 934 struct ib_qp_init_attr qp_attr;
1016 struct ib_device *dev; 935 struct ib_device *dev;
1017 unsigned int i; 936 unsigned int i;
@@ -1069,22 +988,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1069 dprintk("svcrdma: error creating PD for connect request\n"); 988 dprintk("svcrdma: error creating PD for connect request\n");
1070 goto errout; 989 goto errout;
1071 } 990 }
1072 cq_attr.cqe = newxprt->sc_sq_depth; 991 newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
1073 newxprt->sc_sq_cq = ib_create_cq(dev, 992 0, IB_POLL_SOFTIRQ);
1074 sq_comp_handler,
1075 cq_event_handler,
1076 newxprt,
1077 &cq_attr);
1078 if (IS_ERR(newxprt->sc_sq_cq)) { 993 if (IS_ERR(newxprt->sc_sq_cq)) {
1079 dprintk("svcrdma: error creating SQ CQ for connect request\n"); 994 dprintk("svcrdma: error creating SQ CQ for connect request\n");
1080 goto errout; 995 goto errout;
1081 } 996 }
1082 cq_attr.cqe = newxprt->sc_rq_depth; 997 newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
1083 newxprt->sc_rq_cq = ib_create_cq(dev, 998 0, IB_POLL_SOFTIRQ);
1084 rq_comp_handler,
1085 cq_event_handler,
1086 newxprt,
1087 &cq_attr);
1088 if (IS_ERR(newxprt->sc_rq_cq)) { 999 if (IS_ERR(newxprt->sc_rq_cq)) {
1089 dprintk("svcrdma: error creating RQ CQ for connect request\n"); 1000 dprintk("svcrdma: error creating RQ CQ for connect request\n");
1090 goto errout; 1001 goto errout;
@@ -1173,13 +1084,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1173 /* Swap out the handler */ 1084 /* Swap out the handler */
1174 newxprt->sc_cm_id->event_handler = rdma_cma_handler; 1085 newxprt->sc_cm_id->event_handler = rdma_cma_handler;
1175 1086
1176 /*
1177 * Arm the CQs for the SQ and RQ before accepting so we can't
1178 * miss the first message
1179 */
1180 ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
1181 ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
1182
1183 /* Accept Connection */ 1087 /* Accept Connection */
1184 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); 1088 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
1185 memset(&conn_param, 0, sizeof conn_param); 1089 memset(&conn_param, 0, sizeof conn_param);
@@ -1319,10 +1223,10 @@ static void __svc_rdma_free(struct work_struct *work)
1319 ib_destroy_qp(rdma->sc_qp); 1223 ib_destroy_qp(rdma->sc_qp);
1320 1224
1321 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) 1225 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
1322 ib_destroy_cq(rdma->sc_sq_cq); 1226 ib_free_cq(rdma->sc_sq_cq);
1323 1227
1324 if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) 1228 if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
1325 ib_destroy_cq(rdma->sc_rq_cq); 1229 ib_free_cq(rdma->sc_rq_cq);
1326 1230
1327 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) 1231 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
1328 ib_dealloc_pd(rdma->sc_pd); 1232 ib_dealloc_pd(rdma->sc_pd);
@@ -1383,9 +1287,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1383 spin_unlock_bh(&xprt->sc_lock); 1287 spin_unlock_bh(&xprt->sc_lock);
1384 atomic_inc(&rdma_stat_sq_starve); 1288 atomic_inc(&rdma_stat_sq_starve);
1385 1289
1386 /* See if we can opportunistically reap SQ WR to make room */
1387 sq_cq_reap(xprt);
1388
1389 /* Wait until SQ WR available if SQ still full */ 1290 /* Wait until SQ WR available if SQ still full */
1390 wait_event(xprt->sc_send_wait, 1291 wait_event(xprt->sc_send_wait,
1391 atomic_read(&xprt->sc_sq_count) < 1292 atomic_read(&xprt->sc_sq_count) <
@@ -1418,57 +1319,3 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1418 } 1319 }
1419 return ret; 1320 return ret;
1420} 1321}
1421
1422void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1423 enum rpcrdma_errcode err)
1424{
1425 struct ib_send_wr err_wr;
1426 struct page *p;
1427 struct svc_rdma_op_ctxt *ctxt;
1428 __be32 *va;
1429 int length;
1430 int ret;
1431
1432 p = alloc_page(GFP_KERNEL);
1433 if (!p)
1434 return;
1435 va = page_address(p);
1436
1437 /* XDR encode error */
1438 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1439
1440 ctxt = svc_rdma_get_context(xprt);
1441 ctxt->direction = DMA_FROM_DEVICE;
1442 ctxt->count = 1;
1443 ctxt->pages[0] = p;
1444
1445 /* Prepare SGE for local address */
1446 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
1447 p, 0, length, DMA_FROM_DEVICE);
1448 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1449 put_page(p);
1450 svc_rdma_put_context(ctxt, 1);
1451 return;
1452 }
1453 atomic_inc(&xprt->sc_dma_used);
1454 ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
1455 ctxt->sge[0].length = length;
1456
1457 /* Prepare SEND WR */
1458 memset(&err_wr, 0, sizeof err_wr);
1459 ctxt->wr_op = IB_WR_SEND;
1460 err_wr.wr_id = (unsigned long)ctxt;
1461 err_wr.sg_list = ctxt->sge;
1462 err_wr.num_sge = 1;
1463 err_wr.opcode = IB_WR_SEND;
1464 err_wr.send_flags = IB_SEND_SIGNALED;
1465
1466 /* Post It */
1467 ret = svc_rdma_send(xprt, &err_wr);
1468 if (ret) {
1469 dprintk("svcrdma: Error %d posting send for protocol error\n",
1470 ret);
1471 svc_rdma_unmap_dma(ctxt);
1472 svc_rdma_put_context(ctxt, 1);
1473 }
1474}
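Taken together, the svcrdma hunks above retire the dto tasklet, the hand-rolled ib_poll_cq() loops and the explicit ib_req_notify_cq() re-arming: the CQs are now obtained with ib_alloc_cq(), which makes the RDMA core responsible for polling, dispatching .done callbacks and re-arming in the requested context. A condensed sketch of that life cycle (not the exact svcrdma code):

#include <rdma/ib_verbs.h>

static struct ib_cq *example_setup_cq(struct ib_device *dev, void *priv,
				      int depth)
{
	/* comp_vector 0; .done callbacks run from softirq context */
	return ib_alloc_cq(dev, priv, depth, 0, IB_POLL_SOFTIRQ);
}

static void example_teardown_cq(struct ib_cq *cq)
{
	if (cq && !IS_ERR(cq))
		ib_free_cq(cq);		/* replaces ib_destroy_cq() */
}

Since ib_alloc_cq() arms the CQ as part of allocation, the explicit "arm before rdma_accept()" step removed above is no longer needed.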
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 878f1bfb1db9..f5ed9f982cd7 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -112,89 +112,65 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
112 } 112 }
113} 113}
114 114
115/**
116 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
117 * @cq: completion queue (ignored)
118 * @wc: completed WR
119 *
120 */
115static void 121static void
116rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) 122rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
117{ 123{
118 struct rpcrdma_ep *ep = context; 124 /* WARNING: Only wr_cqe and status are reliable at this point */
119 125 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
120 pr_err("RPC: %s: %s on device %s ep %p\n", 126 pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
121 __func__, ib_event_msg(event->event), 127 ib_wc_status_msg(wc->status),
122 event->device->name, context); 128 wc->status, wc->vendor_err);
123 if (ep->rep_connected == 1) {
124 ep->rep_connected = -EIO;
125 rpcrdma_conn_func(ep);
126 wake_up_all(&ep->rep_connect_wait);
127 }
128} 129}
129 130
130static void 131static void
131rpcrdma_sendcq_process_wc(struct ib_wc *wc) 132rpcrdma_receive_worker(struct work_struct *work)
132{ 133{
133 /* WARNING: Only wr_id and status are reliable at this point */ 134 struct rpcrdma_rep *rep =
134 if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) { 135 container_of(work, struct rpcrdma_rep, rr_work);
135 if (wc->status != IB_WC_SUCCESS &&
136 wc->status != IB_WC_WR_FLUSH_ERR)
137 pr_err("RPC: %s: SEND: %s\n",
138 __func__, ib_wc_status_msg(wc->status));
139 } else {
140 struct rpcrdma_mw *r;
141 136
142 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; 137 rpcrdma_reply_handler(rep);
143 r->mw_sendcompletion(wc);
144 }
145} 138}
146 139
147/* The common case is a single send completion is waiting. By 140/* Perform basic sanity checking to avoid using garbage
148 * passing two WC entries to ib_poll_cq, a return code of 1 141 * to update the credit grant value.
149 * means there is exactly one WC waiting and no more. We don't
150 * have to invoke ib_poll_cq again to know that the CQ has been
151 * properly drained.
152 */ 142 */
153static void 143static void
154rpcrdma_sendcq_poll(struct ib_cq *cq) 144rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
155{ 145{
156 struct ib_wc *pos, wcs[2]; 146 struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
157 int count, rc; 147 struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
148 u32 credits;
158 149
159 do { 150 if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
160 pos = wcs; 151 return;
161 152
162 rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos); 153 credits = be32_to_cpu(rmsgp->rm_credit);
163 if (rc < 0) 154 if (credits == 0)
164 break; 155 credits = 1; /* don't deadlock */
156 else if (credits > buffer->rb_max_requests)
157 credits = buffer->rb_max_requests;
165 158
166 count = rc; 159 atomic_set(&buffer->rb_credits, credits);
167 while (count-- > 0)
168 rpcrdma_sendcq_process_wc(pos++);
169 } while (rc == ARRAY_SIZE(wcs));
170 return;
171} 160}
172 161
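The sanity check above keeps a bogus credit grant from wedging or overrunning the transport: a grant of zero would stall all further sends, and a grant larger than rb_max_requests would exceed the preposted receive buffers. The same rule in isolation (example_clamp_credits is a hypothetical helper, not part of the patch):

static u32 example_clamp_credits(u32 granted, u32 max_requests)
{
	if (granted == 0)
		return 1;		/* never deadlock the transport */
	if (granted > max_requests)
		return max_requests;	/* never exceed posted receives */
	return granted;
}

For instance, a damaged header advertising 0 credits still leaves one request in flight, while an advertisement of 10000 on a mount with max_requests = 32 is clamped back to 32.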
173/* Handle provider send completion upcalls. 162/**
163 * rpcrdma_receive_wc - Invoked by RDMA provider for each polled Receive WC
164 * @cq: completion queue (ignored)
165 * @wc: completed WR
166 *
174 */ 167 */
175static void 168static void
176rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) 169rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
177{ 170{
178 do { 171 struct ib_cqe *cqe = wc->wr_cqe;
179 rpcrdma_sendcq_poll(cq); 172 struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
180 } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | 173 rr_cqe);
181 IB_CQ_REPORT_MISSED_EVENTS) > 0);
182}
183
184static void
185rpcrdma_receive_worker(struct work_struct *work)
186{
187 struct rpcrdma_rep *rep =
188 container_of(work, struct rpcrdma_rep, rr_work);
189
190 rpcrdma_reply_handler(rep);
191}
192
193static void
194rpcrdma_recvcq_process_wc(struct ib_wc *wc)
195{
196 struct rpcrdma_rep *rep =
197 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
198 174
199 /* WARNING: Only wr_id and status are reliable at this point */ 175 /* WARNING: Only wr_id and status are reliable at this point */
200 if (wc->status != IB_WC_SUCCESS) 176 if (wc->status != IB_WC_SUCCESS)
@@ -211,7 +187,8 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc)
211 ib_dma_sync_single_for_cpu(rep->rr_device, 187 ib_dma_sync_single_for_cpu(rep->rr_device,
212 rdmab_addr(rep->rr_rdmabuf), 188 rdmab_addr(rep->rr_rdmabuf),
213 rep->rr_len, DMA_FROM_DEVICE); 189 rep->rr_len, DMA_FROM_DEVICE);
214 prefetch(rdmab_to_msg(rep->rr_rdmabuf)); 190
191 rpcrdma_update_granted_credits(rep);
215 192
216out_schedule: 193out_schedule:
217 queue_work(rpcrdma_receive_wq, &rep->rr_work); 194 queue_work(rpcrdma_receive_wq, &rep->rr_work);
@@ -219,57 +196,20 @@ out_schedule:
219 196
220out_fail: 197out_fail:
221 if (wc->status != IB_WC_WR_FLUSH_ERR) 198 if (wc->status != IB_WC_WR_FLUSH_ERR)
222 pr_err("RPC: %s: rep %p: %s\n", 199 pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
223 __func__, rep, ib_wc_status_msg(wc->status)); 200 ib_wc_status_msg(wc->status),
201 wc->status, wc->vendor_err);
224 rep->rr_len = RPCRDMA_BAD_LEN; 202 rep->rr_len = RPCRDMA_BAD_LEN;
225 goto out_schedule; 203 goto out_schedule;
226} 204}
227 205
228/* The wc array is on stack: automatic memory is always CPU-local.
229 *
230 * struct ib_wc is 64 bytes, making the poll array potentially
231 * large. But this is at the bottom of the call chain. Further
232 * substantial work is done in another thread.
233 */
234static void
235rpcrdma_recvcq_poll(struct ib_cq *cq)
236{
237 struct ib_wc *pos, wcs[4];
238 int count, rc;
239
240 do {
241 pos = wcs;
242
243 rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
244 if (rc < 0)
245 break;
246
247 count = rc;
248 while (count-- > 0)
249 rpcrdma_recvcq_process_wc(pos++);
250 } while (rc == ARRAY_SIZE(wcs));
251}
252
253/* Handle provider receive completion upcalls.
254 */
255static void
256rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
257{
258 do {
259 rpcrdma_recvcq_poll(cq);
260 } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
261 IB_CQ_REPORT_MISSED_EVENTS) > 0);
262}
263
264static void 206static void
265rpcrdma_flush_cqs(struct rpcrdma_ep *ep) 207rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
266{ 208{
267 struct ib_wc wc; 209 struct ib_wc wc;
268 210
269 while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) 211 while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
270 rpcrdma_recvcq_process_wc(&wc); 212 rpcrdma_receive_wc(NULL, &wc);
271 while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
272 rpcrdma_sendcq_process_wc(&wc);
273} 213}
274 214
275static int 215static int
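rpcrdma_receive_wc() now runs from the IB_POLL_SOFTIRQ polling context, so it only syncs the buffer, updates the credit grant and hands the reply to rpcrdma_receive_worker(); the bulk of the reply processing, which is too heavy for interrupt context, happens later from a workqueue. A minimal sketch of that deferral pattern (names prefixed example_ are illustrative):

#include <linux/workqueue.h>

struct example_rep {
	struct work_struct	work;	/* one work item per posted reply */
	/* ... reply state ... */
};

static void example_worker(struct work_struct *w)
{
	struct example_rep *rep = container_of(w, struct example_rep, work);

	/* long-running reply processing goes here, in process context */
	(void)rep;
}

static void example_init(struct example_rep *rep)
{
	INIT_WORK(&rep->work, example_worker);
}

/* From the completion handler (softirq context): hand off and return. */
static void example_complete(struct workqueue_struct *wq,
			     struct example_rep *rep)
{
	queue_work(wq, &rep->work);
}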
@@ -330,6 +270,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
330connected: 270connected:
331 dprintk("RPC: %s: %sconnected\n", 271 dprintk("RPC: %s: %sconnected\n",
332 __func__, connstate > 0 ? "" : "dis"); 272 __func__, connstate > 0 ? "" : "dis");
273 atomic_set(&xprt->rx_buf.rb_credits, 1);
333 ep->rep_connected = connstate; 274 ep->rep_connected = connstate;
334 rpcrdma_conn_func(ep); 275 rpcrdma_conn_func(ep);
335 wake_up_all(&ep->rep_connect_wait); 276 wake_up_all(&ep->rep_connect_wait);
@@ -560,9 +501,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
560 struct rpcrdma_create_data_internal *cdata) 501 struct rpcrdma_create_data_internal *cdata)
561{ 502{
562 struct ib_cq *sendcq, *recvcq; 503 struct ib_cq *sendcq, *recvcq;
563 struct ib_cq_init_attr cq_attr = {};
564 unsigned int max_qp_wr; 504 unsigned int max_qp_wr;
565 int rc, err; 505 int rc;
566 506
567 if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) { 507 if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
568 dprintk("RPC: %s: insufficient sge's available\n", 508 dprintk("RPC: %s: insufficient sge's available\n",
@@ -614,9 +554,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
614 init_waitqueue_head(&ep->rep_connect_wait); 554 init_waitqueue_head(&ep->rep_connect_wait);
615 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 555 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
616 556
617 cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; 557 sendcq = ib_alloc_cq(ia->ri_device, NULL,
618 sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, 558 ep->rep_attr.cap.max_send_wr + 1,
619 rpcrdma_cq_async_error_upcall, NULL, &cq_attr); 559 0, IB_POLL_SOFTIRQ);
620 if (IS_ERR(sendcq)) { 560 if (IS_ERR(sendcq)) {
621 rc = PTR_ERR(sendcq); 561 rc = PTR_ERR(sendcq);
622 dprintk("RPC: %s: failed to create send CQ: %i\n", 562 dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -624,16 +564,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
624 goto out1; 564 goto out1;
625 } 565 }
626 566
627 rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); 567 recvcq = ib_alloc_cq(ia->ri_device, NULL,
628 if (rc) { 568 ep->rep_attr.cap.max_recv_wr + 1,
629 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", 569 0, IB_POLL_SOFTIRQ);
630 __func__, rc);
631 goto out2;
632 }
633
634 cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
635 recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
636 rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
637 if (IS_ERR(recvcq)) { 570 if (IS_ERR(recvcq)) {
638 rc = PTR_ERR(recvcq); 571 rc = PTR_ERR(recvcq);
639 dprintk("RPC: %s: failed to create recv CQ: %i\n", 572 dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -641,14 +574,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
641 goto out2; 574 goto out2;
642 } 575 }
643 576
644 rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
645 if (rc) {
646 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
647 __func__, rc);
648 ib_destroy_cq(recvcq);
649 goto out2;
650 }
651
652 ep->rep_attr.send_cq = sendcq; 577 ep->rep_attr.send_cq = sendcq;
653 ep->rep_attr.recv_cq = recvcq; 578 ep->rep_attr.recv_cq = recvcq;
654 579
@@ -673,10 +598,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
673 return 0; 598 return 0;
674 599
675out2: 600out2:
676 err = ib_destroy_cq(sendcq); 601 ib_free_cq(sendcq);
677 if (err)
678 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
679 __func__, err);
680out1: 602out1:
681 if (ia->ri_dma_mr) 603 if (ia->ri_dma_mr)
682 ib_dereg_mr(ia->ri_dma_mr); 604 ib_dereg_mr(ia->ri_dma_mr);
@@ -711,15 +633,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
711 ia->ri_id->qp = NULL; 633 ia->ri_id->qp = NULL;
712 } 634 }
713 635
714 rc = ib_destroy_cq(ep->rep_attr.recv_cq); 636 ib_free_cq(ep->rep_attr.recv_cq);
715 if (rc) 637 ib_free_cq(ep->rep_attr.send_cq);
716 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
717 __func__, rc);
718
719 rc = ib_destroy_cq(ep->rep_attr.send_cq);
720 if (rc)
721 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
722 __func__, rc);
723 638
724 if (ia->ri_dma_mr) { 639 if (ia->ri_dma_mr) {
725 rc = ib_dereg_mr(ia->ri_dma_mr); 640 rc = ib_dereg_mr(ia->ri_dma_mr);
@@ -898,6 +813,7 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
898 spin_lock(&buffer->rb_reqslock); 813 spin_lock(&buffer->rb_reqslock);
899 list_add(&req->rl_all, &buffer->rb_allreqs); 814 list_add(&req->rl_all, &buffer->rb_allreqs);
900 spin_unlock(&buffer->rb_reqslock); 815 spin_unlock(&buffer->rb_reqslock);
816 req->rl_cqe.done = rpcrdma_wc_send;
901 req->rl_buffer = &r_xprt->rx_buf; 817 req->rl_buffer = &r_xprt->rx_buf;
902 return req; 818 return req;
903} 819}
@@ -923,6 +839,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
923 } 839 }
924 840
925 rep->rr_device = ia->ri_device; 841 rep->rr_device = ia->ri_device;
842 rep->rr_cqe.done = rpcrdma_receive_wc;
926 rep->rr_rxprt = r_xprt; 843 rep->rr_rxprt = r_xprt;
927 INIT_WORK(&rep->rr_work, rpcrdma_receive_worker); 844 INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);
928 return rep; 845 return rep;
@@ -943,6 +860,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
943 buf->rb_max_requests = r_xprt->rx_data.max_requests; 860 buf->rb_max_requests = r_xprt->rx_data.max_requests;
944 buf->rb_bc_srv_max_requests = 0; 861 buf->rb_bc_srv_max_requests = 0;
945 spin_lock_init(&buf->rb_lock); 862 spin_lock_init(&buf->rb_lock);
863 atomic_set(&buf->rb_credits, 1);
946 864
947 rc = ia->ri_ops->ro_init(r_xprt); 865 rc = ia->ri_ops->ro_init(r_xprt);
948 if (rc) 866 if (rc)
@@ -1259,7 +1177,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1259 } 1177 }
1260 1178
1261 send_wr.next = NULL; 1179 send_wr.next = NULL;
1262 send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION; 1180 send_wr.wr_cqe = &req->rl_cqe;
1263 send_wr.sg_list = iov; 1181 send_wr.sg_list = iov;
1264 send_wr.num_sge = req->rl_niovs; 1182 send_wr.num_sge = req->rl_niovs;
1265 send_wr.opcode = IB_WR_SEND; 1183 send_wr.opcode = IB_WR_SEND;
@@ -1297,7 +1215,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1297 int rc; 1215 int rc;
1298 1216
1299 recv_wr.next = NULL; 1217 recv_wr.next = NULL;
1300 recv_wr.wr_id = (u64) (unsigned long) rep; 1218 recv_wr.wr_cqe = &rep->rr_cqe;
1301 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1219 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1302 recv_wr.num_sge = 1; 1220 recv_wr.num_sge = 1;
1303 1221
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 38fe11b09875..2ebc743cb96f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -95,10 +95,6 @@ struct rpcrdma_ep {
95#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) 95#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
96#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) 96#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
97 97
98/* Force completion handler to ignore the signal
99 */
100#define RPCRDMA_IGNORE_COMPLETION (0ULL)
101
102/* Pre-allocate extra Work Requests for handling backward receives 98/* Pre-allocate extra Work Requests for handling backward receives
103 * and sends. This is a fixed value because the Work Queues are 99 * and sends. This is a fixed value because the Work Queues are
104 * allocated when the forward channel is set up. 100 * allocated when the forward channel is set up.
@@ -171,6 +167,7 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
171struct rpcrdma_buffer; 167struct rpcrdma_buffer;
172 168
173struct rpcrdma_rep { 169struct rpcrdma_rep {
170 struct ib_cqe rr_cqe;
174 unsigned int rr_len; 171 unsigned int rr_len;
175 struct ib_device *rr_device; 172 struct ib_device *rr_device;
176 struct rpcrdma_xprt *rr_rxprt; 173 struct rpcrdma_xprt *rr_rxprt;
@@ -204,11 +201,11 @@ struct rpcrdma_frmr {
204 struct scatterlist *sg; 201 struct scatterlist *sg;
205 int sg_nents; 202 int sg_nents;
206 struct ib_mr *fr_mr; 203 struct ib_mr *fr_mr;
204 struct ib_cqe fr_cqe;
207 enum rpcrdma_frmr_state fr_state; 205 enum rpcrdma_frmr_state fr_state;
206 struct completion fr_linv_done;
208 struct work_struct fr_work; 207 struct work_struct fr_work;
209 struct rpcrdma_xprt *fr_xprt; 208 struct rpcrdma_xprt *fr_xprt;
210 bool fr_waiter;
211 struct completion fr_linv_done;;
212 union { 209 union {
213 struct ib_reg_wr fr_regwr; 210 struct ib_reg_wr fr_regwr;
214 struct ib_send_wr fr_invwr; 211 struct ib_send_wr fr_invwr;
@@ -224,8 +221,7 @@ struct rpcrdma_mw {
224 union { 221 union {
225 struct rpcrdma_fmr fmr; 222 struct rpcrdma_fmr fmr;
226 struct rpcrdma_frmr frmr; 223 struct rpcrdma_frmr frmr;
227 } r; 224 };
228 void (*mw_sendcompletion)(struct ib_wc *);
229 struct list_head mw_list; 225 struct list_head mw_list;
230 struct list_head mw_all; 226 struct list_head mw_all;
231}; 227};
@@ -281,6 +277,7 @@ struct rpcrdma_req {
281 struct rpcrdma_regbuf *rl_sendbuf; 277 struct rpcrdma_regbuf *rl_sendbuf;
282 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; 278 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
283 279
280 struct ib_cqe rl_cqe;
284 struct list_head rl_all; 281 struct list_head rl_all;
285 bool rl_backchannel; 282 bool rl_backchannel;
286}; 283};
@@ -311,6 +308,7 @@ struct rpcrdma_buffer {
311 struct list_head rb_send_bufs; 308 struct list_head rb_send_bufs;
312 struct list_head rb_recv_bufs; 309 struct list_head rb_recv_bufs;
313 u32 rb_max_requests; 310 u32 rb_max_requests;
311 atomic_t rb_credits; /* most recent credit grant */
314 312
315 u32 rb_bc_srv_max_requests; 313 u32 rb_bc_srv_max_requests;
316 spinlock_t rb_reqslock; /* protect rb_allreqs */ 314 spinlock_t rb_reqslock; /* protect rb_allreqs */
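The new rb_credits atomic caches the most recent credit grant parsed from the RPC-over-RDMA header. Its consumer is not part of this hunk; one plausible use, sketched under the assumption that the reply path converts the grant into the generic RPC congestion window (hypothetical helper, real call site not shown in this diff):

#include <linux/sunrpc/xprt.h>

static void example_apply_credits(struct rpc_xprt *xprt,
				  struct rpcrdma_buffer *buf)
{
	spin_lock_bh(&xprt->transport_lock);
	xprt->cwnd = atomic_read(&buf->rb_credits) << RPC_CWNDSHIFT;
	spin_unlock_bh(&xprt->transport_lock);
}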
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fde2138b81e7..65e759569e48 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1844,9 +1844,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
1844 */ 1844 */
1845static void xs_local_rpcbind(struct rpc_task *task) 1845static void xs_local_rpcbind(struct rpc_task *task)
1846{ 1846{
1847 rcu_read_lock(); 1847 xprt_set_bound(task->tk_xprt);
1848 xprt_set_bound(rcu_dereference(task->tk_client->cl_xprt));
1849 rcu_read_unlock();
1850} 1848}
1851 1849
1852static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) 1850static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8b5833c1ff2e..b7e01d88bdc5 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -305,6 +305,8 @@ static void switchdev_port_attr_set_deferred(struct net_device *dev,
305 if (err && err != -EOPNOTSUPP) 305 if (err && err != -EOPNOTSUPP)
306 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", 306 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
307 err, attr->id); 307 err, attr->id);
308 if (attr->complete)
309 attr->complete(dev, err, attr->complete_priv);
308} 310}
309 311
310static int switchdev_port_attr_set_defer(struct net_device *dev, 312static int switchdev_port_attr_set_defer(struct net_device *dev,
@@ -434,6 +436,8 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
434 if (err && err != -EOPNOTSUPP) 436 if (err && err != -EOPNOTSUPP)
435 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", 437 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
436 err, obj->id); 438 err, obj->id);
439 if (obj->complete)
440 obj->complete(dev, err, obj->complete_priv);
437} 441}
438 442
439static int switchdev_port_obj_add_defer(struct net_device *dev, 443static int switchdev_port_obj_add_defer(struct net_device *dev,
@@ -502,6 +506,8 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev,
502 if (err && err != -EOPNOTSUPP) 506 if (err && err != -EOPNOTSUPP)
503 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", 507 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
504 err, obj->id); 508 err, obj->id);
509 if (obj->complete)
510 obj->complete(dev, err, obj->complete_priv);
505} 511}
506 512
507static int switchdev_port_obj_del_defer(struct net_device *dev, 513static int switchdev_port_obj_del_defer(struct net_device *dev,
@@ -1079,7 +1085,7 @@ nla_put_failure:
1079 * @filter_dev: filter device 1085 * @filter_dev: filter device
1080 * @idx: 1086 * @idx:
1081 * 1087 *
1082 * Delete FDB entry from switch device. 1088 * Dump FDB entries from switch device.
1083 */ 1089 */
1084int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, 1090int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
1085 struct net_device *dev, 1091 struct net_device *dev,
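The switchdev hunks above invoke an optional completion callback once a deferred attr/obj operation has actually been executed, passing back the error and the opaque complete_priv cookie. A hedged sketch of how a caller could hook it for a deferred VLAN add (only the complete/complete_priv fields and their calling convention come from this change; everything else is illustrative):

#include <linux/netdevice.h>
#include <net/switchdev.h>

static void example_obj_done(struct net_device *dev, int err, void *priv)
{
	/* @priv is whatever the caller stored in complete_priv */
	if (err && err != -EOPNOTSUPP)
		netdev_warn(dev, "deferred switchdev op (%s) failed: %d\n",
			    (const char *)priv, err);
}

static int example_defer_vlan_add(struct net_device *dev,
				  struct switchdev_obj_port_vlan *vlan)
{
	vlan->obj.flags |= SWITCHDEV_F_DEFER;
	vlan->obj.complete = example_obj_done;
	vlan->obj.complete_priv = (void *)"vlan add";	/* illustrative cookie */
	return switchdev_port_obj_add(dev, &vlan->obj);
}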
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index e401108360a2..ae469b37d852 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -412,11 +412,6 @@ enomem:
412 return -ENOMEM; 412 return -ENOMEM;
413} 413}
414 414
415void tipc_bcast_reinit(struct net *net)
416{
417 tipc_link_reinit(tipc_bc_sndlink(net), tipc_own_addr(net));
418}
419
420void tipc_bcast_stop(struct net *net) 415void tipc_bcast_stop(struct net *net)
421{ 416{
422 struct tipc_net *tn = net_generic(net, tipc_net_id); 417 struct tipc_net *tn = net_generic(net, tipc_net_id);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 1944c6c00bb9..d5e79b3767fd 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -46,7 +46,6 @@ struct tipc_node_map;
46extern const char tipc_bclink_name[]; 46extern const char tipc_bclink_name[];
47 47
48int tipc_bcast_init(struct net *net); 48int tipc_bcast_init(struct net *net);
49void tipc_bcast_reinit(struct net *net);
50void tipc_bcast_stop(struct net *net); 49void tipc_bcast_stop(struct net *net);
51void tipc_bcast_add_peer(struct net *net, struct tipc_link *l, 50void tipc_bcast_add_peer(struct net *net, struct tipc_link *l,
52 struct sk_buff_head *xmitq); 51 struct sk_buff_head *xmitq);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 802ffad3200d..27a5406213c6 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -40,6 +40,7 @@
40#include "link.h" 40#include "link.h"
41#include "discover.h" 41#include "discover.h"
42#include "bcast.h" 42#include "bcast.h"
43#include "netlink.h"
43 44
44#define MAX_ADDR_STR 60 45#define MAX_ADDR_STR 60
45 46
@@ -54,23 +55,6 @@ static struct tipc_media * const media_info_array[] = {
54 NULL 55 NULL
55}; 56};
56 57
57static const struct nla_policy
58tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
59 [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
60 [TIPC_NLA_BEARER_NAME] = {
61 .type = NLA_STRING,
62 .len = TIPC_MAX_BEARER_NAME
63 },
64 [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
65 [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
66};
67
68static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
69 [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC },
70 [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING },
71 [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
72};
73
74static void bearer_disable(struct net *net, struct tipc_bearer *b); 58static void bearer_disable(struct net *net, struct tipc_bearer *b);
75 59
76/** 60/**
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 03a842870c52..e2bdb07a49a2 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -69,6 +69,7 @@ static int __net_init tipc_init_net(struct net *net)
69 if (err) 69 if (err)
70 goto out_nametbl; 70 goto out_nametbl;
71 71
72 INIT_LIST_HEAD(&tn->dist_queue);
72 err = tipc_topsrv_start(net); 73 err = tipc_topsrv_start(net);
73 if (err) 74 if (err)
74 goto out_subscr; 75 goto out_subscr;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5504d63503df..eff58dc53aa1 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -103,6 +103,9 @@ struct tipc_net {
103 spinlock_t nametbl_lock; 103 spinlock_t nametbl_lock;
104 struct name_table *nametbl; 104 struct name_table *nametbl;
105 105
106 /* Name dist queue */
107 struct list_head dist_queue;
108
106 /* Topology subscription server */ 109 /* Topology subscription server */
107 struct tipc_server *topsrv; 110 struct tipc_server *topsrv;
108 atomic_t subscription_count; 111 atomic_t subscription_count;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 347cdc99ed09..7d2bb3e70baa 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/link.c: TIPC link code 2 * net/tipc/link.c: TIPC link code
3 * 3 *
4 * Copyright (c) 1996-2007, 2012-2015, Ericsson AB 4 * Copyright (c) 1996-2007, 2012-2016, Ericsson AB
5 * Copyright (c) 2004-2007, 2010-2013, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -123,11 +123,11 @@ struct tipc_stats {
123struct tipc_link { 123struct tipc_link {
124 u32 addr; 124 u32 addr;
125 char name[TIPC_MAX_LINK_NAME]; 125 char name[TIPC_MAX_LINK_NAME];
126 struct tipc_media_addr *media_addr;
127 struct net *net; 126 struct net *net;
128 127
129 /* Management and link supervision data */ 128 /* Management and link supervision data */
130 u32 peer_session; 129 u32 peer_session;
130 u32 session;
131 u32 peer_bearer_id; 131 u32 peer_bearer_id;
132 u32 bearer_id; 132 u32 bearer_id;
133 u32 tolerance; 133 u32 tolerance;
@@ -137,11 +137,7 @@ struct tipc_link {
137 u16 peer_caps; 137 u16 peer_caps;
138 bool active; 138 bool active;
139 u32 silent_intv_cnt; 139 u32 silent_intv_cnt;
140 struct { 140 char if_name[TIPC_MAX_IF_NAME];
141 unchar hdr[INT_H_SIZE];
142 unchar body[TIPC_MAX_IF_NAME];
143 } proto_msg;
144 struct tipc_msg *pmsg;
145 u32 priority; 141 u32 priority;
146 char net_plane; 142 char net_plane;
147 143
@@ -196,14 +192,6 @@ struct tipc_link {
196static const char *link_co_err = "Link tunneling error, "; 192static const char *link_co_err = "Link tunneling error, ";
197static const char *link_rst_msg = "Resetting link "; 193static const char *link_rst_msg = "Resetting link ";
198 194
 199/* Properties valid for media, bearer and link */
200static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
201 [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
202 [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
203 [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
204 [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }
205};
206
207/* Send states for broadcast NACKs 195/* Send states for broadcast NACKs
208 */ 196 */
209enum { 197enum {
@@ -216,10 +204,11 @@ enum {
216 * Interval between NACKs when packets arrive out of order 204 * Interval between NACKs when packets arrive out of order
217 */ 205 */
218#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2) 206#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
219/* 207
220 * Out-of-range value for link session numbers 208/* Wildcard value for link session numbers. When it is known that
209 * peer endpoint is down, any session number must be accepted.
221 */ 210 */
222#define WILDCARD_SESSION 0x10000 211#define ANY_SESSION 0x10000
223 212
224/* Link FSM states: 213/* Link FSM states:
225 */ 214 */
@@ -399,16 +388,6 @@ char *tipc_link_name(struct tipc_link *l)
399 return l->name; 388 return l->name;
400} 389}
401 390
402static u32 link_own_addr(struct tipc_link *l)
403{
404 return msg_prevnode(l->pmsg);
405}
406
407void tipc_link_reinit(struct tipc_link *l, u32 addr)
408{
409 msg_set_prevnode(l->pmsg, addr);
410}
411
412/** 391/**
413 * tipc_link_create - create a new link 392 * tipc_link_create - create a new link
414 * @n: pointer to associated node 393 * @n: pointer to associated node
@@ -442,29 +421,22 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
442 struct tipc_link **link) 421 struct tipc_link **link)
443{ 422{
444 struct tipc_link *l; 423 struct tipc_link *l;
445 struct tipc_msg *hdr;
446 424
447 l = kzalloc(sizeof(*l), GFP_ATOMIC); 425 l = kzalloc(sizeof(*l), GFP_ATOMIC);
448 if (!l) 426 if (!l)
449 return false; 427 return false;
450 *link = l; 428 *link = l;
451 l->pmsg = (struct tipc_msg *)&l->proto_msg; 429 l->session = session;
452 hdr = l->pmsg;
453 tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
454 msg_set_size(hdr, sizeof(l->proto_msg));
455 msg_set_session(hdr, session);
456 msg_set_bearer_id(hdr, l->bearer_id);
457 430
458 /* Note: peer i/f name is completed by reset/activate message */ 431 /* Note: peer i/f name is completed by reset/activate message */
459 sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown", 432 sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
460 tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode), 433 tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
461 if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); 434 if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
462 strcpy((char *)msg_data(hdr), if_name); 435 strcpy(l->if_name, if_name);
463
464 l->addr = peer; 436 l->addr = peer;
465 l->peer_caps = peer_caps; 437 l->peer_caps = peer_caps;
466 l->net = net; 438 l->net = net;
467 l->peer_session = WILDCARD_SESSION; 439 l->peer_session = ANY_SESSION;
468 l->bearer_id = bearer_id; 440 l->bearer_id = bearer_id;
469 l->tolerance = tolerance; 441 l->tolerance = tolerance;
470 l->net_plane = net_plane; 442 l->net_plane = net_plane;
@@ -791,7 +763,7 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
791 struct tipc_msg *msg = buf_msg(skb_peek(list)); 763 struct tipc_msg *msg = buf_msg(skb_peek(list));
792 int imp = msg_importance(msg); 764 int imp = msg_importance(msg);
793 u32 oport = msg_origport(msg); 765 u32 oport = msg_origport(msg);
794 u32 addr = link_own_addr(link); 766 u32 addr = tipc_own_addr(link->net);
795 struct sk_buff *skb; 767 struct sk_buff *skb;
796 768
797 /* This really cannot happen... */ 769 /* This really cannot happen... */
@@ -840,16 +812,9 @@ void link_prepare_wakeup(struct tipc_link *l)
840 812
841void tipc_link_reset(struct tipc_link *l) 813void tipc_link_reset(struct tipc_link *l)
842{ 814{
843 /* Link is down, accept any session */ 815 l->peer_session = ANY_SESSION;
844 l->peer_session = WILDCARD_SESSION; 816 l->session++;
845
846 /* If peer is up, it only accepts an incremented session number */
847 msg_set_session(l->pmsg, msg_session(l->pmsg) + 1);
848
849 /* Prepare for renewed mtu size negotiation */
850 l->mtu = l->advertised_mtu; 817 l->mtu = l->advertised_mtu;
851
852 /* Clean up all queues and counters: */
853 __skb_queue_purge(&l->transmq); 818 __skb_queue_purge(&l->transmq);
854 __skb_queue_purge(&l->deferdq); 819 __skb_queue_purge(&l->deferdq);
855 skb_queue_splice_init(&l->wakeupq, l->inputq); 820 skb_queue_splice_init(&l->wakeupq, l->inputq);
@@ -904,8 +869,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
904 if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) 869 if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
905 return link_schedule_user(l, list); 870 return link_schedule_user(l, list);
906 } 871 }
907 if (unlikely(msg_size(hdr) > mtu)) 872 if (unlikely(msg_size(hdr) > mtu)) {
873 skb_queue_purge(list);
908 return -EMSGSIZE; 874 return -EMSGSIZE;
875 }
909 876
910 /* Prepare each packet for sending, and add to relevant queue: */ 877 /* Prepare each packet for sending, and add to relevant queue: */
911 while (skb_queue_len(list)) { 878 while (skb_queue_len(list)) {
@@ -917,8 +884,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
917 884
918 if (likely(skb_queue_len(transmq) < maxwin)) { 885 if (likely(skb_queue_len(transmq) < maxwin)) {
919 _skb = skb_clone(skb, GFP_ATOMIC); 886 _skb = skb_clone(skb, GFP_ATOMIC);
920 if (!_skb) 887 if (!_skb) {
888 skb_queue_purge(list);
921 return -ENOBUFS; 889 return -ENOBUFS;
890 }
922 __skb_dequeue(list); 891 __skb_dequeue(list);
923 __skb_queue_tail(transmq, skb); 892 __skb_queue_tail(transmq, skb);
924 __skb_queue_tail(xmitq, _skb); 893 __skb_queue_tail(xmitq, _skb);
@@ -1153,7 +1122,7 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
1153 1122
1154 /* Broadcast ACK must be sent via a unicast link => defer to caller */ 1123 /* Broadcast ACK must be sent via a unicast link => defer to caller */
1155 if (link_is_bc_rcvlink(l)) { 1124 if (link_is_bc_rcvlink(l)) {
1156 if (((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf) 1125 if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf)
1157 return 0; 1126 return 0;
1158 l->rcv_unacked = 0; 1127 l->rcv_unacked = 0;
1159 return TIPC_LINK_SND_BC_ACK; 1128 return TIPC_LINK_SND_BC_ACK;
@@ -1261,39 +1230,34 @@ drop:
1261 return rc; 1230 return rc;
1262} 1231}
1263 1232
1264/*
1265 * Send protocol message to the other endpoint.
1266 */
1267static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ,
1268 int probe_msg, u32 gap, u32 tolerance,
1269 u32 priority)
1270{
1271 struct sk_buff *skb = NULL;
1272 struct sk_buff_head xmitq;
1273
1274 __skb_queue_head_init(&xmitq);
1275 tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap,
1276 tolerance, priority, &xmitq);
1277 skb = __skb_dequeue(&xmitq);
1278 if (!skb)
1279 return;
1280 tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr);
1281 l->rcv_unacked = 0;
1282}
1283
1284static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, 1233static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1285 u16 rcvgap, int tolerance, int priority, 1234 u16 rcvgap, int tolerance, int priority,
1286 struct sk_buff_head *xmitq) 1235 struct sk_buff_head *xmitq)
1287{ 1236{
1288 struct sk_buff *skb = NULL; 1237 struct sk_buff *skb;
1289 struct tipc_msg *hdr = l->pmsg; 1238 struct tipc_msg *hdr;
1239 struct sk_buff_head *dfq = &l->deferdq;
1290 bool node_up = link_is_up(l->bc_rcvlink); 1240 bool node_up = link_is_up(l->bc_rcvlink);
1291 1241
1292 /* Don't send protocol message during reset or link failover */ 1242 /* Don't send protocol message during reset or link failover */
1293 if (tipc_link_is_blocked(l)) 1243 if (tipc_link_is_blocked(l))
1294 return; 1244 return;
1295 1245
1296 msg_set_type(hdr, mtyp); 1246 if (!tipc_link_is_up(l) && (mtyp == STATE_MSG))
1247 return;
1248
1249 if (!skb_queue_empty(dfq))
1250 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
1251
1252 skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
1253 TIPC_MAX_IF_NAME, l->addr,
1254 tipc_own_addr(l->net), 0, 0, 0);
1255 if (!skb)
1256 return;
1257
1258 hdr = buf_msg(skb);
1259 msg_set_session(hdr, l->session);
1260 msg_set_bearer_id(hdr, l->bearer_id);
1297 msg_set_net_plane(hdr, l->net_plane); 1261 msg_set_net_plane(hdr, l->net_plane);
1298 msg_set_next_sent(hdr, l->snd_nxt); 1262 msg_set_next_sent(hdr, l->snd_nxt);
1299 msg_set_ack(hdr, l->rcv_nxt - 1); 1263 msg_set_ack(hdr, l->rcv_nxt - 1);
@@ -1303,36 +1267,23 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1303 msg_set_linkprio(hdr, priority); 1267 msg_set_linkprio(hdr, priority);
1304 msg_set_redundant_link(hdr, node_up); 1268 msg_set_redundant_link(hdr, node_up);
1305 msg_set_seq_gap(hdr, 0); 1269 msg_set_seq_gap(hdr, 0);
1306
1307 /* Compatibility: created msg must not be in sequence with pkt flow */
1308 msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2); 1270 msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);
1309 1271
1310 if (mtyp == STATE_MSG) { 1272 if (mtyp == STATE_MSG) {
1311 if (!tipc_link_is_up(l)) 1273 msg_set_seq_gap(hdr, rcvgap);
1312 return; 1274 msg_set_size(hdr, INT_H_SIZE);
1313
1314 /* Override rcvgap if there are packets in deferred queue */
1315 if (!skb_queue_empty(&l->deferdq))
1316 rcvgap = buf_seqno(skb_peek(&l->deferdq)) - l->rcv_nxt;
1317 if (rcvgap) {
1318 msg_set_seq_gap(hdr, rcvgap);
1319 l->stats.sent_nacks++;
1320 }
1321 msg_set_probe(hdr, probe); 1275 msg_set_probe(hdr, probe);
1322 if (probe)
1323 l->stats.sent_probes++;
1324 l->stats.sent_states++; 1276 l->stats.sent_states++;
1325 l->rcv_unacked = 0; 1277 l->rcv_unacked = 0;
1326 } else { 1278 } else {
1327 /* RESET_MSG or ACTIVATE_MSG */ 1279 /* RESET_MSG or ACTIVATE_MSG */
1328 msg_set_max_pkt(hdr, l->advertised_mtu); 1280 msg_set_max_pkt(hdr, l->advertised_mtu);
1329 msg_set_ack(hdr, l->rcv_nxt - 1); 1281 strcpy(msg_data(hdr), l->if_name);
1330 msg_set_next_sent(hdr, 1);
1331 } 1282 }
1332 skb = tipc_buf_acquire(msg_size(hdr)); 1283 if (probe)
1333 if (!skb) 1284 l->stats.sent_probes++;
1334 return; 1285 if (rcvgap)
1335 skb_copy_to_linear_data(skb, hdr, msg_size(hdr)); 1286 l->stats.sent_nacks++;
1336 skb->priority = TC_PRIO_CONTROL; 1287 skb->priority = TC_PRIO_CONTROL;
1337 __skb_queue_tail(xmitq, skb); 1288 __skb_queue_tail(xmitq, skb);
1338} 1289}
@@ -1357,7 +1308,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
1357 1308
1358 /* At least one packet required for safe algorithm => add dummy */ 1309 /* At least one packet required for safe algorithm => add dummy */
1359 skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, 1310 skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
1360 BASIC_H_SIZE, 0, l->addr, link_own_addr(l), 1311 BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
1361 0, 0, TIPC_ERR_NO_PORT); 1312 0, 0, TIPC_ERR_NO_PORT);
1362 if (!skb) { 1313 if (!skb) {
1363 pr_warn("%sunable to create tunnel packet\n", link_co_err); 1314 pr_warn("%sunable to create tunnel packet\n", link_co_err);
@@ -1368,7 +1319,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
1368 __skb_queue_purge(&tmpxq); 1319 __skb_queue_purge(&tmpxq);
1369 1320
1370 /* Initialize reusable tunnel packet header */ 1321 /* Initialize reusable tunnel packet header */
1371 tipc_msg_init(link_own_addr(l), &tnlhdr, TUNNEL_PROTOCOL, 1322 tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
1372 mtyp, INT_H_SIZE, l->addr); 1323 mtyp, INT_H_SIZE, l->addr);
1373 pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq); 1324 pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq);
1374 msg_set_msgcnt(&tnlhdr, pktcnt); 1325 msg_set_msgcnt(&tnlhdr, pktcnt);
@@ -1427,7 +1378,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1427 if (tipc_link_is_blocked(l) || !xmitq) 1378 if (tipc_link_is_blocked(l) || !xmitq)
1428 goto exit; 1379 goto exit;
1429 1380
1430 if (link_own_addr(l) > msg_prevnode(hdr)) 1381 if (tipc_own_addr(l->net) > msg_prevnode(hdr))
1431 l->net_plane = msg_net_plane(hdr); 1382 l->net_plane = msg_net_plane(hdr);
1432 1383
1433 switch (mtyp) { 1384 switch (mtyp) {
@@ -1435,7 +1386,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1435 1386
1436 /* Ignore duplicate RESET with old session number */ 1387 /* Ignore duplicate RESET with old session number */
1437 if ((less_eq(msg_session(hdr), l->peer_session)) && 1388 if ((less_eq(msg_session(hdr), l->peer_session)) &&
1438 (l->peer_session != WILDCARD_SESSION)) 1389 (l->peer_session != ANY_SESSION))
1439 break; 1390 break;
1440 /* fall thru' */ 1391 /* fall thru' */
1441 1392
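ANY_SESSION (0x10000) lies outside the 16-bit session space, so right after a local reset any peer session is accepted, while on an established link only a newer session gets past the duplicate-RESET check above. The acceptance rule, restated as a standalone helper (hypothetical, written as if inside link.c where less_eq() and struct tipc_link are visible):

static bool example_session_acceptable(struct tipc_link *l, u16 session)
{
	if (l->peer_session == ANY_SESSION)
		return true;				/* link was just reset */
	return !less_eq(session, l->peer_session);	/* newer sessions only */
}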
@@ -1479,6 +1430,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1479 if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) 1430 if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
1480 l->tolerance = peers_tol; 1431 l->tolerance = peers_tol;
1481 1432
1433 if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI,
1434 TIPC_MAX_LINK_PRI)) {
1435 l->priority = peers_prio;
1436 rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
1437 }
1438
1482 l->silent_intv_cnt = 0; 1439 l->silent_intv_cnt = 0;
1483 l->stats.recv_states++; 1440 l->stats.recv_states++;
1484 if (msg_probe(hdr)) 1441 if (msg_probe(hdr))
@@ -1526,7 +1483,7 @@ static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast,
1526 u16 gap_to = peers_snd_nxt - 1; 1483 u16 gap_to = peers_snd_nxt - 1;
1527 1484
1528 skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, 1485 skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
1529 0, l->addr, link_own_addr(l), 0, 0, 0); 1486 0, l->addr, tipc_own_addr(l->net), 0, 0, 0);
1530 if (!skb) 1487 if (!skb)
1531 return false; 1488 return false;
1532 hdr = buf_msg(skb); 1489 hdr = buf_msg(skb);
@@ -1681,7 +1638,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
1681 if (mtyp != STATE_MSG) 1638 if (mtyp != STATE_MSG)
1682 return 0; 1639 return 0;
1683 1640
1684 if (dnode == link_own_addr(l)) { 1641 if (dnode == tipc_own_addr(l->net)) {
1685 tipc_link_bc_ack_rcv(l, acked, xmitq); 1642 tipc_link_bc_ack_rcv(l, acked, xmitq);
1686 rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); 1643 rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
1687 l->stats.recv_nacks++; 1644 l->stats.recv_nacks++;
@@ -2023,16 +1980,18 @@ msg_full:
2023 return -EMSGSIZE; 1980 return -EMSGSIZE;
2024} 1981}
2025 1982
2026void tipc_link_set_tolerance(struct tipc_link *l, u32 tol) 1983void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
1984 struct sk_buff_head *xmitq)
2027{ 1985{
2028 l->tolerance = tol; 1986 l->tolerance = tol;
2029 tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0); 1987 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq);
2030} 1988}
2031 1989
2032void tipc_link_set_prio(struct tipc_link *l, u32 prio) 1990void tipc_link_set_prio(struct tipc_link *l, u32 prio,
1991 struct sk_buff_head *xmitq)
2033{ 1992{
2034 l->priority = prio; 1993 l->priority = prio;
2035 tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio); 1994 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq);
2036} 1995}
2037 1996
2038void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) 1997void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/link.h b/net/tipc/link.h
index b2ae0f4276af..6a94175ee20a 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -86,7 +86,6 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
86 struct sk_buff_head *namedq, 86 struct sk_buff_head *namedq,
87 struct tipc_link *bc_sndlink, 87 struct tipc_link *bc_sndlink,
88 struct tipc_link **link); 88 struct tipc_link **link);
89void tipc_link_reinit(struct tipc_link *l, u32 addr);
90void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, 89void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
91 int mtyp, struct sk_buff_head *xmitq); 90 int mtyp, struct sk_buff_head *xmitq);
92void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); 91void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
@@ -112,8 +111,10 @@ char tipc_link_plane(struct tipc_link *l);
112int tipc_link_prio(struct tipc_link *l); 111int tipc_link_prio(struct tipc_link *l);
113int tipc_link_window(struct tipc_link *l); 112int tipc_link_window(struct tipc_link *l);
114unsigned long tipc_link_tolerance(struct tipc_link *l); 113unsigned long tipc_link_tolerance(struct tipc_link *l);
115void tipc_link_set_tolerance(struct tipc_link *l, u32 tol); 114void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
116void tipc_link_set_prio(struct tipc_link *l, u32 prio); 115 struct sk_buff_head *xmitq);
116void tipc_link_set_prio(struct tipc_link *l, u32 prio,
117 struct sk_buff_head *xmitq);
117void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); 118void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit);
118void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); 119void tipc_link_set_queue_limits(struct tipc_link *l, u32 window);
119int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, 120int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index ebe9d0ff6e9e..6b626a64b517 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -40,11 +40,6 @@
40 40
41int sysctl_tipc_named_timeout __read_mostly = 2000; 41int sysctl_tipc_named_timeout __read_mostly = 2000;
42 42
43/**
44 * struct tipc_dist_queue - queue holding deferred name table updates
45 */
46static struct list_head tipc_dist_queue = LIST_HEAD_INIT(tipc_dist_queue);
47
48struct distr_queue_item { 43struct distr_queue_item {
49 struct distr_item i; 44 struct distr_item i;
50 u32 dtype; 45 u32 dtype;
@@ -229,12 +224,31 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
229 kfree_rcu(p, rcu); 224 kfree_rcu(p, rcu);
230} 225}
231 226
227/**
228 * tipc_dist_queue_purge - remove deferred updates from a node that went down
229 */
230static void tipc_dist_queue_purge(struct net *net, u32 addr)
231{
232 struct tipc_net *tn = net_generic(net, tipc_net_id);
233 struct distr_queue_item *e, *tmp;
234
235 spin_lock_bh(&tn->nametbl_lock);
236 list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) {
237 if (e->node != addr)
238 continue;
239 list_del(&e->next);
240 kfree(e);
241 }
242 spin_unlock_bh(&tn->nametbl_lock);
243}
244
232void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) 245void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
233{ 246{
234 struct publication *publ, *tmp; 247 struct publication *publ, *tmp;
235 248
236 list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) 249 list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list)
237 tipc_publ_purge(net, publ, addr); 250 tipc_publ_purge(net, publ, addr);
251 tipc_dist_queue_purge(net, addr);
238} 252}
239 253
240/** 254/**
@@ -279,9 +293,11 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
279 * tipc_named_add_backlog - add a failed name table update to the backlog 293 * tipc_named_add_backlog - add a failed name table update to the backlog
280 * 294 *
281 */ 295 */
282static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) 296static void tipc_named_add_backlog(struct net *net, struct distr_item *i,
297 u32 type, u32 node)
283{ 298{
284 struct distr_queue_item *e; 299 struct distr_queue_item *e;
300 struct tipc_net *tn = net_generic(net, tipc_net_id);
285 unsigned long now = get_jiffies_64(); 301 unsigned long now = get_jiffies_64();
286 302
287 e = kzalloc(sizeof(*e), GFP_ATOMIC); 303 e = kzalloc(sizeof(*e), GFP_ATOMIC);
@@ -291,7 +307,7 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node)
291 e->node = node; 307 e->node = node;
292 e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout); 308 e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout);
293 memcpy(e, i, sizeof(*i)); 309 memcpy(e, i, sizeof(*i));
294 list_add_tail(&e->next, &tipc_dist_queue); 310 list_add_tail(&e->next, &tn->dist_queue);
295} 311}
296 312
297/** 313/**
@@ -301,10 +317,11 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node)
301void tipc_named_process_backlog(struct net *net) 317void tipc_named_process_backlog(struct net *net)
302{ 318{
303 struct distr_queue_item *e, *tmp; 319 struct distr_queue_item *e, *tmp;
320 struct tipc_net *tn = net_generic(net, tipc_net_id);
304 char addr[16]; 321 char addr[16];
305 unsigned long now = get_jiffies_64(); 322 unsigned long now = get_jiffies_64();
306 323
307 list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { 324 list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) {
308 if (time_after(e->expires, now)) { 325 if (time_after(e->expires, now)) {
309 if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) 326 if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype))
310 continue; 327 continue;
@@ -344,7 +361,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
344 node = msg_orignode(msg); 361 node = msg_orignode(msg);
345 while (count--) { 362 while (count--) {
346 if (!tipc_update_nametbl(net, item, node, mtype)) 363 if (!tipc_update_nametbl(net, item, node, mtype))
347 tipc_named_add_backlog(item, mtype, node); 364 tipc_named_add_backlog(net, item, mtype, node);
348 item++; 365 item++;
349 } 366 }
350 kfree_skb(skb); 367 kfree_skb(skb);
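The backlog of deferred name-table updates is now anchored in the per-namespace tipc_net (tn->dist_queue) instead of a file-static list, and tipc_dist_queue_purge() drops every queued entry that came from the failed node. A self-contained sketch of that delete-while-iterating idiom, using a hand-rolled singly linked list in place of the kernel's list_for_each_entry_safe():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct item { uint32_t node; struct item *next; };

/* Remove every queued item published by 'addr'; iteration stays safe
 * because we advance a pointer to the link being rewritten, not to the
 * item that may be freed. */
static void purge(struct item **head, uint32_t addr)
{
    struct item **pp = head;

    while (*pp) {
        struct item *e = *pp;
        if (e->node == addr) {
            *pp = e->next;
            free(e);
        } else {
            pp = &e->next;
        }
    }
}

static void push(struct item **head, uint32_t node)
{
    struct item *e = malloc(sizeof(*e));
    if (!e)
        return;
    e->node = node;
    e->next = *head;
    *head = e;
}

int main(void)
{
    struct item *q = NULL;

    push(&q, 0x1001001); push(&q, 0x1001002); push(&q, 0x1001001);
    purge(&q, 0x1001001);            /* node <1.1.1> went down */
    for (struct item *e = q; e; e = e->next)
        printf("still queued: node 0x%x\n", (unsigned)e->node);
    purge(&q, 0x1001002);            /* free the rest before exit */
    return 0;
}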
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 91fce70291a8..e190460fe0d3 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -47,12 +47,6 @@
47 47
48#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ 48#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
49 49
50static const struct nla_policy
51tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = {
52 [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC },
53 [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED }
54};
55
56/** 50/**
57 * struct name_info - name sequence publication info 51 * struct name_info - name sequence publication info
58 * @node_list: circular list of publications made by own node 52 * @node_list: circular list of publications made by own node
@@ -418,6 +412,9 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
418 struct tipc_subscription *s) 412 struct tipc_subscription *s)
419{ 413{
420 struct sub_seq *sseq = nseq->sseqs; 414 struct sub_seq *sseq = nseq->sseqs;
415 struct tipc_name_seq ns;
416
417 tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
421 418
422 list_add(&s->nameseq_list, &nseq->subscriptions); 419 list_add(&s->nameseq_list, &nseq->subscriptions);
423 420
@@ -425,7 +422,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
425 return; 422 return;
426 423
427 while (sseq != &nseq->sseqs[nseq->first_free]) { 424 while (sseq != &nseq->sseqs[nseq->first_free]) {
428 if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) { 425 if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
429 struct publication *crs; 426 struct publication *crs;
430 struct name_info *info = sseq->info; 427 struct name_info *info = sseq->info;
431 int must_report = 1; 428 int must_report = 1;
@@ -722,9 +719,10 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
722void tipc_nametbl_subscribe(struct tipc_subscription *s) 719void tipc_nametbl_subscribe(struct tipc_subscription *s)
723{ 720{
724 struct tipc_net *tn = net_generic(s->net, tipc_net_id); 721 struct tipc_net *tn = net_generic(s->net, tipc_net_id);
725 u32 type = s->seq.type; 722 u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
726 int index = hash(type); 723 int index = hash(type);
727 struct name_seq *seq; 724 struct name_seq *seq;
725 struct tipc_name_seq ns;
728 726
729 spin_lock_bh(&tn->nametbl_lock); 727 spin_lock_bh(&tn->nametbl_lock);
730 seq = nametbl_find_seq(s->net, type); 728 seq = nametbl_find_seq(s->net, type);
@@ -735,8 +733,9 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
735 tipc_nameseq_subscribe(seq, s); 733 tipc_nameseq_subscribe(seq, s);
736 spin_unlock_bh(&seq->lock); 734 spin_unlock_bh(&seq->lock);
737 } else { 735 } else {
736 tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
738 pr_warn("Failed to create subscription for {%u,%u,%u}\n", 737 pr_warn("Failed to create subscription for {%u,%u,%u}\n",
739 s->seq.type, s->seq.lower, s->seq.upper); 738 ns.type, ns.lower, ns.upper);
740 } 739 }
741 spin_unlock_bh(&tn->nametbl_lock); 740 spin_unlock_bh(&tn->nametbl_lock);
742} 741}
@@ -748,9 +747,10 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
748{ 747{
749 struct tipc_net *tn = net_generic(s->net, tipc_net_id); 748 struct tipc_net *tn = net_generic(s->net, tipc_net_id);
750 struct name_seq *seq; 749 struct name_seq *seq;
750 u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
751 751
752 spin_lock_bh(&tn->nametbl_lock); 752 spin_lock_bh(&tn->nametbl_lock);
753 seq = nametbl_find_seq(s->net, s->seq.type); 753 seq = nametbl_find_seq(s->net, type);
754 if (seq != NULL) { 754 if (seq != NULL) {
755 spin_lock_bh(&seq->lock); 755 spin_lock_bh(&seq->lock);
756 list_del_init(&s->nameseq_list); 756 list_del_init(&s->nameseq_list);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 77bf9113c7a7..28bf4feeb81c 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,11 +41,7 @@
41#include "socket.h" 41#include "socket.h"
42#include "node.h" 42#include "node.h"
43#include "bcast.h" 43#include "bcast.h"
44 44#include "netlink.h"
45static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
46 [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
47 [TIPC_NLA_NET_ID] = { .type = NLA_U32 }
48};
49 45
50/* 46/*
51 * The TIPC locking policy is designed to ensure a very fine locking 47 * The TIPC locking policy is designed to ensure a very fine locking
@@ -116,7 +112,6 @@ int tipc_net_start(struct net *net, u32 addr)
116 tn->own_addr = addr; 112 tn->own_addr = addr;
117 tipc_named_reinit(net); 113 tipc_named_reinit(net);
118 tipc_sk_reinit(net); 114 tipc_sk_reinit(net);
119 tipc_bcast_reinit(net);
120 115
121 tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, 116 tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
122 TIPC_ZONE_SCOPE, 0, tn->own_addr); 117 TIPC_ZONE_SCOPE, 0, tn->own_addr);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 8975b0135b76..56935df2167a 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -55,6 +55,75 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = {
55 [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } 55 [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }
56}; 56};
57 57
58const struct nla_policy
59tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = {
60 [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC },
61 [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED }
62};
63
64const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
65 [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
66 [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 },
67 [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 },
68 [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED },
69 [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG }
70};
71
72const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
73 [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
74 [TIPC_NLA_NET_ID] = { .type = NLA_U32 }
75};
76
77const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
78 [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
79 [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING,
80 .len = TIPC_MAX_LINK_NAME },
81 [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
82 [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
83 [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
84 [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
85 [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
86 [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
87 [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
88 [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
89};
90
91const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
92 [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
93 [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
94 [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
95};
96
97/* Properties valid for media, bearer and link */
98const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
99 [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
100 [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
101 [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
102 [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }
103};
104
105const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
106 [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
107 [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING,
108 .len = TIPC_MAX_BEARER_NAME },
109 [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
110 [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
111};
112
113const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
114 [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC },
115 [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING },
116 [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
117};
118
119const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
120 [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
121 [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
122 .len = sizeof(struct sockaddr_storage)},
123 [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY,
124 .len = sizeof(struct sockaddr_storage)},
125};
126
58/* Users of the legacy API (tipc-config) can't handle that we add operations, 127/* Users of the legacy API (tipc-config) can't handle that we add operations,
59 * so we have a separate genl handling for the new API. 128 * so we have a separate genl handling for the new API.
60 */ 129 */
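All of the nla_policy tables that used to be private to individual files are gathered here and exported, so every command handler validates attributes against one shared definition. A plain-C sketch of table-driven attribute validation in the same spirit; the attribute ids, types and lengths below are invented for illustration and this is not the kernel's struct nla_policy API:

#include <stddef.h>
#include <stdio.h>

enum { ATTR_UNSPEC, ATTR_NAME, ATTR_MTU, __ATTR_MAX };
enum { T_UNSPEC, T_U32, T_STRING };

struct policy { int type; size_t maxlen; };

/* One shared table, analogous to the exported tipc_nl_*_policy arrays. */
static const struct policy link_policy[__ATTR_MAX] = {
    [ATTR_NAME] = { T_STRING, 60 },
    [ATTR_MTU]  = { T_U32, 0 },
};

struct attr { int id; int type; size_t len; };

static int validate(const struct attr *a)
{
    if (a->id <= 0 || a->id >= __ATTR_MAX)
        return -1;
    if (link_policy[a->id].type != a->type)
        return -1;
    if (link_policy[a->id].maxlen && a->len > link_policy[a->id].maxlen)
        return -1;
    return 0;
}

int main(void)
{
    struct attr ok  = { ATTR_MTU, T_U32, 4 };
    struct attr bad = { ATTR_NAME, T_U32, 4 };

    printf("mtu attr: %s\n", validate(&ok) ? "rejected" : "accepted");
    printf("name-as-u32 attr: %s\n", validate(&bad) ? "rejected" : "accepted");
    return 0;
}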
diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h
index 08a1db67b927..ed1dbcb4afbd 100644
--- a/net/tipc/netlink.h
+++ b/net/tipc/netlink.h
@@ -35,6 +35,7 @@
35 35
36#ifndef _TIPC_NETLINK_H 36#ifndef _TIPC_NETLINK_H
37#define _TIPC_NETLINK_H 37#define _TIPC_NETLINK_H
38#include <net/netlink.h>
38 39
39extern struct genl_family tipc_genl_family; 40extern struct genl_family tipc_genl_family;
40int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); 41int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf);
@@ -45,6 +46,16 @@ struct tipc_nl_msg {
45 u32 seq; 46 u32 seq;
46}; 47};
47 48
49extern const struct nla_policy tipc_nl_name_table_policy[];
50extern const struct nla_policy tipc_nl_sock_policy[];
51extern const struct nla_policy tipc_nl_net_policy[];
52extern const struct nla_policy tipc_nl_link_policy[];
53extern const struct nla_policy tipc_nl_node_policy[];
54extern const struct nla_policy tipc_nl_prop_policy[];
55extern const struct nla_policy tipc_nl_bearer_policy[];
56extern const struct nla_policy tipc_nl_media_policy[];
57extern const struct nla_policy tipc_nl_udp_policy[];
58
48int tipc_netlink_start(void); 59int tipc_netlink_start(void);
49int tipc_netlink_compat_start(void); 60int tipc_netlink_compat_start(void);
50void tipc_netlink_stop(void); 61void tipc_netlink_stop(void);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 2c016fdefe97..d7d050f44fc1 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1104,8 +1104,8 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
1104 req_nlh = (struct nlmsghdr *)skb->data; 1104 req_nlh = (struct nlmsghdr *)skb->data;
1105 msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; 1105 msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
1106 msg.cmd = req_userhdr->cmd; 1106 msg.cmd = req_userhdr->cmd;
1107 msg.dst_sk = info->dst_sk;
1108 msg.net = genl_info_net(info); 1107 msg.net = genl_info_net(info);
1108 msg.dst_sk = skb->sk;
1109 1109
1110 if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { 1110 if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
1111 msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); 1111 msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9d7a16fc5ca4..9aaa1bc566ae 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -41,6 +41,7 @@
41#include "socket.h" 41#include "socket.h"
42#include "bcast.h" 42#include "bcast.h"
43#include "discover.h" 43#include "discover.h"
44#include "netlink.h"
44 45
45#define INVALID_NODE_SIG 0x10000 46#define INVALID_NODE_SIG 0x10000
46 47
@@ -164,28 +165,6 @@ struct tipc_sock_conn {
164 struct list_head list; 165 struct list_head list;
165}; 166};
166 167
167static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
168 [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
169 [TIPC_NLA_LINK_NAME] = {
170 .type = NLA_STRING,
171 .len = TIPC_MAX_LINK_NAME
172 },
173 [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
174 [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
175 [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
176 [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
177 [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
178 [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
179 [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
180 [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
181};
182
183static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
184 [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
185 [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
186 [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
187};
188
189static struct tipc_link *node_active_link(struct tipc_node *n, int sel) 168static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
190{ 169{
191 int bearer_id = n->active_links[sel & 1]; 170 int bearer_id = n->active_links[sel & 1];
@@ -225,9 +204,10 @@ static unsigned int tipc_hashfn(u32 addr)
225 204
226static void tipc_node_kref_release(struct kref *kref) 205static void tipc_node_kref_release(struct kref *kref)
227{ 206{
228 struct tipc_node *node = container_of(kref, struct tipc_node, kref); 207 struct tipc_node *n = container_of(kref, struct tipc_node, kref);
229 208
230 tipc_node_delete(node); 209 kfree(n->bc_entry.link);
210 kfree_rcu(n, rcu);
231} 211}
232 212
233static void tipc_node_put(struct tipc_node *node) 213static void tipc_node_put(struct tipc_node *node)
@@ -245,23 +225,23 @@ static void tipc_node_get(struct tipc_node *node)
245 */ 225 */
246static struct tipc_node *tipc_node_find(struct net *net, u32 addr) 226static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
247{ 227{
248 struct tipc_net *tn = net_generic(net, tipc_net_id); 228 struct tipc_net *tn = tipc_net(net);
249 struct tipc_node *node; 229 struct tipc_node *node;
230 unsigned int thash = tipc_hashfn(addr);
250 231
251 if (unlikely(!in_own_cluster_exact(net, addr))) 232 if (unlikely(!in_own_cluster_exact(net, addr)))
252 return NULL; 233 return NULL;
253 234
254 rcu_read_lock(); 235 rcu_read_lock();
255 hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], 236 hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
256 hash) { 237 if (node->addr != addr)
257 if (node->addr == addr) { 238 continue;
258 tipc_node_get(node); 239 if (!kref_get_unless_zero(&node->kref))
259 rcu_read_unlock(); 240 node = NULL;
260 return node; 241 break;
261 }
262 } 242 }
263 rcu_read_unlock(); 243 rcu_read_unlock();
264 return NULL; 244 return node;
265} 245}
266 246
267static void tipc_node_read_lock(struct tipc_node *n) 247static void tipc_node_read_lock(struct tipc_node *n)
@@ -395,21 +375,20 @@ static void tipc_node_delete(struct tipc_node *node)
395{ 375{
396 list_del_rcu(&node->list); 376 list_del_rcu(&node->list);
397 hlist_del_rcu(&node->hash); 377 hlist_del_rcu(&node->hash);
398 kfree(node->bc_entry.link); 378 tipc_node_put(node);
399 kfree_rcu(node, rcu); 379
380 del_timer_sync(&node->timer);
381 tipc_node_put(node);
400} 382}
401 383
402void tipc_node_stop(struct net *net) 384void tipc_node_stop(struct net *net)
403{ 385{
404 struct tipc_net *tn = net_generic(net, tipc_net_id); 386 struct tipc_net *tn = tipc_net(net);
405 struct tipc_node *node, *t_node; 387 struct tipc_node *node, *t_node;
406 388
407 spin_lock_bh(&tn->node_list_lock); 389 spin_lock_bh(&tn->node_list_lock);
408 list_for_each_entry_safe(node, t_node, &tn->node_list, list) { 390 list_for_each_entry_safe(node, t_node, &tn->node_list, list)
409 if (del_timer(&node->timer)) 391 tipc_node_delete(node);
410 tipc_node_put(node);
411 tipc_node_put(node);
412 }
413 spin_unlock_bh(&tn->node_list_lock); 392 spin_unlock_bh(&tn->node_list_lock);
414} 393}
415 394
@@ -530,9 +509,7 @@ static void tipc_node_timeout(unsigned long data)
530 if (rc & TIPC_LINK_DOWN_EVT) 509 if (rc & TIPC_LINK_DOWN_EVT)
531 tipc_node_link_down(n, bearer_id, false); 510 tipc_node_link_down(n, bearer_id, false);
532 } 511 }
533 if (!mod_timer(&n->timer, jiffies + n->keepalive_intv)) 512 mod_timer(&n->timer, jiffies + n->keepalive_intv);
534 tipc_node_get(n);
535 tipc_node_put(n);
536} 513}
537 514
538/** 515/**
@@ -845,7 +822,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
845 memcpy(&le->maddr, maddr, sizeof(*maddr)); 822 memcpy(&le->maddr, maddr, sizeof(*maddr));
846exit: 823exit:
847 tipc_node_write_unlock(n); 824 tipc_node_write_unlock(n);
848 if (reset && !tipc_link_is_reset(l)) 825 if (reset && l && !tipc_link_is_reset(l))
849 tipc_node_link_down(n, b->identity, false); 826 tipc_node_link_down(n, b->identity, false);
850 tipc_node_put(n); 827 tipc_node_put(n);
851} 828}
@@ -1166,7 +1143,7 @@ msg_full:
1166 * @dnode: address of destination node 1143 * @dnode: address of destination node
1167 * @selector: a number used for deterministic link selection 1144 * @selector: a number used for deterministic link selection
1168 * Consumes the buffer chain, except when returning -ELINKCONG 1145 * Consumes the buffer chain, except when returning -ELINKCONG
1169 * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE 1146 * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUFS
1170 */ 1147 */
1171int tipc_node_xmit(struct net *net, struct sk_buff_head *list, 1148int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
1172 u32 dnode, int selector) 1149 u32 dnode, int selector)
@@ -1174,33 +1151,43 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
1174 struct tipc_link_entry *le = NULL; 1151 struct tipc_link_entry *le = NULL;
1175 struct tipc_node *n; 1152 struct tipc_node *n;
1176 struct sk_buff_head xmitq; 1153 struct sk_buff_head xmitq;
1177 int bearer_id = -1; 1154 int bearer_id;
1178 int rc = -EHOSTUNREACH; 1155 int rc;
1156
1157 if (in_own_node(net, dnode)) {
1158 tipc_sk_rcv(net, list);
1159 return 0;
1160 }
1179 1161
1180 __skb_queue_head_init(&xmitq);
1181 n = tipc_node_find(net, dnode); 1162 n = tipc_node_find(net, dnode);
1182 if (likely(n)) { 1163 if (unlikely(!n)) {
1183 tipc_node_read_lock(n); 1164 skb_queue_purge(list);
1184 bearer_id = n->active_links[selector & 1]; 1165 return -EHOSTUNREACH;
1185 if (bearer_id >= 0) { 1166 }
1186 le = &n->links[bearer_id]; 1167
1187 spin_lock_bh(&le->lock); 1168 tipc_node_read_lock(n);
1188 rc = tipc_link_xmit(le->link, list, &xmitq); 1169 bearer_id = n->active_links[selector & 1];
1189 spin_unlock_bh(&le->lock); 1170 if (unlikely(bearer_id == INVALID_BEARER_ID)) {
1190 }
1191 tipc_node_read_unlock(n); 1171 tipc_node_read_unlock(n);
1192 if (likely(!rc))
1193 tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
1194 else if (rc == -ENOBUFS)
1195 tipc_node_link_down(n, bearer_id, false);
1196 tipc_node_put(n); 1172 tipc_node_put(n);
1197 return rc; 1173 skb_queue_purge(list);
1174 return -EHOSTUNREACH;
1198 } 1175 }
1199 1176
1200 if (likely(in_own_node(net, dnode))) { 1177 __skb_queue_head_init(&xmitq);
1201 tipc_sk_rcv(net, list); 1178 le = &n->links[bearer_id];
1202 return 0; 1179 spin_lock_bh(&le->lock);
1203 } 1180 rc = tipc_link_xmit(le->link, list, &xmitq);
1181 spin_unlock_bh(&le->lock);
1182 tipc_node_read_unlock(n);
1183
1184 if (likely(rc == 0))
1185 tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
1186 else if (rc == -ENOBUFS)
1187 tipc_node_link_down(n, bearer_id, false);
1188
1189 tipc_node_put(n);
1190
1204 return rc; 1191 return rc;
1205} 1192}
1206 1193
@@ -1457,6 +1444,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1457 int bearer_id = b->identity; 1444 int bearer_id = b->identity;
1458 struct tipc_link_entry *le; 1445 struct tipc_link_entry *le;
1459 u16 bc_ack = msg_bcast_ack(hdr); 1446 u16 bc_ack = msg_bcast_ack(hdr);
1447 u32 self = tipc_own_addr(net);
1460 int rc = 0; 1448 int rc = 0;
1461 1449
1462 __skb_queue_head_init(&xmitq); 1450 __skb_queue_head_init(&xmitq);
@@ -1473,6 +1461,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1473 return tipc_node_bc_rcv(net, skb, bearer_id); 1461 return tipc_node_bc_rcv(net, skb, bearer_id);
1474 } 1462 }
1475 1463
1464 /* Discard unicast link messages destined for another node */
1465 if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self)))
1466 goto discard;
1467
1476 /* Locate neighboring node that sent packet */ 1468 /* Locate neighboring node that sent packet */
1477 n = tipc_node_find(net, msg_prevnode(hdr)); 1469 n = tipc_node_find(net, msg_prevnode(hdr));
1478 if (unlikely(!n)) 1470 if (unlikely(!n))
@@ -1637,9 +1629,12 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
1637 char *name; 1629 char *name;
1638 struct tipc_link *link; 1630 struct tipc_link *link;
1639 struct tipc_node *node; 1631 struct tipc_node *node;
1632 struct sk_buff_head xmitq;
1640 struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; 1633 struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
1641 struct net *net = sock_net(skb->sk); 1634 struct net *net = sock_net(skb->sk);
1642 1635
1636 __skb_queue_head_init(&xmitq);
1637
1643 if (!info->attrs[TIPC_NLA_LINK]) 1638 if (!info->attrs[TIPC_NLA_LINK])
1644 return -EINVAL; 1639 return -EINVAL;
1645 1640
@@ -1683,13 +1678,13 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
1683 u32 tol; 1678 u32 tol;
1684 1679
1685 tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); 1680 tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
1686 tipc_link_set_tolerance(link, tol); 1681 tipc_link_set_tolerance(link, tol, &xmitq);
1687 } 1682 }
1688 if (props[TIPC_NLA_PROP_PRIO]) { 1683 if (props[TIPC_NLA_PROP_PRIO]) {
1689 u32 prio; 1684 u32 prio;
1690 1685
1691 prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); 1686 prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
1692 tipc_link_set_prio(link, prio); 1687 tipc_link_set_prio(link, prio, &xmitq);
1693 } 1688 }
1694 if (props[TIPC_NLA_PROP_WIN]) { 1689 if (props[TIPC_NLA_PROP_WIN]) {
1695 u32 win; 1690 u32 win;
@@ -1701,7 +1696,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
1701 1696
1702out: 1697out:
1703 tipc_node_read_unlock(node); 1698 tipc_node_read_unlock(node);
1704 1699 tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr);
1705 return res; 1700 return res;
1706} 1701}
1707 1702
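tipc_node_find() above now takes a reference only via kref_get_unless_zero(), so an RCU lookup cannot revive a node whose last reference has already been dropped; freeing moves into the kref release callback. The primitive itself is a compare-and-swap loop; a userspace sketch with C11 atomics (not the kernel kref API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj { atomic_uint refcount; };

/* Succeed only while the count is still non-zero, mirroring
 * kref_get_unless_zero(): a concurrent drop to zero makes us fail
 * instead of resurrecting the object. */
static bool get_unless_zero(struct obj *o)
{
    unsigned int v = atomic_load(&o->refcount);

    while (v != 0) {
        if (atomic_compare_exchange_weak(&o->refcount, &v, v + 1))
            return true;
        /* v was reloaded by the failed CAS; retry */
    }
    return false;
}

int main(void)
{
    struct obj live = { 1 }, dying = { 0 };

    printf("live:  %s\n", get_unless_zero(&live)  ? "got ref" : "skipped");
    printf("dying: %s\n", get_unless_zero(&dying) ? "got ref" : "skipped");
    return 0;
}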
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 922e04a43396..2446bfbaa309 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -571,13 +571,13 @@ static void tipc_work_stop(struct tipc_server *s)
571 571
572static int tipc_work_start(struct tipc_server *s) 572static int tipc_work_start(struct tipc_server *s)
573{ 573{
574 s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); 574 s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
575 if (!s->rcv_wq) { 575 if (!s->rcv_wq) {
576 pr_err("can't start tipc receive workqueue\n"); 576 pr_err("can't start tipc receive workqueue\n");
577 return -ENOMEM; 577 return -ENOMEM;
578 } 578 }
579 579
580 s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); 580 s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
581 if (!s->send_wq) { 581 if (!s->send_wq) {
582 pr_err("can't start tipc send workqueue\n"); 582 pr_err("can't start tipc send workqueue\n");
583 destroy_workqueue(s->rcv_wq); 583 destroy_workqueue(s->rcv_wq);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 4d420bb27396..3eeb50a27b89 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -42,6 +42,7 @@
42#include "name_distr.h" 42#include "name_distr.h"
43#include "socket.h" 43#include "socket.h"
44#include "bcast.h" 44#include "bcast.h"
45#include "netlink.h"
45 46
46#define SS_LISTENING -1 /* socket is listening */ 47#define SS_LISTENING -1 /* socket is listening */
47#define SS_READY -2 /* socket is connectionless */ 48#define SS_READY -2 /* socket is connectionless */
@@ -126,14 +127,6 @@ static const struct proto_ops stream_ops;
126static const struct proto_ops msg_ops; 127static const struct proto_ops msg_ops;
127static struct proto tipc_proto; 128static struct proto tipc_proto;
128 129
129static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
130 [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
131 [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 },
132 [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 },
133 [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED },
134 [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG }
135};
136
137static const struct rhashtable_params tsk_rht_params; 130static const struct rhashtable_params tsk_rht_params;
138 131
139/* 132/*
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index f9ff73a8d815..e6cb386fbf34 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -92,25 +92,42 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub,
92 * 92 *
93 * Returns 1 if there is overlap, otherwise 0. 93 * Returns 1 if there is overlap, otherwise 0.
94 */ 94 */
95int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, 95int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
96 u32 found_upper) 96 u32 found_upper)
97{ 97{
98 if (found_lower < sub->seq.lower) 98 if (found_lower < seq->lower)
99 found_lower = sub->seq.lower; 99 found_lower = seq->lower;
100 if (found_upper > sub->seq.upper) 100 if (found_upper > seq->upper)
101 found_upper = sub->seq.upper; 101 found_upper = seq->upper;
102 if (found_lower > found_upper) 102 if (found_lower > found_upper)
103 return 0; 103 return 0;
104 return 1; 104 return 1;
105} 105}
106 106
107u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
108{
109 return htohl(type, swap);
110}
111
112void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
113 struct tipc_name_seq *out)
114{
115 out->type = htohl(in->type, swap);
116 out->lower = htohl(in->lower, swap);
117 out->upper = htohl(in->upper, swap);
118}
119
107void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, 120void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
108 u32 found_upper, u32 event, u32 port_ref, 121 u32 found_upper, u32 event, u32 port_ref,
109 u32 node, int must) 122 u32 node, int must)
110{ 123{
111 if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper)) 124 struct tipc_name_seq seq;
125
126 tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
127 if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
112 return; 128 return;
113 if (!must && !(sub->filter & TIPC_SUB_PORTS)) 129 if (!must &&
130 !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS))
114 return; 131 return;
115 132
116 tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, 133 tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
@@ -171,12 +188,14 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
171static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) 188static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
172{ 189{
173 struct tipc_subscription *sub, *temp; 190 struct tipc_subscription *sub, *temp;
191 u32 timeout;
174 192
175 spin_lock_bh(&subscriber->lock); 193 spin_lock_bh(&subscriber->lock);
176 /* Destroy any existing subscriptions for subscriber */ 194 /* Destroy any existing subscriptions for subscriber */
177 list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, 195 list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
178 subscrp_list) { 196 subscrp_list) {
179 if (del_timer(&sub->timer)) { 197 timeout = htohl(sub->evt.s.timeout, sub->swap);
198 if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) {
180 tipc_subscrp_delete(sub); 199 tipc_subscrp_delete(sub);
181 tipc_subscrb_put(subscriber); 200 tipc_subscrb_put(subscriber);
182 } 201 }
@@ -200,13 +219,16 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s,
200 struct tipc_subscriber *subscriber) 219 struct tipc_subscriber *subscriber)
201{ 220{
202 struct tipc_subscription *sub, *temp; 221 struct tipc_subscription *sub, *temp;
222 u32 timeout;
203 223
204 spin_lock_bh(&subscriber->lock); 224 spin_lock_bh(&subscriber->lock);
205 /* Find first matching subscription, exit if not found */ 225 /* Find first matching subscription, exit if not found */
206 list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, 226 list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
207 subscrp_list) { 227 subscrp_list) {
208 if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { 228 if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
209 if (del_timer(&sub->timer)) { 229 timeout = htohl(sub->evt.s.timeout, sub->swap);
230 if ((timeout == TIPC_WAIT_FOREVER) ||
231 del_timer(&sub->timer)) {
210 tipc_subscrp_delete(sub); 232 tipc_subscrp_delete(sub);
211 tipc_subscrb_put(subscriber); 233 tipc_subscrb_put(subscriber);
212 } 234 }
@@ -216,66 +238,67 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s,
216 spin_unlock_bh(&subscriber->lock); 238 spin_unlock_bh(&subscriber->lock);
217} 239}
218 240
219static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s, 241static struct tipc_subscription *tipc_subscrp_create(struct net *net,
220 struct tipc_subscriber *subscriber, 242 struct tipc_subscr *s,
221 struct tipc_subscription **sub_p) 243 int swap)
222{ 244{
223 struct tipc_net *tn = net_generic(net, tipc_net_id); 245 struct tipc_net *tn = net_generic(net, tipc_net_id);
224 struct tipc_subscription *sub; 246 struct tipc_subscription *sub;
225 int swap; 247 u32 filter = htohl(s->filter, swap);
226
227 /* Determine subscriber's endianness */
228 swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
229
230 /* Detect & process a subscription cancellation request */
231 if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
232 s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
233 tipc_subscrp_cancel(s, subscriber);
234 return 0;
235 }
236 248
237 /* Refuse subscription if global limit exceeded */ 249 /* Refuse subscription if global limit exceeded */
238 if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { 250 if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
239 pr_warn("Subscription rejected, limit reached (%u)\n", 251 pr_warn("Subscription rejected, limit reached (%u)\n",
240 TIPC_MAX_SUBSCRIPTIONS); 252 TIPC_MAX_SUBSCRIPTIONS);
241 return -EINVAL; 253 return NULL;
242 } 254 }
243 255
244 /* Allocate subscription object */ 256 /* Allocate subscription object */
245 sub = kmalloc(sizeof(*sub), GFP_ATOMIC); 257 sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
246 if (!sub) { 258 if (!sub) {
247 pr_warn("Subscription rejected, no memory\n"); 259 pr_warn("Subscription rejected, no memory\n");
248 return -ENOMEM; 260 return NULL;
249 } 261 }
250 262
251 /* Initialize subscription object */ 263 /* Initialize subscription object */
252 sub->net = net; 264 sub->net = net;
253 sub->seq.type = htohl(s->seq.type, swap); 265 if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
254 sub->seq.lower = htohl(s->seq.lower, swap); 266 (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
255 sub->seq.upper = htohl(s->seq.upper, swap);
256 sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap));
257 sub->filter = htohl(s->filter, swap);
258 if ((!(sub->filter & TIPC_SUB_PORTS) ==
259 !(sub->filter & TIPC_SUB_SERVICE)) ||
260 (sub->seq.lower > sub->seq.upper)) {
261 pr_warn("Subscription rejected, illegal request\n"); 267 pr_warn("Subscription rejected, illegal request\n");
262 kfree(sub); 268 kfree(sub);
263 return -EINVAL; 269 return NULL;
264 } 270 }
265 spin_lock_bh(&subscriber->lock); 271
266 list_add(&sub->subscrp_list, &subscriber->subscrp_list);
267 spin_unlock_bh(&subscriber->lock);
268 sub->subscriber = subscriber;
269 sub->swap = swap; 272 sub->swap = swap;
270 memcpy(&sub->evt.s, s, sizeof(*s)); 273 memcpy(&sub->evt.s, s, sizeof(*s));
271 atomic_inc(&tn->subscription_count); 274 atomic_inc(&tn->subscription_count);
275 return sub;
276}
277
278static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
279 struct tipc_subscriber *subscriber, int swap)
280{
281 struct tipc_net *tn = net_generic(net, tipc_net_id);
282 struct tipc_subscription *sub = NULL;
283 u32 timeout;
284
285 sub = tipc_subscrp_create(net, s, swap);
286 if (!sub)
287 return tipc_conn_terminate(tn->topsrv, subscriber->conid);
288
289 spin_lock_bh(&subscriber->lock);
290 list_add(&sub->subscrp_list, &subscriber->subscrp_list);
291 tipc_subscrb_get(subscriber);
292 sub->subscriber = subscriber;
293 tipc_nametbl_subscribe(sub);
294 spin_unlock_bh(&subscriber->lock);
295
296 timeout = htohl(sub->evt.s.timeout, swap);
297 if (timeout == TIPC_WAIT_FOREVER)
298 return;
299
272 setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); 300 setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
273 if (sub->timeout != TIPC_WAIT_FOREVER) 301 mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
274 sub->timeout += jiffies;
275 if (!mod_timer(&sub->timer, sub->timeout))
276 tipc_subscrb_get(subscriber);
277 *sub_p = sub;
278 return 0;
279} 302}
280 303
281/* Handle one termination request for the subscriber */ 304/* Handle one termination request for the subscriber */
@@ -289,15 +312,22 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
289 struct sockaddr_tipc *addr, void *usr_data, 312 struct sockaddr_tipc *addr, void *usr_data,
290 void *buf, size_t len) 313 void *buf, size_t len)
291{ 314{
292 struct tipc_subscriber *subscrb = usr_data; 315 struct tipc_subscriber *subscriber = usr_data;
293 struct tipc_subscription *sub = NULL; 316 struct tipc_subscr *s = (struct tipc_subscr *)buf;
294 struct tipc_net *tn = net_generic(net, tipc_net_id); 317 int swap;
295 318
296 if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub)) 319 /* Determine subscriber's endianness */
297 return tipc_conn_terminate(tn->topsrv, subscrb->conid); 320 swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
321 TIPC_SUB_CANCEL));
322
323 /* Detect & process a subscription cancellation request */
324 if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
325 s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
326 return tipc_subscrp_cancel(s, subscriber);
327 }
298 328
299 if (sub) 329 if (s)
300 tipc_nametbl_subscribe(sub); 330 tipc_subscrp_subscribe(net, s, subscriber, swap);
301} 331}
302 332
303/* Handle one request to establish a new subscriber */ 333/* Handle one request to establish a new subscriber */
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 92ee18cc5fe6..be60103082c9 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -50,21 +50,15 @@ struct tipc_subscriber;
50 * @subscriber: pointer to its subscriber 50 * @subscriber: pointer to its subscriber
51 * @seq: name sequence associated with subscription 51 * @seq: name sequence associated with subscription
52 * @net: point to network namespace 52 * @net: point to network namespace
53 * @timeout: duration of subscription (in ms)
54 * @filter: event filtering to be done for subscription
55 * @timer: timer governing subscription duration (optional) 53 * @timer: timer governing subscription duration (optional)
56 * @nameseq_list: adjacent subscriptions in name sequence's subscription list 54 * @nameseq_list: adjacent subscriptions in name sequence's subscription list
57 * @subscrp_list: adjacent subscriptions in subscriber's subscription list 55 * @subscrp_list: adjacent subscriptions in subscriber's subscription list
58 * @server_ref: object reference of server port associated with subscription
59 * @swap: indicates if subscriber uses opposite endianness in its messages 56 * @swap: indicates if subscriber uses opposite endianness in its messages
60 * @evt: template for events generated by subscription 57 * @evt: template for events generated by subscription
61 */ 58 */
62struct tipc_subscription { 59struct tipc_subscription {
63 struct tipc_subscriber *subscriber; 60 struct tipc_subscriber *subscriber;
64 struct tipc_name_seq seq;
65 struct net *net; 61 struct net *net;
66 unsigned long timeout;
67 u32 filter;
68 struct timer_list timer; 62 struct timer_list timer;
69 struct list_head nameseq_list; 63 struct list_head nameseq_list;
70 struct list_head subscrp_list; 64 struct list_head subscrp_list;
@@ -72,11 +66,14 @@ struct tipc_subscription {
72 struct tipc_event evt; 66 struct tipc_event evt;
73}; 67};
74 68
75int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, 69int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
76 u32 found_upper); 70 u32 found_upper);
77void tipc_subscrp_report_overlap(struct tipc_subscription *sub, 71void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
78 u32 found_lower, u32 found_upper, u32 event, 72 u32 found_lower, u32 found_upper, u32 event,
79 u32 port_ref, u32 node, int must); 73 u32 port_ref, u32 node, int must);
74void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
75 struct tipc_name_seq *out);
76u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
80int tipc_topsrv_start(struct net *net); 77int tipc_topsrv_start(struct net *net);
81void tipc_topsrv_stop(struct net *net); 78void tipc_topsrv_stop(struct net *net);
82 79
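With the cached host-order fields gone from struct tipc_subscription, every user converts on demand with htohl(value, sub->swap), where swap is detected once from the filter word of the request (see tipc_subscrb_rcv_cb above). A standalone sketch of that conditional byte swap; the flag values match the TIPC ones, while the bswap builtin is a GCC/Clang extension used here only for illustration:

#include <stdint.h>
#include <stdio.h>

#define TIPC_SUB_PORTS   0x01
#define TIPC_SUB_SERVICE 0x02
#define TIPC_SUB_CANCEL  0x04

/* Swap only when the peer's byte order differs from ours. */
static uint32_t htohl(uint32_t in, int swap)
{
    return swap ? __builtin_bswap32(in) : in;
}

int main(void)
{
    /* A filter word as sent by an opposite-endian peer (hypothetical
     * value), seen unconverted on the receiving host. */
    uint32_t filter = __builtin_bswap32(TIPC_SUB_PORTS);

    /* None of the known flag bits match in our byte order, so the
     * subscriber must be using the opposite endianness. */
    int swap = !(filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
                           TIPC_SUB_CANCEL));

    printf("swap=%d filter=0x%x\n", swap, htohl(filter, swap));
    return 0;
}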
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index d63a911e7fe2..c9cf2be3674a 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -48,19 +48,12 @@
48#include <linux/tipc_netlink.h> 48#include <linux/tipc_netlink.h>
49#include "core.h" 49#include "core.h"
50#include "bearer.h" 50#include "bearer.h"
51#include "netlink.h"
51 52
52/* IANA assigned UDP port */ 53/* IANA assigned UDP port */
53#define UDP_PORT_DEFAULT 6118 54#define UDP_PORT_DEFAULT 6118
54 55
55#define UDP_MIN_HEADROOM 28 56#define UDP_MIN_HEADROOM 48
56
57static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
58 [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
59 [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
60 .len = sizeof(struct sockaddr_storage)},
61 [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY,
62 .len = sizeof(struct sockaddr_storage)},
63};
64 57
65/** 58/**
66 * struct udp_media_addr - IP/UDP addressing information 59 * struct udp_media_addr - IP/UDP addressing information
@@ -181,6 +174,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
181 err = PTR_ERR(rt); 174 err = PTR_ERR(rt);
182 goto tx_error; 175 goto tx_error;
183 } 176 }
177
178 skb->dev = rt->dst.dev;
184 ttl = ip4_dst_hoplimit(&rt->dst); 179 ttl = ip4_dst_hoplimit(&rt->dst);
185 udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, 180 udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
186 dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, 181 dst->ipv4.s_addr, 0, ttl, 0, src->udp_port,
@@ -201,7 +196,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
201 ttl = ip6_dst_hoplimit(ndst); 196 ttl = ip6_dst_hoplimit(ndst);
202 err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, 197 err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
203 ndst->dev, &src->ipv6, 198 ndst->dev, &src->ipv6,
204 &dst->ipv6, 0, ttl, src->udp_port, 199 &dst->ipv6, 0, ttl, 0, src->udp_port,
205 dst->udp_port, false); 200 dst->udp_port, false);
206#endif 201#endif
207 } 202 }
@@ -274,7 +269,7 @@ static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
274 struct udp_media_addr *remote) 269 struct udp_media_addr *remote)
275{ 270{
276 struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; 271 struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
277 struct sockaddr_storage *sa_local, *sa_remote; 272 struct sockaddr_storage sa_local, sa_remote;
278 273
279 if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) 274 if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
280 goto err; 275 goto err;
@@ -283,41 +278,48 @@ static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
283 tipc_nl_udp_policy)) 278 tipc_nl_udp_policy))
284 goto err; 279 goto err;
285 if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { 280 if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
286 sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]); 281 nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL],
287 sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]); 282 sizeof(sa_local));
283 nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE],
284 sizeof(sa_remote));
288 } else { 285 } else {
289err: 286err:
290 pr_err("Invalid UDP bearer configuration"); 287 pr_err("Invalid UDP bearer configuration");
291 return -EINVAL; 288 return -EINVAL;
292 } 289 }
293 if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) { 290 if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) {
294 struct sockaddr_in *ip4; 291 struct sockaddr_in *ip4;
295 292
296 ip4 = (struct sockaddr_in *)sa_local; 293 ip4 = (struct sockaddr_in *)&sa_local;
297 local->proto = htons(ETH_P_IP); 294 local->proto = htons(ETH_P_IP);
298 local->udp_port = ip4->sin_port; 295 local->udp_port = ip4->sin_port;
299 local->ipv4.s_addr = ip4->sin_addr.s_addr; 296 local->ipv4.s_addr = ip4->sin_addr.s_addr;
300 297
301 ip4 = (struct sockaddr_in *)sa_remote; 298 ip4 = (struct sockaddr_in *)&sa_remote;
302 remote->proto = htons(ETH_P_IP); 299 remote->proto = htons(ETH_P_IP);
303 remote->udp_port = ip4->sin_port; 300 remote->udp_port = ip4->sin_port;
304 remote->ipv4.s_addr = ip4->sin_addr.s_addr; 301 remote->ipv4.s_addr = ip4->sin_addr.s_addr;
305 return 0; 302 return 0;
306 303
307#if IS_ENABLED(CONFIG_IPV6) 304#if IS_ENABLED(CONFIG_IPV6)
308 } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) { 305 } else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) {
306 int atype;
309 struct sockaddr_in6 *ip6; 307 struct sockaddr_in6 *ip6;
310 308
311 ip6 = (struct sockaddr_in6 *)sa_local; 309 ip6 = (struct sockaddr_in6 *)&sa_local;
310 atype = ipv6_addr_type(&ip6->sin6_addr);
311 if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id)
312 return -EINVAL;
313
312 local->proto = htons(ETH_P_IPV6); 314 local->proto = htons(ETH_P_IPV6);
313 local->udp_port = ip6->sin6_port; 315 local->udp_port = ip6->sin6_port;
314 local->ipv6 = ip6->sin6_addr; 316 memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
315 ub->ifindex = ip6->sin6_scope_id; 317 ub->ifindex = ip6->sin6_scope_id;
316 318
317 ip6 = (struct sockaddr_in6 *)sa_remote; 319 ip6 = (struct sockaddr_in6 *)&sa_remote;
318 remote->proto = htons(ETH_P_IPV6); 320 remote->proto = htons(ETH_P_IPV6);
319 remote->udp_port = ip6->sin6_port; 321 remote->udp_port = ip6->sin6_port;
320 remote->ipv6 = ip6->sin6_addr; 322 memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
321 return 0; 323 return 0;
322#endif 324#endif
323 } 325 }
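parse_options() now copies the UDP address attributes into local sockaddr_storage variables with nla_memcpy() instead of aliasing the attribute payload, so a short or malformed attribute can never be read past its end. A bounded-copy sketch of the same idea in plain C; attr_memcpy() here is a made-up stand-in for nla_memcpy():

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Copy at most the smaller of the attribute length and the destination
 * size; the remainder of the destination stays zeroed. */
static size_t attr_memcpy(void *dst, size_t dstlen,
                          const void *payload, size_t paylen)
{
    size_t n = paylen < dstlen ? paylen : dstlen;

    memset(dst, 0, dstlen);
    memcpy(dst, payload, n);
    return n;
}

int main(void)
{
    struct sockaddr_in src = {
        .sin_family = AF_INET,
        .sin_port = htons(6118),     /* TIPC's default UDP port */
    };
    struct sockaddr_storage local;

    attr_memcpy(&local, sizeof(local), &src, sizeof(src));
    printf("family=%d copied into %zu-byte storage\n",
           (int)((struct sockaddr_in *)&local)->sin_family, sizeof(local));
    return 0;
}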
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f75f847e688d..8269da73e9e5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1534,7 +1534,6 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1534{ 1534{
1535 int i; 1535 int i;
1536 unsigned char max_level = 0; 1536 unsigned char max_level = 0;
1537 int unix_sock_count = 0;
1538 1537
1539 if (too_many_unix_fds(current)) 1538 if (too_many_unix_fds(current))
1540 return -ETOOMANYREFS; 1539 return -ETOOMANYREFS;
@@ -1542,11 +1541,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1542 for (i = scm->fp->count - 1; i >= 0; i--) { 1541 for (i = scm->fp->count - 1; i >= 0; i--) {
1543 struct sock *sk = unix_get_socket(scm->fp->fp[i]); 1542 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1544 1543
1545 if (sk) { 1544 if (sk)
1546 unix_sock_count++;
1547 max_level = max(max_level, 1545 max_level = max(max_level,
1548 unix_sk(sk)->recursion_level); 1546 unix_sk(sk)->recursion_level);
1549 }
1550 } 1547 }
1551 if (unlikely(max_level > MAX_RECURSION_LEVEL)) 1548 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1552 return -ETOOMANYREFS; 1549 return -ETOOMANYREFS;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index bbe65dcb9738..b5f1221f48d4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1209 1209
1210 if (signal_pending(current)) { 1210 if (signal_pending(current)) {
1211 err = sock_intr_errno(timeout); 1211 err = sock_intr_errno(timeout);
1212 goto out_wait_error; 1212 sk->sk_state = SS_UNCONNECTED;
1213 sock->state = SS_UNCONNECTED;
1214 goto out_wait;
1213 } else if (timeout == 0) { 1215 } else if (timeout == 0) {
1214 err = -ETIMEDOUT; 1216 err = -ETIMEDOUT;
1215 goto out_wait_error; 1217 sk->sk_state = SS_UNCONNECTED;
1218 sock->state = SS_UNCONNECTED;
1219 goto out_wait;
1216 } 1220 }
1217 1221
1218 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1222 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1220 1224
1221 if (sk->sk_err) { 1225 if (sk->sk_err) {
1222 err = -sk->sk_err; 1226 err = -sk->sk_err;
1223 goto out_wait_error; 1227 sk->sk_state = SS_UNCONNECTED;
1224 } else 1228 sock->state = SS_UNCONNECTED;
1229 } else {
1225 err = 0; 1230 err = 0;
1231 }
1226 1232
1227out_wait: 1233out_wait:
1228 finish_wait(sk_sleep(sk), &wait); 1234 finish_wait(sk_sleep(sk), &wait);
1229out: 1235out:
1230 release_sock(sk); 1236 release_sock(sk);
1231 return err; 1237 return err;
1232
1233out_wait_error:
1234 sk->sk_state = SS_UNCONNECTED;
1235 sock->state = SS_UNCONNECTED;
1236 goto out_wait;
1237} 1238}
1238 1239
1239static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) 1240static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
@@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
1270 listener->sk_err == 0) { 1271 listener->sk_err == 0) {
1271 release_sock(listener); 1272 release_sock(listener);
1272 timeout = schedule_timeout(timeout); 1273 timeout = schedule_timeout(timeout);
1274 finish_wait(sk_sleep(listener), &wait);
1273 lock_sock(listener); 1275 lock_sock(listener);
1274 1276
1275 if (signal_pending(current)) { 1277 if (signal_pending(current)) {
1276 err = sock_intr_errno(timeout); 1278 err = sock_intr_errno(timeout);
1277 goto out_wait; 1279 goto out;
1278 } else if (timeout == 0) { 1280 } else if (timeout == 0) {
1279 err = -EAGAIN; 1281 err = -EAGAIN;
1280 goto out_wait; 1282 goto out;
1281 } 1283 }
1282 1284
1283 prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); 1285 prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
1284 } 1286 }
1287 finish_wait(sk_sleep(listener), &wait);
1285 1288
1286 if (listener->sk_err) 1289 if (listener->sk_err)
1287 err = -listener->sk_err; 1290 err = -listener->sk_err;
@@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
1301 */ 1304 */
1302 if (err) { 1305 if (err) {
1303 vconnected->rejected = true; 1306 vconnected->rejected = true;
1304 release_sock(connected); 1307 } else {
1305 sock_put(connected); 1308 newsock->state = SS_CONNECTED;
1306 goto out_wait; 1309 sock_graft(connected, newsock);
1307 } 1310 }
1308 1311
1309 newsock->state = SS_CONNECTED;
1310 sock_graft(connected, newsock);
1311 release_sock(connected); 1312 release_sock(connected);
1312 sock_put(connected); 1313 sock_put(connected);
1313 } 1314 }
1314 1315
1315out_wait:
1316 finish_wait(sk_sleep(listener), &wait);
1317out: 1316out:
1318 release_sock(listener); 1317 release_sock(listener);
1319 return err; 1318 return err;
@@ -1557,9 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1557 if (err < 0) 1556 if (err < 0)
1558 goto out; 1557 goto out;
1559 1558
1559
1560 while (total_written < len) { 1560 while (total_written < len) {
1561 ssize_t written; 1561 ssize_t written;
1562 1562
1563 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1563 while (vsock_stream_has_space(vsk) == 0 && 1564 while (vsock_stream_has_space(vsk) == 0 &&
1564 sk->sk_err == 0 && 1565 sk->sk_err == 0 &&
1565 !(sk->sk_shutdown & SEND_SHUTDOWN) && 1566 !(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1568,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1568 /* Don't wait for non-blocking sockets. */ 1569 /* Don't wait for non-blocking sockets. */
1569 if (timeout == 0) { 1570 if (timeout == 0) {
1570 err = -EAGAIN; 1571 err = -EAGAIN;
1571 goto out_wait; 1572 finish_wait(sk_sleep(sk), &wait);
1573 goto out_err;
1572 } 1574 }
1573 1575
1574 err = transport->notify_send_pre_block(vsk, &send_data); 1576 err = transport->notify_send_pre_block(vsk, &send_data);
1575 if (err < 0) 1577 if (err < 0) {
1576 goto out_wait; 1578 finish_wait(sk_sleep(sk), &wait);
1579 goto out_err;
1580 }
1577 1581
1578 release_sock(sk); 1582 release_sock(sk);
1579 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1580 timeout = schedule_timeout(timeout); 1583 timeout = schedule_timeout(timeout);
1581 finish_wait(sk_sleep(sk), &wait);
1582 lock_sock(sk); 1584 lock_sock(sk);
1583 if (signal_pending(current)) { 1585 if (signal_pending(current)) {
1584 err = sock_intr_errno(timeout); 1586 err = sock_intr_errno(timeout);
1585 goto out_wait; 1587 finish_wait(sk_sleep(sk), &wait);
1588 goto out_err;
1586 } else if (timeout == 0) { 1589 } else if (timeout == 0) {
1587 err = -EAGAIN; 1590 err = -EAGAIN;
1588 goto out_wait; 1591 finish_wait(sk_sleep(sk), &wait);
1592 goto out_err;
1589 } 1593 }
1590 1594
1595 prepare_to_wait(sk_sleep(sk), &wait,
1596 TASK_INTERRUPTIBLE);
1591 } 1597 }
1598 finish_wait(sk_sleep(sk), &wait);
1592 1599
1593 /* These checks occur both as part of and after the loop 1600 /* These checks occur both as part of and after the loop
1594 * conditional since we need to check before and after 1601 * conditional since we need to check before and after
@@ -1596,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1596 */ 1603 */
1597 if (sk->sk_err) { 1604 if (sk->sk_err) {
1598 err = -sk->sk_err; 1605 err = -sk->sk_err;
1599 goto out_wait; 1606 goto out_err;
1600 } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || 1607 } else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
1601 (vsk->peer_shutdown & RCV_SHUTDOWN)) { 1608 (vsk->peer_shutdown & RCV_SHUTDOWN)) {
1602 err = -EPIPE; 1609 err = -EPIPE;
1603 goto out_wait; 1610 goto out_err;
1604 } 1611 }
1605 1612
1606 err = transport->notify_send_pre_enqueue(vsk, &send_data); 1613 err = transport->notify_send_pre_enqueue(vsk, &send_data);
1607 if (err < 0) 1614 if (err < 0)
1608 goto out_wait; 1615 goto out_err;
1609 1616
1610 /* Note that enqueue will only write as many bytes as are free 1617 /* Note that enqueue will only write as many bytes as are free
1611 * in the produce queue, so we don't need to ensure len is 1618 * in the produce queue, so we don't need to ensure len is
@@ -1618,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1618 len - total_written); 1625 len - total_written);
1619 if (written < 0) { 1626 if (written < 0) {
1620 err = -ENOMEM; 1627 err = -ENOMEM;
1621 goto out_wait; 1628 goto out_err;
1622 } 1629 }
1623 1630
1624 total_written += written; 1631 total_written += written;
@@ -1626,11 +1633,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1626 err = transport->notify_send_post_enqueue( 1633 err = transport->notify_send_post_enqueue(
1627 vsk, written, &send_data); 1634 vsk, written, &send_data);
1628 if (err < 0) 1635 if (err < 0)
1629 goto out_wait; 1636 goto out_err;
1630 1637
1631 } 1638 }
1632 1639
1633out_wait: 1640out_err:
1634 if (total_written > 0) 1641 if (total_written > 0)
1635 err = total_written; 1642 err = total_written;
1636out: 1643out:
@@ -1715,18 +1722,59 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1715 1722
1716 1723
1717 while (1) { 1724 while (1) {
1718 s64 ready = vsock_stream_has_data(vsk); 1725 s64 ready;
1719 1726
1720 if (ready < 0) { 1727 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1721 /* Invalid queue pair content. XXX This should be 1728 ready = vsock_stream_has_data(vsk);
1722 * changed to a connection reset in a later change.
1723 */
1724 1729
1725 err = -ENOMEM; 1730 if (ready == 0) {
1726 goto out; 1731 if (sk->sk_err != 0 ||
1727 } else if (ready > 0) { 1732 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1733 (vsk->peer_shutdown & SEND_SHUTDOWN)) {
1734 finish_wait(sk_sleep(sk), &wait);
1735 break;
1736 }
1737 /* Don't wait for non-blocking sockets. */
1738 if (timeout == 0) {
1739 err = -EAGAIN;
1740 finish_wait(sk_sleep(sk), &wait);
1741 break;
1742 }
1743
1744 err = transport->notify_recv_pre_block(
1745 vsk, target, &recv_data);
1746 if (err < 0) {
1747 finish_wait(sk_sleep(sk), &wait);
1748 break;
1749 }
1750 release_sock(sk);
1751 timeout = schedule_timeout(timeout);
1752 lock_sock(sk);
1753
1754 if (signal_pending(current)) {
1755 err = sock_intr_errno(timeout);
1756 finish_wait(sk_sleep(sk), &wait);
1757 break;
1758 } else if (timeout == 0) {
1759 err = -EAGAIN;
1760 finish_wait(sk_sleep(sk), &wait);
1761 break;
1762 }
1763 } else {
1728 ssize_t read; 1764 ssize_t read;
1729 1765
1766 finish_wait(sk_sleep(sk), &wait);
1767
1768 if (ready < 0) {
1769 /* Invalid queue pair content. XXX This should
1770 * be changed to a connection reset in a later
1771 * change.
1772 */
1773
1774 err = -ENOMEM;
1775 goto out;
1776 }
1777
1730 err = transport->notify_recv_pre_dequeue( 1778 err = transport->notify_recv_pre_dequeue(
1731 vsk, target, &recv_data); 1779 vsk, target, &recv_data);
1732 if (err < 0) 1780 if (err < 0)
@@ -1752,35 +1800,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1752 break; 1800 break;
1753 1801
1754 target -= read; 1802 target -= read;
1755 } else {
1756 if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
1757 || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
1758 break;
1759 }
1760 /* Don't wait for non-blocking sockets. */
1761 if (timeout == 0) {
1762 err = -EAGAIN;
1763 break;
1764 }
1765
1766 err = transport->notify_recv_pre_block(
1767 vsk, target, &recv_data);
1768 if (err < 0)
1769 break;
1770
1771 release_sock(sk);
1772 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1773 timeout = schedule_timeout(timeout);
1774 finish_wait(sk_sleep(sk), &wait);
1775 lock_sock(sk);
1776
1777 if (signal_pending(current)) {
1778 err = sock_intr_errno(timeout);
1779 break;
1780 } else if (timeout == 0) {
1781 err = -EAGAIN;
1782 break;
1783 }
1784 } 1803 }
1785 } 1804 }
1786 1805
@@ -1789,27 +1808,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1789 else if (sk->sk_shutdown & RCV_SHUTDOWN) 1808 else if (sk->sk_shutdown & RCV_SHUTDOWN)
1790 err = 0; 1809 err = 0;
1791 1810
1792 if (copied > 0) { 1811 if (copied > 0)
1793 /* We only do these additional bookkeeping/notification steps
1794 * if we actually copied something out of the queue pair
1795 * instead of just peeking ahead.
1796 */
1797
1798 if (!(flags & MSG_PEEK)) {
1799 /* If the other side has shutdown for sending and there
1800 * is nothing more to read, then modify the socket
1801 * state.
1802 */
1803 if (vsk->peer_shutdown & SEND_SHUTDOWN) {
1804 if (vsock_stream_has_data(vsk) <= 0) {
1805 sk->sk_state = SS_UNCONNECTED;
1806 sock_set_flag(sk, SOCK_DONE);
1807 sk->sk_state_change(sk);
1808 }
1809 }
1810 }
1811 err = copied; 1812 err = copied;
1812 }
1813 1813
1814out: 1814out:
1815 release_sock(sk); 1815 release_sock(sk);
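A note on the af_vsock.c hunks above: both the send and the receive path now arm the wait queue with prepare_to_wait() before (re-)checking their wakeup condition, and call finish_wait() on every exit from the loop, so a wakeup arriving between the condition check and the sleep can no longer be lost; the out_wait label in sendmsg becomes out_err accordingly. A minimal sketch of the resulting loop shape, where condition() is a hypothetical stand-in for the real "queue has space" / "queue has data" predicates:

/* Sketch only -- not the driver code. Needs <net/sock.h>, <linux/wait.h>,
 * <linux/sched.h>; condition() is a placeholder for the real predicate.
 */
static long vsock_wait_sketch(struct sock *sk, long timeout)
{
	DEFINE_WAIT(wait);

	/* Arm the queue before testing, so a concurrent wakeup is kept. */
	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	while (!condition(sk) && timeout) {
		release_sock(sk);
		timeout = schedule_timeout(timeout);
		lock_sock(sk);

		if (signal_pending(current))
			break;

		/* Re-arm before the next check of the condition. */
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	}

	finish_wait(sk_sleep(sk), &wait);
	return timeout;
}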
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 0a369bb440e7..56214736fe88 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -842,7 +842,7 @@ static void vmci_transport_peer_detach_cb(u32 sub_id,
842 * qp_handle. 842 * qp_handle.
843 */ 843 */
844 if (vmci_handle_is_invalid(e_payload->handle) || 844 if (vmci_handle_is_invalid(e_payload->handle) ||
845 vmci_handle_is_equal(trans->qp_handle, e_payload->handle)) 845 !vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
846 return; 846 return;
847 847
848 /* We don't ask for delayed CBs when we subscribe to this event (we 848 /* We don't ask for delayed CBs when we subscribe to this event (we
@@ -1735,11 +1735,8 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
1735 /* Retrieve the head sk_buff from the socket's receive queue. */ 1735 /* Retrieve the head sk_buff from the socket's receive queue. */
1736 err = 0; 1736 err = 0;
1737 skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); 1737 skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
1738 if (err)
1739 return err;
1740
1741 if (!skb) 1738 if (!skb)
1742 return -EAGAIN; 1739 return err;
1743 1740
1744 dg = (struct vmci_datagram *)skb->data; 1741 dg = (struct vmci_datagram *)skb->data;
1745 if (!dg) 1742 if (!dg)
@@ -2154,7 +2151,7 @@ module_exit(vmci_transport_exit);
2154 2151
2155MODULE_AUTHOR("VMware, Inc."); 2152MODULE_AUTHOR("VMware, Inc.");
2156MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); 2153MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2157MODULE_VERSION("1.0.2.0-k"); 2154MODULE_VERSION("1.0.4.0-k");
2158MODULE_LICENSE("GPL v2"); 2155MODULE_LICENSE("GPL v2");
2159MODULE_ALIAS("vmware_vsock"); 2156MODULE_ALIAS("vmware_vsock");
2160MODULE_ALIAS_NETPROTO(PF_VSOCK); 2157MODULE_ALIAS_NETPROTO(PF_VSOCK);
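Two small fixes in vmci_transport.c above are worth spelling out. The peer-detach callback previously returned early when the detached handle matched the socket's queue pair, i.e. it ignored exactly the events it should handle; the added '!' inverts that test so only detach events for other queue pairs are skipped. And vmci_transport_dgram_dequeue() drops its separate error check because skb_recv_datagram() returns NULL with *err set whenever it fails. A hedged sketch of that calling convention (dequeue_sketch() is illustrative, not the driver function; needs <linux/skbuff.h>, <net/sock.h>, <linux/vmw_vmci_defs.h>):

/* Sketch: skb_recv_datagram() sets *err and returns NULL on failure,
 * so testing the returned pointer is enough. */
static struct vmci_datagram *dequeue_sketch(struct sock *sk, int flags,
					    int noblock, int *err)
{
	struct sk_buff *skb;

	*err = 0;
	skb = skb_recv_datagram(sk, flags, noblock, err);
	if (!skb)
		return NULL;	/* *err holds -EAGAIN, -EINTR, ... */

	return (struct vmci_datagram *)skb->data;
}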
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index da72ed32f143..6c606120abfe 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -50,8 +50,8 @@ config CFG80211_DEVELOPER_WARNINGS
50 default n 50 default n
51 help 51 help
52 This option enables some additional warnings that help 52 This option enables some additional warnings that help
53 cfg80211 developers and driver developers, but that can 53 cfg80211 developers and driver developers, but beware that
54 trigger due to races with userspace. 54 they can also trigger due to races with userspace.
55 55
56 For example, when a driver reports that it was disconnected 56 For example, when a driver reports that it was disconnected
57 from the AP, but the user disconnects manually at the same 57 from the AP, but the user disconnects manually at the same
@@ -61,19 +61,6 @@ config CFG80211_DEVELOPER_WARNINGS
61 on it (or mac80211). 61 on it (or mac80211).
62 62
63 63
64config CFG80211_REG_DEBUG
65 bool "cfg80211 regulatory debugging"
66 depends on CFG80211
67 default n
68 ---help---
69 You can enable this if you want to debug regulatory changes.
70 For more information on cfg80211 regulatory refer to the wireless
71 wiki:
72
73 http://wireless.kernel.org/en/developers/Regulatory
74
75 If unsure, say N.
76
77config CFG80211_CERTIFICATION_ONUS 64config CFG80211_CERTIFICATION_ONUS
78 bool "cfg80211 certification onus" 65 bool "cfg80211 certification onus"
79 depends on CFG80211 && EXPERT 66 depends on CFG80211 && EXPERT
@@ -123,7 +110,7 @@ config CFG80211_REG_RELAX_NO_IR
123 interface which associated to an AP which userspace assumes or confirms 110 interface which associated to an AP which userspace assumes or confirms
124 to be an authorized master, i.e., with radar detection support and DFS 111 to be an authorized master, i.e., with radar detection support and DFS
125 capabilities. However, note that in order to not create daisy chain 112 capabilities. However, note that in order to not create daisy chain
126 scenarios, this relaxation is not allowed in cases that the BSS client 113 scenarios, this relaxation is not allowed in cases where the BSS client
127 is associated to P2P GO and in addition the P2P GO instantiated on 114 is associated to P2P GO and in addition the P2P GO instantiated on
128 a channel due to this relaxation should not allow connection from 115 a channel due to this relaxation should not allow connection from
129 non P2P clients. 116 non P2P clients.
@@ -148,7 +135,7 @@ config CFG80211_DEBUGFS
148 depends on CFG80211 135 depends on CFG80211
149 depends on DEBUG_FS 136 depends on DEBUG_FS
150 ---help--- 137 ---help---
151 You can enable this if you want to debugfs entries for cfg80211. 138 You can enable this if you want debugfs entries for cfg80211.
152 139
153 If unsure, say N. 140 If unsure, say N.
154 141
@@ -159,7 +146,7 @@ config CFG80211_INTERNAL_REGDB
159 ---help--- 146 ---help---
160 This option generates an internal data structure representing 147 This option generates an internal data structure representing
161 the wireless regulatory rules described in net/wireless/db.txt 148 the wireless regulatory rules described in net/wireless/db.txt
162 and includes code to query that database. This is an alternative 149 and includes code to query that database. This is an alternative
163 to using CRDA for defining regulatory rules for the kernel. 150 to using CRDA for defining regulatory rules for the kernel.
164 151
165 Using this option requires some parsing of the db.txt at build time, 152 Using this option requires some parsing of the db.txt at build time,
@@ -172,7 +159,7 @@ config CFG80211_INTERNAL_REGDB
172 159
173 http://wireless.kernel.org/en/developers/Regulatory 160 http://wireless.kernel.org/en/developers/Regulatory
174 161
175 Most distributions have a CRDA package. So if unsure, say N. 162 Most distributions have a CRDA package. So if unsure, say N.
176 163
177config CFG80211_CRDA_SUPPORT 164config CFG80211_CRDA_SUPPORT
178 bool "support CRDA" if CFG80211_INTERNAL_REGDB 165 bool "support CRDA" if CFG80211_INTERNAL_REGDB
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 8f0bac7e03c4..9f1c4aa851ef 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -352,6 +352,16 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
352 WARN_ON(ops->add_station && !ops->del_station); 352 WARN_ON(ops->add_station && !ops->del_station);
353 WARN_ON(ops->add_mpath && !ops->del_mpath); 353 WARN_ON(ops->add_mpath && !ops->del_mpath);
354 WARN_ON(ops->join_mesh && !ops->leave_mesh); 354 WARN_ON(ops->join_mesh && !ops->leave_mesh);
355 WARN_ON(ops->start_p2p_device && !ops->stop_p2p_device);
356 WARN_ON(ops->start_ap && !ops->stop_ap);
357 WARN_ON(ops->join_ocb && !ops->leave_ocb);
358 WARN_ON(ops->suspend && !ops->resume);
359 WARN_ON(ops->sched_scan_start && !ops->sched_scan_stop);
360 WARN_ON(ops->remain_on_channel && !ops->cancel_remain_on_channel);
361 WARN_ON(ops->tdls_channel_switch && !ops->tdls_cancel_channel_switch);
362 WARN_ON(ops->add_tx_ts && !ops->del_tx_ts);
363 WARN_ON(ops->set_tx_power && !ops->get_tx_power);
364 WARN_ON(ops->set_antenna && !ops->get_antenna);
355 365
356 alloc_size = sizeof(*rdev) + sizeof_priv; 366 alloc_size = sizeof(*rdev) + sizeof_priv;
357 367
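The wiphy_new_nm() hunk extends an existing convention: cfg80211 callbacks that come in pairs (start/stop, set/get, add/del) must be registered together, and the new WARN_ON()s catch drivers that wire up only one half. A hedged sketch of a driver ops table that satisfies one of the added checks; the names and bodies are placeholders, and the callback signatures are quoted from memory of include/net/cfg80211.h of this era, so verify against the header:

/* Sketch, not a real driver: suspend and resume provided as a pair. */
static int sketch_suspend(struct wiphy *wiphy, struct cfg80211_wowlan *wow)
{
	return 0;	/* nothing to quiesce in this sketch */
}

static int sketch_resume(struct wiphy *wiphy)
{
	return 0;
}

static const struct cfg80211_ops sketch_ops = {
	.suspend = sketch_suspend,	/* keeps WARN_ON(ops->suspend && !ops->resume) quiet */
	.resume  = sketch_resume,
};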
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 3cd819539241..71447cf86306 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -29,7 +29,8 @@
29#include <linux/ieee80211.h> 29#include <linux/ieee80211.h>
30#include <net/iw_handler.h> 30#include <net/iw_handler.h>
31 31
32#include <linux/crypto.h> 32#include <crypto/hash.h>
33#include <crypto/skcipher.h>
33#include <linux/crc32.h> 34#include <linux/crc32.h>
34 35
35#include <net/lib80211.h> 36#include <net/lib80211.h>
@@ -63,10 +64,10 @@ struct lib80211_tkip_data {
63 64
64 int key_idx; 65 int key_idx;
65 66
66 struct crypto_blkcipher *rx_tfm_arc4; 67 struct crypto_skcipher *rx_tfm_arc4;
67 struct crypto_hash *rx_tfm_michael; 68 struct crypto_ahash *rx_tfm_michael;
68 struct crypto_blkcipher *tx_tfm_arc4; 69 struct crypto_skcipher *tx_tfm_arc4;
69 struct crypto_hash *tx_tfm_michael; 70 struct crypto_ahash *tx_tfm_michael;
70 71
71 /* scratch buffers for virt_to_page() (crypto API) */ 72 /* scratch buffers for virt_to_page() (crypto API) */
72 u8 rx_hdr[16], tx_hdr[16]; 73 u8 rx_hdr[16], tx_hdr[16];
@@ -98,29 +99,29 @@ static void *lib80211_tkip_init(int key_idx)
98 99
99 priv->key_idx = key_idx; 100 priv->key_idx = key_idx;
100 101
101 priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, 102 priv->tx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
102 CRYPTO_ALG_ASYNC); 103 CRYPTO_ALG_ASYNC);
103 if (IS_ERR(priv->tx_tfm_arc4)) { 104 if (IS_ERR(priv->tx_tfm_arc4)) {
104 priv->tx_tfm_arc4 = NULL; 105 priv->tx_tfm_arc4 = NULL;
105 goto fail; 106 goto fail;
106 } 107 }
107 108
108 priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, 109 priv->tx_tfm_michael = crypto_alloc_ahash("michael_mic", 0,
109 CRYPTO_ALG_ASYNC); 110 CRYPTO_ALG_ASYNC);
110 if (IS_ERR(priv->tx_tfm_michael)) { 111 if (IS_ERR(priv->tx_tfm_michael)) {
111 priv->tx_tfm_michael = NULL; 112 priv->tx_tfm_michael = NULL;
112 goto fail; 113 goto fail;
113 } 114 }
114 115
115 priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, 116 priv->rx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
116 CRYPTO_ALG_ASYNC); 117 CRYPTO_ALG_ASYNC);
117 if (IS_ERR(priv->rx_tfm_arc4)) { 118 if (IS_ERR(priv->rx_tfm_arc4)) {
118 priv->rx_tfm_arc4 = NULL; 119 priv->rx_tfm_arc4 = NULL;
119 goto fail; 120 goto fail;
120 } 121 }
121 122
122 priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, 123 priv->rx_tfm_michael = crypto_alloc_ahash("michael_mic", 0,
123 CRYPTO_ALG_ASYNC); 124 CRYPTO_ALG_ASYNC);
124 if (IS_ERR(priv->rx_tfm_michael)) { 125 if (IS_ERR(priv->rx_tfm_michael)) {
125 priv->rx_tfm_michael = NULL; 126 priv->rx_tfm_michael = NULL;
126 goto fail; 127 goto fail;
@@ -130,14 +131,10 @@ static void *lib80211_tkip_init(int key_idx)
130 131
131 fail: 132 fail:
132 if (priv) { 133 if (priv) {
133 if (priv->tx_tfm_michael) 134 crypto_free_ahash(priv->tx_tfm_michael);
134 crypto_free_hash(priv->tx_tfm_michael); 135 crypto_free_skcipher(priv->tx_tfm_arc4);
135 if (priv->tx_tfm_arc4) 136 crypto_free_ahash(priv->rx_tfm_michael);
136 crypto_free_blkcipher(priv->tx_tfm_arc4); 137 crypto_free_skcipher(priv->rx_tfm_arc4);
137 if (priv->rx_tfm_michael)
138 crypto_free_hash(priv->rx_tfm_michael);
139 if (priv->rx_tfm_arc4)
140 crypto_free_blkcipher(priv->rx_tfm_arc4);
141 kfree(priv); 138 kfree(priv);
142 } 139 }
143 140
@@ -148,14 +145,10 @@ static void lib80211_tkip_deinit(void *priv)
148{ 145{
149 struct lib80211_tkip_data *_priv = priv; 146 struct lib80211_tkip_data *_priv = priv;
150 if (_priv) { 147 if (_priv) {
151 if (_priv->tx_tfm_michael) 148 crypto_free_ahash(_priv->tx_tfm_michael);
152 crypto_free_hash(_priv->tx_tfm_michael); 149 crypto_free_skcipher(_priv->tx_tfm_arc4);
153 if (_priv->tx_tfm_arc4) 150 crypto_free_ahash(_priv->rx_tfm_michael);
154 crypto_free_blkcipher(_priv->tx_tfm_arc4); 151 crypto_free_skcipher(_priv->rx_tfm_arc4);
155 if (_priv->rx_tfm_michael)
156 crypto_free_hash(_priv->rx_tfm_michael);
157 if (_priv->rx_tfm_arc4)
158 crypto_free_blkcipher(_priv->rx_tfm_arc4);
159 } 152 }
160 kfree(priv); 153 kfree(priv);
161} 154}
@@ -353,11 +346,12 @@ static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
353static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) 346static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
354{ 347{
355 struct lib80211_tkip_data *tkey = priv; 348 struct lib80211_tkip_data *tkey = priv;
356 struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 }; 349 SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
357 int len; 350 int len;
358 u8 rc4key[16], *pos, *icv; 351 u8 rc4key[16], *pos, *icv;
359 u32 crc; 352 u32 crc;
360 struct scatterlist sg; 353 struct scatterlist sg;
354 int err;
361 355
362 if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { 356 if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
363 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 357 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -382,9 +376,14 @@ static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
382 icv[2] = crc >> 16; 376 icv[2] = crc >> 16;
383 icv[3] = crc >> 24; 377 icv[3] = crc >> 24;
384 378
385 crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16); 379 crypto_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
386 sg_init_one(&sg, pos, len + 4); 380 sg_init_one(&sg, pos, len + 4);
387 return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); 381 skcipher_request_set_tfm(req, tkey->tx_tfm_arc4);
382 skcipher_request_set_callback(req, 0, NULL, NULL);
383 skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
384 err = crypto_skcipher_encrypt(req);
385 skcipher_request_zero(req);
386 return err;
388} 387}
389 388
390/* 389/*
@@ -403,7 +402,7 @@ static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
403static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) 402static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
404{ 403{
405 struct lib80211_tkip_data *tkey = priv; 404 struct lib80211_tkip_data *tkey = priv;
406 struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 }; 405 SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
407 u8 rc4key[16]; 406 u8 rc4key[16];
408 u8 keyidx, *pos; 407 u8 keyidx, *pos;
409 u32 iv32; 408 u32 iv32;
@@ -413,6 +412,7 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
413 u32 crc; 412 u32 crc;
414 struct scatterlist sg; 413 struct scatterlist sg;
415 int plen; 414 int plen;
415 int err;
416 416
417 hdr = (struct ieee80211_hdr *)skb->data; 417 hdr = (struct ieee80211_hdr *)skb->data;
418 418
@@ -465,9 +465,14 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
465 465
466 plen = skb->len - hdr_len - 12; 466 plen = skb->len - hdr_len - 12;
467 467
468 crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16); 468 crypto_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
469 sg_init_one(&sg, pos, plen + 4); 469 sg_init_one(&sg, pos, plen + 4);
470 if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { 470 skcipher_request_set_tfm(req, tkey->rx_tfm_arc4);
471 skcipher_request_set_callback(req, 0, NULL, NULL);
472 skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
473 err = crypto_skcipher_decrypt(req);
474 skcipher_request_zero(req);
475 if (err) {
471 net_dbg_ratelimited("TKIP: failed to decrypt received packet from %pM\n", 476 net_dbg_ratelimited("TKIP: failed to decrypt received packet from %pM\n",
472 hdr->addr2); 477 hdr->addr2);
473 return -7; 478 return -7;
@@ -505,11 +510,12 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
505 return keyidx; 510 return keyidx;
506} 511}
507 512
508static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, 513static int michael_mic(struct crypto_ahash *tfm_michael, u8 * key, u8 * hdr,
509 u8 * data, size_t data_len, u8 * mic) 514 u8 * data, size_t data_len, u8 * mic)
510{ 515{
511 struct hash_desc desc; 516 AHASH_REQUEST_ON_STACK(req, tfm_michael);
512 struct scatterlist sg[2]; 517 struct scatterlist sg[2];
518 int err;
513 519
514 if (tfm_michael == NULL) { 520 if (tfm_michael == NULL) {
515 pr_warn("%s(): tfm_michael == NULL\n", __func__); 521 pr_warn("%s(): tfm_michael == NULL\n", __func__);
@@ -519,12 +525,15 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
519 sg_set_buf(&sg[0], hdr, 16); 525 sg_set_buf(&sg[0], hdr, 16);
520 sg_set_buf(&sg[1], data, data_len); 526 sg_set_buf(&sg[1], data, data_len);
521 527
522 if (crypto_hash_setkey(tfm_michael, key, 8)) 528 if (crypto_ahash_setkey(tfm_michael, key, 8))
523 return -1; 529 return -1;
524 530
525 desc.tfm = tfm_michael; 531 ahash_request_set_tfm(req, tfm_michael);
526 desc.flags = 0; 532 ahash_request_set_callback(req, 0, NULL, NULL);
527 return crypto_hash_digest(&desc, sg, data_len + 16, mic); 533 ahash_request_set_crypt(req, sg, mic, data_len + 16);
534 err = crypto_ahash_digest(req);
535 ahash_request_zero(req);
536 return err;
528} 537}
529 538
530static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) 539static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
@@ -645,10 +654,10 @@ static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
645{ 654{
646 struct lib80211_tkip_data *tkey = priv; 655 struct lib80211_tkip_data *tkey = priv;
647 int keyidx; 656 int keyidx;
648 struct crypto_hash *tfm = tkey->tx_tfm_michael; 657 struct crypto_ahash *tfm = tkey->tx_tfm_michael;
649 struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4; 658 struct crypto_skcipher *tfm2 = tkey->tx_tfm_arc4;
650 struct crypto_hash *tfm3 = tkey->rx_tfm_michael; 659 struct crypto_ahash *tfm3 = tkey->rx_tfm_michael;
651 struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4; 660 struct crypto_skcipher *tfm4 = tkey->rx_tfm_arc4;
652 661
653 keyidx = tkey->key_idx; 662 keyidx = tkey->key_idx;
654 memset(tkey, 0, sizeof(*tkey)); 663 memset(tkey, 0, sizeof(*tkey));
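The lib80211 TKIP changes above (and the WEP changes below) are a mechanical move from the old crypto_blkcipher/crypto_hash interfaces to crypto_skcipher/crypto_ahash: allocate the transform with the ASYNC bit masked off so a synchronous implementation is returned, place a request on the stack, run the crypt or digest operation, then wipe the request. A condensed sketch of both patterns, assuming flat buffers and placeholder key lengths; this is illustrative, not the driver code:

/* Needs <crypto/hash.h>, <crypto/skcipher.h>, <linux/scatterlist.h>. */

/* One-shot ARC4 over a flat buffer with the new skcipher interface. */
static int arc4_sketch(const u8 *key, u8 *buf, unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct scatterlist sg;
	int err;

	/* mask = CRYPTO_ALG_ASYNC: ask for a synchronous implementation */
	tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SKCIPHER_REQUEST_ON_STACK(req, tfm);

		crypto_skcipher_setkey(tfm, key, 16);
		sg_init_one(&sg, buf, len);
		skcipher_request_set_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, NULL);
		err = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
	}

	crypto_free_skcipher(tfm);
	return err;
}

/* One-shot Michael MIC over a scatterlist with the new ahash interface. */
static int mic_sketch(const u8 *key, struct scatterlist *sg,
		      unsigned int len, u8 *mic)
{
	struct crypto_ahash *tfm;
	int err;

	tfm = crypto_alloc_ahash("michael_mic", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_ahash_setkey(tfm, key, 8);
	if (!err) {
		AHASH_REQUEST_ON_STACK(req, tfm);

		ahash_request_set_tfm(req, tfm);
		ahash_request_set_callback(req, 0, NULL, NULL);
		ahash_request_set_crypt(req, sg, mic, len);
		err = crypto_ahash_digest(req);
		ahash_request_zero(req);
	}

	crypto_free_ahash(tfm);
	return err;
}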
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index 1c292e4ea7b6..d05f58b0fd04 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -22,7 +22,7 @@
22 22
23#include <net/lib80211.h> 23#include <net/lib80211.h>
24 24
25#include <linux/crypto.h> 25#include <crypto/skcipher.h>
26#include <linux/crc32.h> 26#include <linux/crc32.h>
27 27
28MODULE_AUTHOR("Jouni Malinen"); 28MODULE_AUTHOR("Jouni Malinen");
@@ -35,8 +35,8 @@ struct lib80211_wep_data {
35 u8 key[WEP_KEY_LEN + 1]; 35 u8 key[WEP_KEY_LEN + 1];
36 u8 key_len; 36 u8 key_len;
37 u8 key_idx; 37 u8 key_idx;
38 struct crypto_blkcipher *tx_tfm; 38 struct crypto_skcipher *tx_tfm;
39 struct crypto_blkcipher *rx_tfm; 39 struct crypto_skcipher *rx_tfm;
40}; 40};
41 41
42static void *lib80211_wep_init(int keyidx) 42static void *lib80211_wep_init(int keyidx)
@@ -48,13 +48,13 @@ static void *lib80211_wep_init(int keyidx)
48 goto fail; 48 goto fail;
49 priv->key_idx = keyidx; 49 priv->key_idx = keyidx;
50 50
51 priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); 51 priv->tx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
52 if (IS_ERR(priv->tx_tfm)) { 52 if (IS_ERR(priv->tx_tfm)) {
53 priv->tx_tfm = NULL; 53 priv->tx_tfm = NULL;
54 goto fail; 54 goto fail;
55 } 55 }
56 56
57 priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); 57 priv->rx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
58 if (IS_ERR(priv->rx_tfm)) { 58 if (IS_ERR(priv->rx_tfm)) {
59 priv->rx_tfm = NULL; 59 priv->rx_tfm = NULL;
60 goto fail; 60 goto fail;
@@ -66,10 +66,8 @@ static void *lib80211_wep_init(int keyidx)
66 66
67 fail: 67 fail:
68 if (priv) { 68 if (priv) {
69 if (priv->tx_tfm) 69 crypto_free_skcipher(priv->tx_tfm);
70 crypto_free_blkcipher(priv->tx_tfm); 70 crypto_free_skcipher(priv->rx_tfm);
71 if (priv->rx_tfm)
72 crypto_free_blkcipher(priv->rx_tfm);
73 kfree(priv); 71 kfree(priv);
74 } 72 }
75 return NULL; 73 return NULL;
@@ -79,10 +77,8 @@ static void lib80211_wep_deinit(void *priv)
79{ 77{
80 struct lib80211_wep_data *_priv = priv; 78 struct lib80211_wep_data *_priv = priv;
81 if (_priv) { 79 if (_priv) {
82 if (_priv->tx_tfm) 80 crypto_free_skcipher(_priv->tx_tfm);
83 crypto_free_blkcipher(_priv->tx_tfm); 81 crypto_free_skcipher(_priv->rx_tfm);
84 if (_priv->rx_tfm)
85 crypto_free_blkcipher(_priv->rx_tfm);
86 } 82 }
87 kfree(priv); 83 kfree(priv);
88} 84}
@@ -133,11 +129,12 @@ static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len,
133static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) 129static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
134{ 130{
135 struct lib80211_wep_data *wep = priv; 131 struct lib80211_wep_data *wep = priv;
136 struct blkcipher_desc desc = { .tfm = wep->tx_tfm }; 132 SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
137 u32 crc, klen, len; 133 u32 crc, klen, len;
138 u8 *pos, *icv; 134 u8 *pos, *icv;
139 struct scatterlist sg; 135 struct scatterlist sg;
140 u8 key[WEP_KEY_LEN + 3]; 136 u8 key[WEP_KEY_LEN + 3];
137 int err;
141 138
142 /* other checks are in lib80211_wep_build_iv */ 139 /* other checks are in lib80211_wep_build_iv */
143 if (skb_tailroom(skb) < 4) 140 if (skb_tailroom(skb) < 4)
@@ -165,9 +162,14 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
165 icv[2] = crc >> 16; 162 icv[2] = crc >> 16;
166 icv[3] = crc >> 24; 163 icv[3] = crc >> 24;
167 164
168 crypto_blkcipher_setkey(wep->tx_tfm, key, klen); 165 crypto_skcipher_setkey(wep->tx_tfm, key, klen);
169 sg_init_one(&sg, pos, len + 4); 166 sg_init_one(&sg, pos, len + 4);
170 return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); 167 skcipher_request_set_tfm(req, wep->tx_tfm);
168 skcipher_request_set_callback(req, 0, NULL, NULL);
169 skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
170 err = crypto_skcipher_encrypt(req);
171 skcipher_request_zero(req);
172 return err;
171} 173}
172 174
173/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of 175/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
@@ -180,11 +182,12 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
180static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) 182static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
181{ 183{
182 struct lib80211_wep_data *wep = priv; 184 struct lib80211_wep_data *wep = priv;
183 struct blkcipher_desc desc = { .tfm = wep->rx_tfm }; 185 SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
184 u32 crc, klen, plen; 186 u32 crc, klen, plen;
185 u8 key[WEP_KEY_LEN + 3]; 187 u8 key[WEP_KEY_LEN + 3];
186 u8 keyidx, *pos, icv[4]; 188 u8 keyidx, *pos, icv[4];
187 struct scatterlist sg; 189 struct scatterlist sg;
190 int err;
188 191
189 if (skb->len < hdr_len + 8) 192 if (skb->len < hdr_len + 8)
190 return -1; 193 return -1;
@@ -205,9 +208,14 @@ static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
205 /* Apply RC4 to data and compute CRC32 over decrypted data */ 208 /* Apply RC4 to data and compute CRC32 over decrypted data */
206 plen = skb->len - hdr_len - 8; 209 plen = skb->len - hdr_len - 8;
207 210
208 crypto_blkcipher_setkey(wep->rx_tfm, key, klen); 211 crypto_skcipher_setkey(wep->rx_tfm, key, klen);
209 sg_init_one(&sg, pos, plen + 4); 212 sg_init_one(&sg, pos, plen + 4);
210 if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) 213 skcipher_request_set_tfm(req, wep->rx_tfm);
214 skcipher_request_set_callback(req, 0, NULL, NULL);
215 skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
216 err = crypto_skcipher_decrypt(req);
217 skcipher_request_zero(req);
218 if (err)
211 return -7; 219 return -7;
212 220
213 crc = ~crc32_le(~0, pos, plen); 221 crc = ~crc32_le(~0, pos, plen);
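The WEP conversion follows the same recipe. Two details carry over from the hunks above: passing CRYPTO_ALG_ASYNC as the mask (with type 0) requests a synchronous transform, which is what makes the on-stack request macros safe to use here, and the crypto_free_skcipher()/crypto_free_ahash() helpers tolerate a NULL transform, which is presumably why the 'if (tfm)' guards on the error and teardown paths could be dropped. A two-line sketch of the simplified teardown:

/* Sketch: no NULL checks needed, the free helpers ignore a NULL tfm. */
static void wep_free_sketch(struct crypto_skcipher *tx, struct crypto_skcipher *rx)
{
	crypto_free_skcipher(tx);	/* safe even when tx == NULL */
	crypto_free_skcipher(rx);
}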
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index fb44fa3bf4ef..ff328250bc44 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -711,7 +711,7 @@ EXPORT_SYMBOL(cfg80211_rx_mgmt);
711 711
712void cfg80211_dfs_channels_update_work(struct work_struct *work) 712void cfg80211_dfs_channels_update_work(struct work_struct *work)
713{ 713{
714 struct delayed_work *delayed_work; 714 struct delayed_work *delayed_work = to_delayed_work(work);
715 struct cfg80211_registered_device *rdev; 715 struct cfg80211_registered_device *rdev;
716 struct cfg80211_chan_def chandef; 716 struct cfg80211_chan_def chandef;
717 struct ieee80211_supported_band *sband; 717 struct ieee80211_supported_band *sband;
@@ -721,7 +721,6 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
721 unsigned long timeout, next_time = 0; 721 unsigned long timeout, next_time = 0;
722 int bandid, i; 722 int bandid, i;
723 723
724 delayed_work = container_of(work, struct delayed_work, work);
725 rdev = container_of(delayed_work, struct cfg80211_registered_device, 724 rdev = container_of(delayed_work, struct cfg80211_registered_device,
726 dfs_update_channels_wk); 725 dfs_update_channels_wk);
727 wiphy = &rdev->wiphy; 726 wiphy = &rdev->wiphy;
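The mlme.c hunk is a cleanup: an open-coded container_of(work, struct delayed_work, work) is replaced with the equivalent to_delayed_work() helper from <linux/workqueue.h>. The usual shape of a delayed-work handler, with struct foo and foo_work_fn as placeholders:

/* Sketch: recovering the containing object inside a delayed-work handler. */
struct foo {
	struct delayed_work dwork;
};

static void foo_work_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct foo *foo = container_of(dwork, struct foo, dwork);

	(void)foo;	/* real handlers would use foo here */
}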
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 711cb7ad6ae0..056a7307862b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> 4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
5 * Copyright 2013-2014 Intel Mobile Communications GmbH 5 * Copyright 2013-2014 Intel Mobile Communications GmbH
6 * Copyright 2015 Intel Deutschland GmbH 6 * Copyright 2015-2016 Intel Deutschland GmbH
7 */ 7 */
8 8
9#include <linux/if.h> 9#include <linux/if.h>
@@ -401,6 +401,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
401 [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, 401 [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
402 [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, 402 [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 },
403 [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, 403 [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG },
404 [NL80211_ATTR_PBSS] = { .type = NLA_FLAG },
404}; 405};
405 406
406/* policy for the key attributes */ 407/* policy for the key attributes */
@@ -3461,6 +3462,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
3461 return PTR_ERR(params.acl); 3462 return PTR_ERR(params.acl);
3462 } 3463 }
3463 3464
3465 params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
3466 if (params.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ])
3467 return -EOPNOTSUPP;
3468
3464 wdev_lock(wdev); 3469 wdev_lock(wdev);
3465 err = rdev_start_ap(rdev, dev, &params); 3470 err = rdev_start_ap(rdev, dev, &params);
3466 if (!err) { 3471 if (!err) {
@@ -7281,9 +7286,11 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
7281 } 7286 }
7282 7287
7283 if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) { 7288 if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
7284 if (!(rdev->wiphy.features & 7289 if (!((rdev->wiphy.features &
7285 NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) || 7290 NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
7286 !(rdev->wiphy.features & NL80211_FEATURE_QUIET)) 7291 (rdev->wiphy.features & NL80211_FEATURE_QUIET)) &&
7292 !wiphy_ext_feature_isset(&rdev->wiphy,
7293 NL80211_EXT_FEATURE_RRM))
7287 return -EINVAL; 7294 return -EINVAL;
7288 req.flags |= ASSOC_REQ_USE_RRM; 7295 req.flags |= ASSOC_REQ_USE_RRM;
7289 } 7296 }
@@ -7971,15 +7978,23 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
7971 } 7978 }
7972 7979
7973 if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) { 7980 if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
7974 if (!(rdev->wiphy.features & 7981 if (!((rdev->wiphy.features &
7975 NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) || 7982 NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
7976 !(rdev->wiphy.features & NL80211_FEATURE_QUIET)) { 7983 (rdev->wiphy.features & NL80211_FEATURE_QUIET)) &&
7984 !wiphy_ext_feature_isset(&rdev->wiphy,
7985 NL80211_EXT_FEATURE_RRM)) {
7977 kzfree(connkeys); 7986 kzfree(connkeys);
7978 return -EINVAL; 7987 return -EINVAL;
7979 } 7988 }
7980 connect.flags |= ASSOC_REQ_USE_RRM; 7989 connect.flags |= ASSOC_REQ_USE_RRM;
7981 } 7990 }
7982 7991
7992 connect.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
7993 if (connect.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ]) {
7994 kzfree(connkeys);
7995 return -EOPNOTSUPP;
7996 }
7997
7983 wdev_lock(dev->ieee80211_ptr); 7998 wdev_lock(dev->ieee80211_ptr);
7984 err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL); 7999 err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL);
7985 wdev_unlock(dev->ieee80211_ptr); 8000 wdev_unlock(dev->ieee80211_ptr);
@@ -13201,7 +13216,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
13201 struct wireless_dev *wdev; 13216 struct wireless_dev *wdev;
13202 struct cfg80211_beacon_registration *reg, *tmp; 13217 struct cfg80211_beacon_registration *reg, *tmp;
13203 13218
13204 if (state != NETLINK_URELEASE) 13219 if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC)
13205 return NOTIFY_DONE; 13220 return NOTIFY_DONE;
13206 13221
13207 rcu_read_lock(); 13222 rcu_read_lock();
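On the nl80211.c hunks: the RRM check in both the associate and the connect path used to reject the request unless the wiphy advertised either DS-params-in-probes or quiet support; it now rejects it unless the wiphy advertises both of those legacy flags together or the new NL80211_EXT_FEATURE_RRM extended feature. The PBSS additions simply refuse NL80211_ATTR_PBSS when no 60 GHz band is registered, and the netlink notifier now also checks that the releasing socket belongs to generic netlink. A restatement of the RRM acceptance condition as a boolean fragment (rdev as in the hunks above):

/* Fragment only: the condition under which ASSOC_REQ_USE_RRM is accepted. */
bool rrm_ok =
	((rdev->wiphy.features & NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
	 (rdev->wiphy.features & NL80211_FEATURE_QUIET)) ||
	wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_RRM);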
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 722da616438c..6582d155e2fc 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -43,6 +43,7 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = {
43 [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, }, 43 [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, },
44 [IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, }, 44 [IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, },
45 [IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, }, 45 [IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, },
46 [IEEE80211_RADIOTAP_VHT] = { .align = 2, .size = 12, },
46 /* 47 /*
47 * add more here as they are defined in radiotap.h 48 * add more here as they are defined in radiotap.h
48 */ 49 */
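The radiotap hunk registers the alignment and size of the VHT field (2-byte alignment, 12 bytes) so ieee80211_radiotap_iterator_next() can locate the field and step over it like any other entry. Roughly, per entry in that table (fragment, with offset standing in for the iterator's running position):

/* Sketch only: align to the field's boundary, then consume its size. */
offset = ALIGN(offset, rtap_namespace_sizes[IEEE80211_RADIOTAP_VHT].align);
offset += rtap_namespace_sizes[IEEE80211_RADIOTAP_VHT].size;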
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 547ceecc0523..c5fb317eee68 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -60,13 +60,6 @@
60#include "regdb.h" 60#include "regdb.h"
61#include "nl80211.h" 61#include "nl80211.h"
62 62
63#ifdef CONFIG_CFG80211_REG_DEBUG
64#define REG_DBG_PRINT(format, args...) \
65 printk(KERN_DEBUG pr_fmt(format), ##args)
66#else
67#define REG_DBG_PRINT(args...)
68#endif
69
70/* 63/*
71 * Grace period we give before making sure all current interfaces reside on 64 * Grace period we give before making sure all current interfaces reside on
72 * channels allowed by the current regulatory domain. 65 * channels allowed by the current regulatory domain.
@@ -178,12 +171,10 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
178 if (wiphy_regd->dfs_region == regd->dfs_region) 171 if (wiphy_regd->dfs_region == regd->dfs_region)
179 goto out; 172 goto out;
180 173
181 REG_DBG_PRINT("%s: device specific dfs_region " 174 pr_debug("%s: device specific dfs_region (%s) disagrees with cfg80211's central dfs_region (%s)\n",
182 "(%s) disagrees with cfg80211's " 175 dev_name(&wiphy->dev),
183 "central dfs_region (%s)\n", 176 reg_dfs_region_str(wiphy_regd->dfs_region),
184 dev_name(&wiphy->dev), 177 reg_dfs_region_str(regd->dfs_region));
185 reg_dfs_region_str(wiphy_regd->dfs_region),
186 reg_dfs_region_str(regd->dfs_region));
187 178
188out: 179out:
189 return regd->dfs_region; 180 return regd->dfs_region;
@@ -543,7 +534,7 @@ static DECLARE_DELAYED_WORK(crda_timeout, crda_timeout_work);
543 534
544static void crda_timeout_work(struct work_struct *work) 535static void crda_timeout_work(struct work_struct *work)
545{ 536{
546 REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); 537 pr_debug("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
547 rtnl_lock(); 538 rtnl_lock();
548 reg_crda_timeouts++; 539 reg_crda_timeouts++;
549 restore_regulatory_settings(true); 540 restore_regulatory_settings(true);
@@ -585,7 +576,7 @@ static int call_crda(const char *alpha2)
585 576
586 if (!is_world_regdom((char *) alpha2)) 577 if (!is_world_regdom((char *) alpha2))
587 pr_debug("Calling CRDA for country: %c%c\n", 578 pr_debug("Calling CRDA for country: %c%c\n",
588 alpha2[0], alpha2[1]); 579 alpha2[0], alpha2[1]);
589 else 580 else
590 pr_debug("Calling CRDA to update world regulatory domain\n"); 581 pr_debug("Calling CRDA to update world regulatory domain\n");
591 582
@@ -1132,42 +1123,6 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
1132} 1123}
1133EXPORT_SYMBOL(reg_initiator_name); 1124EXPORT_SYMBOL(reg_initiator_name);
1134 1125
1135static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
1136 struct ieee80211_channel *chan,
1137 const struct ieee80211_reg_rule *reg_rule)
1138{
1139#ifdef CONFIG_CFG80211_REG_DEBUG
1140 const struct ieee80211_power_rule *power_rule;
1141 const struct ieee80211_freq_range *freq_range;
1142 char max_antenna_gain[32], bw[32];
1143
1144 power_rule = &reg_rule->power_rule;
1145 freq_range = &reg_rule->freq_range;
1146
1147 if (!power_rule->max_antenna_gain)
1148 snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A");
1149 else
1150 snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d mBi",
1151 power_rule->max_antenna_gain);
1152
1153 if (reg_rule->flags & NL80211_RRF_AUTO_BW)
1154 snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
1155 freq_range->max_bandwidth_khz,
1156 reg_get_max_bandwidth(regd, reg_rule));
1157 else
1158 snprintf(bw, sizeof(bw), "%d KHz",
1159 freq_range->max_bandwidth_khz);
1160
1161 REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n",
1162 chan->center_freq);
1163
1164 REG_DBG_PRINT("(%d KHz - %d KHz @ %s), (%s, %d mBm)\n",
1165 freq_range->start_freq_khz, freq_range->end_freq_khz,
1166 bw, max_antenna_gain,
1167 power_rule->max_eirp);
1168#endif
1169}
1170
1171static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd, 1126static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd,
1172 const struct ieee80211_reg_rule *reg_rule, 1127 const struct ieee80211_reg_rule *reg_rule,
1173 const struct ieee80211_channel *chan) 1128 const struct ieee80211_channel *chan)
@@ -1242,20 +1197,19 @@ static void handle_channel(struct wiphy *wiphy,
1242 if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && 1197 if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
1243 request_wiphy && request_wiphy == wiphy && 1198 request_wiphy && request_wiphy == wiphy &&
1244 request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) { 1199 request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
1245 REG_DBG_PRINT("Disabling freq %d MHz for good\n", 1200 pr_debug("Disabling freq %d MHz for good\n",
1246 chan->center_freq); 1201 chan->center_freq);
1247 chan->orig_flags |= IEEE80211_CHAN_DISABLED; 1202 chan->orig_flags |= IEEE80211_CHAN_DISABLED;
1248 chan->flags = chan->orig_flags; 1203 chan->flags = chan->orig_flags;
1249 } else { 1204 } else {
1250 REG_DBG_PRINT("Disabling freq %d MHz\n", 1205 pr_debug("Disabling freq %d MHz\n",
1251 chan->center_freq); 1206 chan->center_freq);
1252 chan->flags |= IEEE80211_CHAN_DISABLED; 1207 chan->flags |= IEEE80211_CHAN_DISABLED;
1253 } 1208 }
1254 return; 1209 return;
1255 } 1210 }
1256 1211
1257 regd = reg_get_regdomain(wiphy); 1212 regd = reg_get_regdomain(wiphy);
1258 chan_reg_rule_print_dbg(regd, chan, reg_rule);
1259 1213
1260 power_rule = &reg_rule->power_rule; 1214 power_rule = &reg_rule->power_rule;
1261 bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan); 1215 bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan);
@@ -1393,18 +1347,15 @@ static bool ignore_reg_update(struct wiphy *wiphy,
1393 return true; 1347 return true;
1394 1348
1395 if (!lr) { 1349 if (!lr) {
1396 REG_DBG_PRINT("Ignoring regulatory request set by %s " 1350 pr_debug("Ignoring regulatory request set by %s since last_request is not set\n",
1397 "since last_request is not set\n", 1351 reg_initiator_name(initiator));
1398 reg_initiator_name(initiator));
1399 return true; 1352 return true;
1400 } 1353 }
1401 1354
1402 if (initiator == NL80211_REGDOM_SET_BY_CORE && 1355 if (initiator == NL80211_REGDOM_SET_BY_CORE &&
1403 wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { 1356 wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) {
1404 REG_DBG_PRINT("Ignoring regulatory request set by %s " 1357 pr_debug("Ignoring regulatory request set by %s since the driver uses its own custom regulatory domain\n",
1405 "since the driver uses its own custom " 1358 reg_initiator_name(initiator));
1406 "regulatory domain\n",
1407 reg_initiator_name(initiator));
1408 return true; 1359 return true;
1409 } 1360 }
1410 1361
@@ -1415,10 +1366,8 @@ static bool ignore_reg_update(struct wiphy *wiphy,
1415 if (wiphy_strict_alpha2_regd(wiphy) && !wiphy->regd && 1366 if (wiphy_strict_alpha2_regd(wiphy) && !wiphy->regd &&
1416 initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && 1367 initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
1417 !is_world_regdom(lr->alpha2)) { 1368 !is_world_regdom(lr->alpha2)) {
1418 REG_DBG_PRINT("Ignoring regulatory request set by %s " 1369 pr_debug("Ignoring regulatory request set by %s since the driver requires its own regulatory domain to be set first\n",
1419 "since the driver requires its own regulatory " 1370 reg_initiator_name(initiator));
1420 "domain to be set first\n",
1421 reg_initiator_name(initiator));
1422 return true; 1371 return true;
1423 } 1372 }
1424 1373
@@ -1699,7 +1648,7 @@ static void reg_check_chans_work(struct work_struct *work)
1699{ 1648{
1700 struct cfg80211_registered_device *rdev; 1649 struct cfg80211_registered_device *rdev;
1701 1650
1702 REG_DBG_PRINT("Verifying active interfaces after reg change\n"); 1651 pr_debug("Verifying active interfaces after reg change\n");
1703 rtnl_lock(); 1652 rtnl_lock();
1704 1653
1705 list_for_each_entry(rdev, &cfg80211_rdev_list, list) 1654 list_for_each_entry(rdev, &cfg80211_rdev_list, list)
@@ -1781,8 +1730,8 @@ static void handle_channel_custom(struct wiphy *wiphy,
1781 } 1730 }
1782 1731
1783 if (IS_ERR(reg_rule)) { 1732 if (IS_ERR(reg_rule)) {
1784 REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", 1733 pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n",
1785 chan->center_freq); 1734 chan->center_freq);
1786 if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { 1735 if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
1787 chan->flags |= IEEE80211_CHAN_DISABLED; 1736 chan->flags |= IEEE80211_CHAN_DISABLED;
1788 } else { 1737 } else {
@@ -1792,8 +1741,6 @@ static void handle_channel_custom(struct wiphy *wiphy,
1792 return; 1741 return;
1793 } 1742 }
1794 1743
1795 chan_reg_rule_print_dbg(regd, chan, reg_rule);
1796
1797 power_rule = &reg_rule->power_rule; 1744 power_rule = &reg_rule->power_rule;
1798 bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan); 1745 bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan);
1799 1746
@@ -2524,7 +2471,7 @@ static void restore_alpha2(char *alpha2, bool reset_user)
2524 if (is_user_regdom_saved()) { 2471 if (is_user_regdom_saved()) {
2525 /* Unless we're asked to ignore it and reset it */ 2472 /* Unless we're asked to ignore it and reset it */
2526 if (reset_user) { 2473 if (reset_user) {
2527 REG_DBG_PRINT("Restoring regulatory settings including user preference\n"); 2474 pr_debug("Restoring regulatory settings including user preference\n");
2528 user_alpha2[0] = '9'; 2475 user_alpha2[0] = '9';
2529 user_alpha2[1] = '7'; 2476 user_alpha2[1] = '7';
2530 2477
@@ -2534,24 +2481,24 @@ static void restore_alpha2(char *alpha2, bool reset_user)
2534 * back as they were for a full restore. 2481 * back as they were for a full restore.
2535 */ 2482 */
2536 if (!is_world_regdom(ieee80211_regdom)) { 2483 if (!is_world_regdom(ieee80211_regdom)) {
2537 REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", 2484 pr_debug("Keeping preference on module parameter ieee80211_regdom: %c%c\n",
2538 ieee80211_regdom[0], ieee80211_regdom[1]); 2485 ieee80211_regdom[0], ieee80211_regdom[1]);
2539 alpha2[0] = ieee80211_regdom[0]; 2486 alpha2[0] = ieee80211_regdom[0];
2540 alpha2[1] = ieee80211_regdom[1]; 2487 alpha2[1] = ieee80211_regdom[1];
2541 } 2488 }
2542 } else { 2489 } else {
2543 REG_DBG_PRINT("Restoring regulatory settings while preserving user preference for: %c%c\n", 2490 pr_debug("Restoring regulatory settings while preserving user preference for: %c%c\n",
2544 user_alpha2[0], user_alpha2[1]); 2491 user_alpha2[0], user_alpha2[1]);
2545 alpha2[0] = user_alpha2[0]; 2492 alpha2[0] = user_alpha2[0];
2546 alpha2[1] = user_alpha2[1]; 2493 alpha2[1] = user_alpha2[1];
2547 } 2494 }
2548 } else if (!is_world_regdom(ieee80211_regdom)) { 2495 } else if (!is_world_regdom(ieee80211_regdom)) {
2549 REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", 2496 pr_debug("Keeping preference on module parameter ieee80211_regdom: %c%c\n",
2550 ieee80211_regdom[0], ieee80211_regdom[1]); 2497 ieee80211_regdom[0], ieee80211_regdom[1]);
2551 alpha2[0] = ieee80211_regdom[0]; 2498 alpha2[0] = ieee80211_regdom[0];
2552 alpha2[1] = ieee80211_regdom[1]; 2499 alpha2[1] = ieee80211_regdom[1];
2553 } else 2500 } else
2554 REG_DBG_PRINT("Restoring regulatory settings\n"); 2501 pr_debug("Restoring regulatory settings\n");
2555} 2502}
2556 2503
2557static void restore_custom_reg_settings(struct wiphy *wiphy) 2504static void restore_custom_reg_settings(struct wiphy *wiphy)
@@ -2663,14 +2610,14 @@ static void restore_regulatory_settings(bool reset_user)
2663 list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list); 2610 list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list);
2664 spin_unlock(&reg_requests_lock); 2611 spin_unlock(&reg_requests_lock);
2665 2612
2666 REG_DBG_PRINT("Kicking the queue\n"); 2613 pr_debug("Kicking the queue\n");
2667 2614
2668 schedule_work(&reg_work); 2615 schedule_work(&reg_work);
2669} 2616}
2670 2617
2671void regulatory_hint_disconnect(void) 2618void regulatory_hint_disconnect(void)
2672{ 2619{
2673 REG_DBG_PRINT("All devices are disconnected, going to restore regulatory settings\n"); 2620 pr_debug("All devices are disconnected, going to restore regulatory settings\n");
2674 restore_regulatory_settings(false); 2621 restore_regulatory_settings(false);
2675} 2622}
2676 2623
@@ -2718,10 +2665,10 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
2718 if (!reg_beacon) 2665 if (!reg_beacon)
2719 return -ENOMEM; 2666 return -ENOMEM;
2720 2667
2721 REG_DBG_PRINT("Found new beacon on frequency: %d MHz (Ch %d) on %s\n", 2668 pr_debug("Found new beacon on frequency: %d MHz (Ch %d) on %s\n",
2722 beacon_chan->center_freq, 2669 beacon_chan->center_freq,
2723 ieee80211_frequency_to_channel(beacon_chan->center_freq), 2670 ieee80211_frequency_to_channel(beacon_chan->center_freq),
2724 wiphy_name(wiphy)); 2671 wiphy_name(wiphy));
2725 2672
2726 memcpy(&reg_beacon->chan, beacon_chan, 2673 memcpy(&reg_beacon->chan, beacon_chan,
2727 sizeof(struct ieee80211_channel)); 2674 sizeof(struct ieee80211_channel));
@@ -2800,8 +2747,7 @@ bool reg_supported_dfs_region(enum nl80211_dfs_regions dfs_region)
2800 case NL80211_DFS_JP: 2747 case NL80211_DFS_JP:
2801 return true; 2748 return true;
2802 default: 2749 default:
2803 REG_DBG_PRINT("Ignoring unknown DFS master region: %d\n", 2750 pr_debug("Ignoring unknown DFS master region: %d\n", dfs_region);
2804 dfs_region);
2805 return false; 2751 return false;
2806 } 2752 }
2807} 2753}
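Together with the Kconfig hunk earlier that removes CFG80211_REG_DEBUG, reg.c drops its private REG_DBG_PRINT() macro and the chan_reg_rule_print_dbg() dump in favour of plain pr_debug(). The messages therefore follow the standard rules: compiled in when DEBUG is defined, or selectable at run time through dynamic debug when CONFIG_DYNAMIC_DEBUG is set, with no dedicated Kconfig switch. A minimal sketch of the pattern, with the "cfg80211: " prefix as a placeholder for whatever pr_fmt the file really defines:

/* pr_fmt() must be defined before the first include to take effect. */
#define pr_fmt(fmt) "cfg80211: " fmt

#include <linux/printk.h>

static void sketch_report(int freq)
{
	/* Compiled out unless DEBUG or CONFIG_DYNAMIC_DEBUG is enabled. */
	pr_debug("Disabling freq %d MHz\n", freq);
}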
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d49ed7666d4c..544558171787 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -264,7 +264,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
264 wdev->conn->params.bssid, 264 wdev->conn->params.bssid,
265 wdev->conn->params.ssid, 265 wdev->conn->params.ssid,
266 wdev->conn->params.ssid_len, 266 wdev->conn->params.ssid_len,
267 IEEE80211_BSS_TYPE_ESS, 267 wdev->conn_bss_type,
268 IEEE80211_PRIVACY(wdev->conn->params.privacy)); 268 IEEE80211_PRIVACY(wdev->conn->params.privacy));
269 if (!bss) 269 if (!bss)
270 return NULL; 270 return NULL;
@@ -687,7 +687,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
687 WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); 687 WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect);
688 bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, 688 bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
689 wdev->ssid, wdev->ssid_len, 689 wdev->ssid, wdev->ssid_len,
690 IEEE80211_BSS_TYPE_ESS, 690 wdev->conn_bss_type,
691 IEEE80211_PRIVACY_ANY); 691 IEEE80211_PRIVACY_ANY);
692 if (bss) 692 if (bss)
693 cfg80211_hold_bss(bss_from_pub(bss)); 693 cfg80211_hold_bss(bss_from_pub(bss));
@@ -846,7 +846,7 @@ void cfg80211_roamed(struct net_device *dev,
846 846
847 bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, 847 bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid,
848 wdev->ssid_len, 848 wdev->ssid_len,
849 IEEE80211_BSS_TYPE_ESS, IEEE80211_PRIVACY_ANY); 849 wdev->conn_bss_type, IEEE80211_PRIVACY_ANY);
850 if (WARN_ON(!bss)) 850 if (WARN_ON(!bss))
851 return; 851 return;
852 852
@@ -1023,6 +1023,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
1023 memcpy(wdev->ssid, connect->ssid, connect->ssid_len); 1023 memcpy(wdev->ssid, connect->ssid, connect->ssid_len);
1024 wdev->ssid_len = connect->ssid_len; 1024 wdev->ssid_len = connect->ssid_len;
1025 1025
1026 wdev->conn_bss_type = connect->pbss ? IEEE80211_BSS_TYPE_PBSS :
1027 IEEE80211_BSS_TYPE_ESS;
1028
1026 if (!rdev->ops->connect) 1029 if (!rdev->ops->connect)
1027 err = cfg80211_sme_connect(wdev, connect, prev_bssid); 1030 err = cfg80211_sme_connect(wdev, connect, prev_bssid);
1028 else 1031 else
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 92770427b211..9f440a9de63b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -393,9 +393,9 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
393} 393}
394EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); 394EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
395 395
396unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) 396static unsigned int __ieee80211_get_mesh_hdrlen(u8 flags)
397{ 397{
398 int ae = meshhdr->flags & MESH_FLAGS_AE; 398 int ae = flags & MESH_FLAGS_AE;
399 /* 802.11-2012, 8.2.4.7.3 */ 399 /* 802.11-2012, 8.2.4.7.3 */
400 switch (ae) { 400 switch (ae) {
401 default: 401 default:
@@ -407,21 +407,31 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
407 return 18; 407 return 18;
408 } 408 }
409} 409}
410
411unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
412{
413 return __ieee80211_get_mesh_hdrlen(meshhdr->flags);
414}
410EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); 415EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
411 416
412int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, 417static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
413 enum nl80211_iftype iftype) 418 const u8 *addr, enum nl80211_iftype iftype)
414{ 419{
415 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 420 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
416 u16 hdrlen, ethertype; 421 struct {
417 u8 *payload; 422 u8 hdr[ETH_ALEN] __aligned(2);
418 u8 dst[ETH_ALEN]; 423 __be16 proto;
419 u8 src[ETH_ALEN] __aligned(2); 424 } payload;
425 struct ethhdr tmp;
426 u16 hdrlen;
427 u8 mesh_flags = 0;
420 428
421 if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) 429 if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
422 return -1; 430 return -1;
423 431
424 hdrlen = ieee80211_hdrlen(hdr->frame_control); 432 hdrlen = ieee80211_hdrlen(hdr->frame_control);
433 if (skb->len < hdrlen + 8)
434 return -1;
425 435
426 /* convert IEEE 802.11 header + possible LLC headers into Ethernet 436 /* convert IEEE 802.11 header + possible LLC headers into Ethernet
427 * header 437 * header
@@ -432,8 +442,11 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
432 * 1 0 BSSID SA DA n/a 442 * 1 0 BSSID SA DA n/a
433 * 1 1 RA TA DA SA 443 * 1 1 RA TA DA SA
434 */ 444 */
435 memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN); 445 memcpy(tmp.h_dest, ieee80211_get_DA(hdr), ETH_ALEN);
436 memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN); 446 memcpy(tmp.h_source, ieee80211_get_SA(hdr), ETH_ALEN);
447
448 if (iftype == NL80211_IFTYPE_MESH_POINT)
449 skb_copy_bits(skb, hdrlen, &mesh_flags, 1);
437 450
438 switch (hdr->frame_control & 451 switch (hdr->frame_control &
439 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 452 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
@@ -450,44 +463,31 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
450 iftype != NL80211_IFTYPE_STATION)) 463 iftype != NL80211_IFTYPE_STATION))
451 return -1; 464 return -1;
452 if (iftype == NL80211_IFTYPE_MESH_POINT) { 465 if (iftype == NL80211_IFTYPE_MESH_POINT) {
453 struct ieee80211s_hdr *meshdr = 466 if (mesh_flags & MESH_FLAGS_AE_A4)
454 (struct ieee80211s_hdr *) (skb->data + hdrlen);
455 /* make sure meshdr->flags is on the linear part */
456 if (!pskb_may_pull(skb, hdrlen + 1))
457 return -1;
458 if (meshdr->flags & MESH_FLAGS_AE_A4)
459 return -1; 467 return -1;
460 if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { 468 if (mesh_flags & MESH_FLAGS_AE_A5_A6) {
461 skb_copy_bits(skb, hdrlen + 469 skb_copy_bits(skb, hdrlen +
462 offsetof(struct ieee80211s_hdr, eaddr1), 470 offsetof(struct ieee80211s_hdr, eaddr1),
463 dst, ETH_ALEN); 471 tmp.h_dest, 2 * ETH_ALEN);
464 skb_copy_bits(skb, hdrlen +
465 offsetof(struct ieee80211s_hdr, eaddr2),
466 src, ETH_ALEN);
467 } 472 }
468 hdrlen += ieee80211_get_mesh_hdrlen(meshdr); 473 hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
469 } 474 }
470 break; 475 break;
471 case cpu_to_le16(IEEE80211_FCTL_FROMDS): 476 case cpu_to_le16(IEEE80211_FCTL_FROMDS):
472 if ((iftype != NL80211_IFTYPE_STATION && 477 if ((iftype != NL80211_IFTYPE_STATION &&
473 iftype != NL80211_IFTYPE_P2P_CLIENT && 478 iftype != NL80211_IFTYPE_P2P_CLIENT &&
474 iftype != NL80211_IFTYPE_MESH_POINT) || 479 iftype != NL80211_IFTYPE_MESH_POINT) ||
475 (is_multicast_ether_addr(dst) && 480 (is_multicast_ether_addr(tmp.h_dest) &&
476 ether_addr_equal(src, addr))) 481 ether_addr_equal(tmp.h_source, addr)))
477 return -1; 482 return -1;
478 if (iftype == NL80211_IFTYPE_MESH_POINT) { 483 if (iftype == NL80211_IFTYPE_MESH_POINT) {
479 struct ieee80211s_hdr *meshdr = 484 if (mesh_flags & MESH_FLAGS_AE_A5_A6)
480 (struct ieee80211s_hdr *) (skb->data + hdrlen);
481 /* make sure meshdr->flags is on the linear part */
482 if (!pskb_may_pull(skb, hdrlen + 1))
483 return -1;
484 if (meshdr->flags & MESH_FLAGS_AE_A5_A6)
485 return -1; 485 return -1;
486 if (meshdr->flags & MESH_FLAGS_AE_A4) 486 if (mesh_flags & MESH_FLAGS_AE_A4)
487 skb_copy_bits(skb, hdrlen + 487 skb_copy_bits(skb, hdrlen +
488 offsetof(struct ieee80211s_hdr, eaddr1), 488 offsetof(struct ieee80211s_hdr, eaddr1),
489 src, ETH_ALEN); 489 tmp.h_source, ETH_ALEN);
490 hdrlen += ieee80211_get_mesh_hdrlen(meshdr); 490 hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
491 } 491 }
492 break; 492 break;
493 case cpu_to_le16(0): 493 case cpu_to_le16(0):
@@ -498,33 +498,33 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
498 break; 498 break;
499 } 499 }
500 500
501 if (!pskb_may_pull(skb, hdrlen + 8)) 501 skb_copy_bits(skb, hdrlen, &payload, sizeof(payload));
502 return -1; 502 tmp.h_proto = payload.proto;
503
504 payload = skb->data + hdrlen;
505 ethertype = (payload[6] << 8) | payload[7];
506 503
507 if (likely((ether_addr_equal(payload, rfc1042_header) && 504 if (likely((ether_addr_equal(payload.hdr, rfc1042_header) &&
508 ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || 505 tmp.h_proto != htons(ETH_P_AARP) &&
509 ether_addr_equal(payload, bridge_tunnel_header))) { 506 tmp.h_proto != htons(ETH_P_IPX)) ||
507 ether_addr_equal(payload.hdr, bridge_tunnel_header)))
510 /* remove RFC1042 or Bridge-Tunnel encapsulation and 508 /* remove RFC1042 or Bridge-Tunnel encapsulation and
511 * replace EtherType */ 509 * replace EtherType */
512 skb_pull(skb, hdrlen + 6); 510 hdrlen += ETH_ALEN + 2;
513 memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); 511 else
514 memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); 512 tmp.h_proto = htons(skb->len);
515 } else {
516 struct ethhdr *ehdr;
517 __be16 len;
518 513
519 skb_pull(skb, hdrlen); 514 pskb_pull(skb, hdrlen);
520 len = htons(skb->len); 515
516 if (!ehdr)
521 ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); 517 ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
522 memcpy(ehdr->h_dest, dst, ETH_ALEN); 518 memcpy(ehdr, &tmp, sizeof(tmp));
523 memcpy(ehdr->h_source, src, ETH_ALEN); 519
524 ehdr->h_proto = len;
525 }
526 return 0; 520 return 0;
527} 521}
522
523int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
524 enum nl80211_iftype iftype)
525{
526 return __ieee80211_data_to_8023(skb, NULL, addr, iftype);
527}
528EXPORT_SYMBOL(ieee80211_data_to_8023); 528EXPORT_SYMBOL(ieee80211_data_to_8023);
529 529
530int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, 530int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
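The ieee80211_data_to_8023() rework above composes the Ethernet header in a stack-local struct ethhdr and reads the mesh flags and the LLC/SNAP bytes with skb_copy_bits() instead of pskb_may_pull(), so the conversion no longer requires the payload to be linear; the exported function becomes a wrapper around __ieee80211_data_to_8023(), which can also hand the composed header to the A-MSDU code further down. A hedged sketch of the header-probing idiom (simplified, omitting the AARP/IPX exception; needs <linux/etherdevice.h>, <linux/skbuff.h>, <net/cfg80211.h>):

/* Sketch only: derive the EtherType from the bytes after an 802.11
 * header of hdrlen bytes, without forcing them into the linear area. */
static __be16 ethertype_sketch(struct sk_buff *skb, unsigned int hdrlen)
{
	struct {
		u8 hdr[ETH_ALEN] __aligned(2);
		__be16 proto;
	} payload;

	if (skb->len < hdrlen + sizeof(payload))
		return 0;

	skb_copy_bits(skb, hdrlen, &payload, sizeof(payload));

	if (ether_addr_equal(payload.hdr, rfc1042_header) ||
	    ether_addr_equal(payload.hdr, bridge_tunnel_header))
		return payload.proto;		/* encapsulated EtherType */

	return htons(skb->len);			/* 802.3 length field otherwise */
}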
@@ -636,7 +636,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
636 /* Update skb pointers to various headers since this modified frame 636 /* Update skb pointers to various headers since this modified frame
637 * is going to go through Linux networking code that may potentially 637 * is going to go through Linux networking code that may potentially
638 * need things like pointer to IP header. */ 638 * need things like pointer to IP header. */
639 skb_set_mac_header(skb, 0); 639 skb_reset_mac_header(skb);
640 skb_set_network_header(skb, nh_pos); 640 skb_set_network_header(skb, nh_pos);
641 skb_set_transport_header(skb, h_pos); 641 skb_set_transport_header(skb, h_pos);
642 642
@@ -644,70 +644,147 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
 }
 EXPORT_SYMBOL(ieee80211_data_from_8023);
 
+static void
+__frame_add_frag(struct sk_buff *skb, struct page *page,
+		 void *ptr, int len, int size)
+{
+	struct skb_shared_info *sh = skb_shinfo(skb);
+	int page_offset;
+
+	atomic_inc(&page->_count);
+	page_offset = ptr - page_address(page);
+	skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size);
+}
+
+static void
+__ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
+			    int offset, int len)
+{
+	struct skb_shared_info *sh = skb_shinfo(skb);
+	const skb_frag_t *frag = &sh->frags[-1];
+	struct page *frag_page;
+	void *frag_ptr;
+	int frag_len, frag_size;
+	int head_size = skb->len - skb->data_len;
+	int cur_len;
+
+	frag_page = virt_to_head_page(skb->head);
+	frag_ptr = skb->data;
+	frag_size = head_size;
+
+	while (offset >= frag_size) {
+		offset -= frag_size;
+		frag++;
+		frag_page = skb_frag_page(frag);
+		frag_ptr = skb_frag_address(frag);
+		frag_size = skb_frag_size(frag);
+	}
+
+	frag_ptr += offset;
+	frag_len = frag_size - offset;
+
+	cur_len = min(len, frag_len);
+
+	__frame_add_frag(frame, frag_page, frag_ptr, cur_len, frag_size);
+	len -= cur_len;
+
+	while (len > 0) {
+		frag++;
+		frag_len = skb_frag_size(frag);
+		cur_len = min(len, frag_len);
+		__frame_add_frag(frame, skb_frag_page(frag),
+				 skb_frag_address(frag), cur_len, frag_len);
+		len -= cur_len;
+	}
+}
+
+static struct sk_buff *
+__ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
+		       int offset, int len, bool reuse_frag)
+{
+	struct sk_buff *frame;
+	int cur_len = len;
+
+	if (skb->len - offset < len)
+		return NULL;
+
+	/*
+	 * When reusing framents, copy some data to the head to simplify
+	 * ethernet header handling and speed up protocol header processing
+	 * in the stack later.
+	 */
+	if (reuse_frag)
+		cur_len = min_t(int, len, 32);
+
+	/*
+	 * Allocate and reserve two bytes more for payload
+	 * alignment since sizeof(struct ethhdr) is 14.
+	 */
+	frame = dev_alloc_skb(hlen + sizeof(struct ethhdr) + 2 + cur_len);
+
+	skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2);
+	skb_copy_bits(skb, offset, skb_put(frame, cur_len), cur_len);
+
+	len -= cur_len;
+	if (!len)
+		return frame;
+
+	offset += cur_len;
+	__ieee80211_amsdu_copy_frag(skb, frame, offset, len);
+
+	return frame;
+}
 
 void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
 			      const u8 *addr, enum nl80211_iftype iftype,
 			      const unsigned int extra_headroom,
 			      bool has_80211_header)
 {
+	unsigned int hlen = ALIGN(extra_headroom, 4);
 	struct sk_buff *frame = NULL;
 	u16 ethertype;
 	u8 *payload;
-	const struct ethhdr *eth;
-	int remaining, err;
-	u8 dst[ETH_ALEN], src[ETH_ALEN];
+	int offset = 0, remaining, err;
+	struct ethhdr eth;
+	bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb);
+	bool reuse_skb = false;
+	bool last = false;
 
 	if (has_80211_header) {
-		err = ieee80211_data_to_8023(skb, addr, iftype);
+		err = __ieee80211_data_to_8023(skb, &eth, addr, iftype);
 		if (err)
 			goto out;
-
-		/* skip the wrapping header */
-		eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr));
-		if (!eth)
-			goto out;
-	} else {
-		eth = (struct ethhdr *) skb->data;
 	}
 
-	while (skb != frame) {
+	while (!last) {
+		unsigned int subframe_len;
+		int len;
 		u8 padding;
-		__be16 len = eth->h_proto;
-		unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len);
-
-		remaining = skb->len;
-		memcpy(dst, eth->h_dest, ETH_ALEN);
-		memcpy(src, eth->h_source, ETH_ALEN);
 
+		skb_copy_bits(skb, offset, &eth, sizeof(eth));
+		len = ntohs(eth.h_proto);
+		subframe_len = sizeof(struct ethhdr) + len;
 		padding = (4 - subframe_len) & 0x3;
+
 		/* the last MSDU has no padding */
+		remaining = skb->len - offset;
 		if (subframe_len > remaining)
 			goto purge;
 
-		skb_pull(skb, sizeof(struct ethhdr));
+		offset += sizeof(struct ethhdr);
 		/* reuse skb for the last subframe */
-		if (remaining <= subframe_len + padding)
+		last = remaining <= subframe_len + padding;
+		if (!skb_is_nonlinear(skb) && !reuse_frag && last) {
+			skb_pull(skb, offset);
 			frame = skb;
-		else {
-			unsigned int hlen = ALIGN(extra_headroom, 4);
-			/*
-			 * Allocate and reserve two bytes more for payload
-			 * alignment since sizeof(struct ethhdr) is 14.
-			 */
-			frame = dev_alloc_skb(hlen + subframe_len + 2);
+			reuse_skb = true;
+		} else {
+			frame = __ieee80211_amsdu_copy(skb, hlen, offset, len,
+						       reuse_frag);
 			if (!frame)
 				goto purge;
 
-			skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2);
-			memcpy(skb_put(frame, ntohs(len)), skb->data,
-			       ntohs(len));
-
-			eth = (struct ethhdr *)skb_pull(skb, ntohs(len) +
-							padding);
-			if (!eth) {
-				dev_kfree_skb(frame);
-				goto purge;
-			}
+			offset += len + padding;
 		}
 
 		skb_reset_network_header(frame);
@@ -716,24 +793,20 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
 
 		payload = frame->data;
 		ethertype = (payload[6] << 8) | payload[7];
-
 		if (likely((ether_addr_equal(payload, rfc1042_header) &&
 			    ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
 			   ether_addr_equal(payload, bridge_tunnel_header))) {
-			/* remove RFC1042 or Bridge-Tunnel
-			 * encapsulation and replace EtherType */
-			skb_pull(frame, 6);
-			memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
-			memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
-		} else {
-			memcpy(skb_push(frame, sizeof(__be16)), &len,
-			       sizeof(__be16));
-			memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
-			memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
+			eth.h_proto = htons(ethertype);
+			skb_pull(frame, ETH_ALEN + 2);
 		}
 
+		memcpy(skb_push(frame, sizeof(eth)), &eth, sizeof(eth));
 		__skb_queue_tail(list, frame);
 	}
 
+	if (!reuse_skb)
+		dev_kfree_skb(skb);
+
 	return;
 
  purge:
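Editor's note: the reworked loop above no longer pulls the skb per subframe; it keeps a running offset, reads each 14-byte subframe header with skb_copy_bits(), and steps forward by the payload length plus the pad that rounds the subframe up to a 4-byte boundary (the last subframe carries no pad). That arithmetic is easy to miss in the diff, so here is a stand-alone user-space sketch of just the walk; the flat buffer and function names are hypothetical, not the kernel API.

/*
 * Editor's sketch (user space, illustrative only) of the A-MSDU subframe
 * walk arithmetic: each subframe is a 14-byte header plus "len" payload
 * bytes, and every subframe except the last is padded to a 4-byte boundary.
 * The kernel code above walks a struct sk_buff; this walks a flat buffer.
 */
#include <stdint.h>
#include <stdio.h>

#define ETH_HDR_LEN 14

static void walk_amsdu(const uint8_t *buf, size_t total)
{
	size_t offset = 0;

	while (offset + ETH_HDR_LEN <= total) {
		/* bytes 12-13 of the subframe header carry the MSDU length */
		unsigned int len = (buf[offset + 12] << 8) | buf[offset + 13];
		unsigned int subframe_len = ETH_HDR_LEN + len;
		unsigned int padding = (4 - subframe_len) & 0x3;
		size_t remaining = total - offset;

		if (subframe_len > remaining)
			break;			/* truncated A-MSDU, stop */

		printf("subframe at %zu: %u payload bytes, %u pad bytes\n",
		       offset, len, padding);

		if (remaining <= subframe_len + padding)
			break;			/* last subframe, no padding */

		offset += subframe_len + padding;
	}
}

int main(void)
{
	uint8_t buf[64] = { 0 };

	/* first subframe: 3 payload bytes -> 17-byte subframe plus 3 pad bytes */
	buf[13] = 3;
	/* second subframe starts at 17 + 3 = 20 and carries 6 payload bytes */
	buf[20 + 13] = 6;

	walk_amsdu(buf, 20 + ETH_HDR_LEN + 6);	/* 40 bytes total */
	return 0;
}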
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 7ecd04c21360..997ff7b2509b 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
 
 	memset(&theirs, 0, sizeof(theirs));
 	memcpy(new, ours, sizeof(*new));
+	memset(dte, 0, sizeof(*dte));
 
 	len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask);
 	if (len < 0)
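Editor's note: the one-line x25 change zeroes the caller-supplied DTE facilities before x25_parse_facilities() runs, so fields the peer never negotiates cannot be read back as uninitialized stack memory. A small sketch of that defensive pattern follows; the types and stub parser are hypothetical, and only the memset-before-partial-parse idea mirrors the patch.

/*
 * Editor's sketch (illustrative only): zero an out-parameter before a
 * parser that may fill it only partially, so untouched fields are 0
 * rather than stale stack data.
 */
#include <stddef.h>
#include <string.h>

struct dte_facilities {
	unsigned int calling_len;
	unsigned int called_len;
	unsigned char calling_ae[20];
	unsigned char called_ae[20];
};

/* Stub parser: fills only the fields actually present in the message. */
static int parse_facilities(const unsigned char *msg, size_t len,
			    struct dte_facilities *dte)
{
	if (len >= 1 && msg[0] != 0) {
		dte->calling_len = msg[0];
		/* ... copy calling_ae from the message here ... */
	}
	/* called_* is left untouched when absent from the message */
	return 0;
}

static int negotiate(const unsigned char *msg, size_t len,
		     struct dte_facilities *dte)
{
	memset(dte, 0, sizeof(*dte));	/* no stale data if the parse is partial */
	return parse_facilities(msg, len, dte);
}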
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index f07224d8b88f..250e567ba3d6 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -9,6 +9,8 @@
  * any later version.
  */
 
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/pfkeyv2.h>
@@ -782,14 +784,13 @@ void xfrm_probe_algs(void)
 	BUG_ON(in_softirq());
 
 	for (i = 0; i < aalg_entries(); i++) {
-		status = crypto_has_hash(aalg_list[i].name, 0,
-					 CRYPTO_ALG_ASYNC);
+		status = crypto_has_ahash(aalg_list[i].name, 0, 0);
 		if (aalg_list[i].available != status)
 			aalg_list[i].available = status;
 	}
 
 	for (i = 0; i < ealg_entries(); i++) {
-		status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0);
+		status = crypto_has_skcipher(ealg_list[i].name, 0, 0);
 		if (ealg_list[i].available != status)
 			ealg_list[i].available = status;
 	}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ad7f5b3f9b61..1c4ad477ce93 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
 
 		skb_dst_force(skb);
+		dev_hold(skb->dev);
 
 		nexthdr = x->type->input(x, skb);
 
 		if (nexthdr == -EINPROGRESS)
 			return 0;
 resume:
+		dev_put(skb->dev);
+
 		spin_lock(&x->lock);
 		if (nexthdr <= 0) {
 			if (nexthdr == -EBADMSG) {
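Editor's note: xfrm_input() may hand the skb to an asynchronous transform (x->type->input() returning -EINPROGRESS) and finish later at the resume: label, so the patch pins skb->dev with dev_hold() before the call and releases it with dev_put() on resume. The sketch below shows the same take-a-reference-across-possibly-async-work shape in plain C; the refcount type and function names are made up for illustration and are not the kernel API.

/*
 * Editor's sketch (illustrative only): hold a reference before work that
 * may complete asynchronously, and drop it on the resume path.  In the
 * patch above the reference is dev_hold()/dev_put() on skb->dev around
 * x->type->input().
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct device {
	atomic_int refcnt;
};

static void dev_ref_get(struct device *dev)
{
	atomic_fetch_add(&dev->refcnt, 1);
}

static void dev_ref_put(struct device *dev)
{
	atomic_fetch_sub(&dev->refcnt, 1);
}

/* Stub transform: returns true when the work completed inline. */
static bool start_transform(struct device *dev)
{
	(void)dev;
	return true;
}

static void resume_input(struct device *dev)
{
	/* ... continue protocol processing here ... */
	dev_ref_put(dev);	/* balances the reference taken in input() */
}

static void input(struct device *dev)
{
	dev_ref_get(dev);	/* device must stay alive until resume_input() */

	if (start_transform(dev))
		resume_input(dev);
	/* otherwise the async completion calls resume_input() later */
}

int main(void)
{
	struct device dev = { .refcnt = 0 };

	input(&dev);
	printf("refcnt after resume: %d\n", atomic_load(&dev.refcnt));
	return 0;
}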
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ff4a91fcab9f..637387bbaaea 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -99,6 +99,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 
 		skb_dst_force(skb);
 
+		/* Inner headers are invalid now. */
+		skb->encapsulation = 0;
+
 		err = x->type->output(x, skb);
 		if (err == -EINPROGRESS)
 			goto out;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 805681a7d356..2cc7af858c6f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2449,7 +2449,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	int type, err;
 
 #ifdef CONFIG_COMPAT
-	if (is_compat_task())
+	if (in_compat_syscall())
 		return -ENOTSUPP;
 #endif
 