author     Tony Lindgren <tony@atomide.com>  2017-11-28 10:06:34 -0500
committer  Tony Lindgren <tony@atomide.com>  2017-11-28 10:06:34 -0500
commit     2db57789e6612ce0cf2fcbb577a1c8307b708566 (patch)
tree       dd9f9e3dffabbf5cb932fcf5055ab329ca940fa0 /net
parent     f0c96c6d40312b1a76cd36709dc3eb5948c1b97f (diff)
parent     e9a9bb4e4779ca74cb52a6e2f8acbc0881d3bb18 (diff)
Merge branch 'soc-fixes' into omap-for-v4.15/fixes
Diffstat (limited to 'net')
-rw-r--r--  net/6lowpan/6lowpan_i.h  1
-rw-r--r--  net/6lowpan/Makefile  1
-rw-r--r--  net/6lowpan/nhc.h  1
-rw-r--r--  net/802/Makefile  1
-rw-r--r--  net/802/garp.c  6
-rw-r--r--  net/802/mrp.c  13
-rw-r--r--  net/8021q/Makefile  1
-rw-r--r--  net/8021q/vlan.c  15
-rw-r--r--  net/8021q/vlan.h  3
-rw-r--r--  net/8021q/vlan_core.c  7
-rw-r--r--  net/8021q/vlan_netlink.c  3
-rw-r--r--  net/8021q/vlanproc.h  1
-rw-r--r--  net/9p/Makefile  1
-rw-r--r--  net/9p/client.c  5
-rw-r--r--  net/9p/trans_fd.c  6
-rw-r--r--  net/9p/trans_virtio.c  13
-rw-r--r--  net/9p/trans_xen.c  4
-rw-r--r--  net/Makefile  1
-rw-r--r--  net/appletalk/aarp.c  4
-rw-r--r--  net/appletalk/ddp.c  7
-rw-r--r--  net/appletalk/dev.c  1
-rw-r--r--  net/appletalk/sysctl_net_atalk.c  1
-rw-r--r--  net/atm/Makefile  1
-rw-r--r--  net/atm/addr.c  1
-rw-r--r--  net/atm/addr.h  1
-rw-r--r--  net/atm/atm_misc.c  1
-rw-r--r--  net/atm/atm_sysfs.c  1
-rw-r--r--  net/atm/clip.c  4
-rw-r--r--  net/atm/common.h  1
-rw-r--r--  net/atm/ioctl.c  1
-rw-r--r--  net/atm/lec.c  13
-rw-r--r--  net/atm/lec.h  1
-rw-r--r--  net/atm/lec_arpc.h  1
-rw-r--r--  net/atm/mpc.c  12
-rw-r--r--  net/atm/mpc.h  1
-rw-r--r--  net/atm/mpoa_caches.c  1
-rw-r--r--  net/atm/mpoa_caches.h  1
-rw-r--r--  net/atm/mpoa_proc.c  1
-rw-r--r--  net/atm/proc.c  1
-rw-r--r--  net/atm/protocols.h  1
-rw-r--r--  net/atm/pvc.c  1
-rw-r--r--  net/atm/raw.c  1
-rw-r--r--  net/atm/resources.c  1
-rw-r--r--  net/atm/resources.h  1
-rw-r--r--  net/atm/signaling.c  1
-rw-r--r--  net/atm/signaling.h  1
-rw-r--r--  net/atm/svc.c  1
-rw-r--r--  net/ax25/Makefile  1
-rw-r--r--  net/ax25/af_ax25.c  7
-rw-r--r--  net/ax25/ax25_ds_timer.c  9
-rw-r--r--  net/ax25/ax25_timer.c  41
-rw-r--r--  net/batman-adv/bat_iv_ogm.c  24
-rw-r--r--  net/batman-adv/bat_v.c  11
-rw-r--r--  net/batman-adv/bat_v_elp.c  6
-rw-r--r--  net/batman-adv/bat_v_ogm.c  12
-rw-r--r--  net/batman-adv/distributed-arp-table.c  4
-rw-r--r--  net/batman-adv/gateway_client.c  8
-rw-r--r--  net/batman-adv/gateway_common.c  18
-rw-r--r--  net/batman-adv/hard-interface.c  14
-rw-r--r--  net/batman-adv/icmp_socket.c  4
-rw-r--r--  net/batman-adv/main.c  12
-rw-r--r--  net/batman-adv/main.h  2
-rw-r--r--  net/batman-adv/multicast.c  2
-rw-r--r--  net/batman-adv/originator.c  26
-rw-r--r--  net/batman-adv/routing.c  6
-rw-r--r--  net/batman-adv/send.c  6
-rw-r--r--  net/batman-adv/soft-interface.c  10
-rw-r--r--  net/batman-adv/sysfs.c  4
-rw-r--r--  net/batman-adv/tp_meter.c  16
-rw-r--r--  net/bluetooth/Kconfig  10
-rw-r--r--  net/bluetooth/Makefile  1
-rw-r--r--  net/bluetooth/a2mp.c  2
-rw-r--r--  net/bluetooth/amp.c  4
-rw-r--r--  net/bluetooth/ecdh_helper.c  228
-rw-r--r--  net/bluetooth/ecdh_helper.h  9
-rw-r--r--  net/bluetooth/hci_conn.c  6
-rw-r--r--  net/bluetooth/hci_core.c  35
-rw-r--r--  net/bluetooth/hci_event.c  46
-rw-r--r--  net/bluetooth/hci_request.c  21
-rw-r--r--  net/bluetooth/hci_request.h  1
-rw-r--r--  net/bluetooth/hci_sock.c  23
-rw-r--r--  net/bluetooth/hci_sysfs.c  3
-rw-r--r--  net/bluetooth/hidp/core.c  7
-rw-r--r--  net/bluetooth/mgmt.c  57
-rw-r--r--  net/bluetooth/rfcomm/core.c  12
-rw-r--r--  net/bluetooth/sco.c  6
-rw-r--r--  net/bluetooth/selftest.c  48
-rw-r--r--  net/bluetooth/smp.c  149
-rw-r--r--  net/bpf/test_run.c  3
-rw-r--r--  net/bridge/Makefile  3
-rw-r--r--  net/bridge/br.c  2
-rw-r--r--  net/bridge/br_arp_nd_proxy.c  469
-rw-r--r--  net/bridge/br_device.c  27
-rw-r--r--  net/bridge/br_forward.c  2
-rw-r--r--  net/bridge/br_if.c  24
-rw-r--r--  net/bridge/br_input.c  77
-rw-r--r--  net/bridge/br_ioctl.c  4
-rw-r--r--  net/bridge/br_mdb.c  55
-rw-r--r--  net/bridge/br_multicast.c  141
-rw-r--r--  net/bridge/br_netlink.c  131
-rw-r--r--  net/bridge/br_netlink_tunnel.c  14
-rw-r--r--  net/bridge/br_private.h  40
-rw-r--r--  net/bridge/br_private_tunnel.h  3
-rw-r--r--  net/bridge/br_stp.c  6
-rw-r--r--  net/bridge/br_stp_if.c  4
-rw-r--r--  net/bridge/br_stp_timer.c  50
-rw-r--r--  net/bridge/br_switchdev.c  1
-rw-r--r--  net/bridge/br_sysfs_if.c  22
-rw-r--r--  net/bridge/br_vlan.c  78
-rw-r--r--  net/bridge/netfilter/Makefile  1
-rw-r--r--  net/bridge/netfilter/ebtable_broute.c  4
-rw-r--r--  net/bridge/netfilter/ebtable_filter.c  4
-rw-r--r--  net/bridge/netfilter/ebtable_nat.c  4
-rw-r--r--  net/bridge/netfilter/ebtables.c  20
-rw-r--r--  net/caif/Makefile  1
-rw-r--r--  net/can/Makefile  1
-rw-r--r--  net/can/af_can.c  24
-rw-r--r--  net/can/af_can.h  2
-rw-r--r--  net/can/bcm.c  5
-rw-r--r--  net/can/proc.c  4
-rw-r--r--  net/ceph/Makefile  1
-rw-r--r--  net/ceph/armor.c  1
-rw-r--r--  net/ceph/auth.c  1
-rw-r--r--  net/ceph/auth_none.c  1
-rw-r--r--  net/ceph/auth_none.h  1
-rw-r--r--  net/ceph/auth_x.c  1
-rw-r--r--  net/ceph/auth_x.h  1
-rw-r--r--  net/ceph/auth_x_protocol.h  1
-rw-r--r--  net/ceph/buffer.c  1
-rw-r--r--  net/ceph/ceph_fs.c  1
-rw-r--r--  net/ceph/ceph_hash.c  12
-rw-r--r--  net/ceph/ceph_strings.c  1
-rw-r--r--  net/ceph/cls_lock_client.c  1
-rw-r--r--  net/ceph/crush/crush.c  1
-rw-r--r--  net/ceph/crush/hash.c  1
-rw-r--r--  net/ceph/crypto.c  5
-rw-r--r--  net/ceph/crypto.h  1
-rw-r--r--  net/ceph/debugfs.c  1
-rw-r--r--  net/ceph/messenger.c  2
-rw-r--r--  net/ceph/mon_client.c  6
-rw-r--r--  net/ceph/msgpool.c  1
-rw-r--r--  net/ceph/osd_client.c  1
-rw-r--r--  net/ceph/osdmap.c  36
-rw-r--r--  net/ceph/pagelist.c  1
-rw-r--r--  net/ceph/pagevec.c  5
-rw-r--r--  net/ceph/string_table.c  1
-rw-r--r--  net/compat.c  7
-rw-r--r--  net/core/Makefile  1
-rw-r--r--  net/core/datagram.c  3
-rw-r--r--  net/core/dev.c  367
-rw-r--r--  net/core/dev_ioctl.c  14
-rw-r--r--  net/core/drop_monitor.c  7
-rw-r--r--  net/core/dst.c  16
-rw-r--r--  net/core/ethtool.c  23
-rw-r--r--  net/core/fib_notifier.c  10
-rw-r--r--  net/core/fib_rules.c  15
-rw-r--r--  net/core/filter.c  437
-rw-r--r--  net/core/flow_dissector.c  130
-rw-r--r--  net/core/gen_estimator.c  6
-rw-r--r--  net/core/gro_cells.c  1
-rw-r--r--  net/core/lwt_bpf.c  2
-rw-r--r--  net/core/neighbour.c  32
-rw-r--r--  net/core/net-procfs.c  1
-rw-r--r--  net/core/net-sysfs.c  28
-rw-r--r--  net/core/net-sysfs.h  1
-rw-r--r--  net/core/net-traces.c  4
-rw-r--r--  net/core/net_namespace.c  1
-rw-r--r--  net/core/netpoll.c  2
-rw-r--r--  net/core/pktgen.c  18
-rw-r--r--  net/core/rtnetlink.c  464
-rw-r--r--  net/core/skbuff.c  73
-rw-r--r--  net/core/sock.c  52
-rw-r--r--  net/core/sock_reuseport.c  13
-rw-r--r--  net/core/stream.c  1
-rw-r--r--  net/core/sysctl_net_core.c  1
-rw-r--r--  net/core/tso.c  1
-rw-r--r--  net/dccp/Makefile  1
-rw-r--r--  net/dccp/ccids/ccid2.c  10
-rw-r--r--  net/dccp/ccids/ccid2.h  1
-rw-r--r--  net/dccp/ccids/ccid3.c  11
-rw-r--r--  net/dccp/ccids/ccid3.h  1
-rw-r--r--  net/dccp/ccids/lib/packet_history.c  4
-rw-r--r--  net/dccp/ccids/lib/tfrc.c  1
-rw-r--r--  net/dccp/input.c  1
-rw-r--r--  net/dccp/ipv4.c  13
-rw-r--r--  net/dccp/options.c  2
-rw-r--r--  net/dccp/timer.c  30
-rw-r--r--  net/decnet/Makefile  1
-rw-r--r--  net/decnet/af_decnet.c  7
-rw-r--r--  net/decnet/dn_dev.c  13
-rw-r--r--  net/decnet/dn_fib.c  1
-rw-r--r--  net/decnet/dn_neigh.c  1
-rw-r--r--  net/decnet/dn_nsp_in.c  8
-rw-r--r--  net/decnet/dn_nsp_out.c  18
-rw-r--r--  net/decnet/dn_route.c  18
-rw-r--r--  net/decnet/dn_rules.c  1
-rw-r--r--  net/decnet/dn_table.c  2
-rw-r--r--  net/decnet/dn_timer.c  9
-rw-r--r--  net/decnet/sysctl_net_decnet.c  1
-rw-r--r--  net/dns_resolver/dns_key.c  2
-rw-r--r--  net/dsa/Kconfig  4
-rw-r--r--  net/dsa/Makefile  4
-rw-r--r--  net/dsa/dsa.c  105
-rw-r--r--  net/dsa/dsa2.c  906
-rw-r--r--  net/dsa/dsa_priv.h  71
-rw-r--r--  net/dsa/legacy.c  85
-rw-r--r--  net/dsa/master.c  143
-rw-r--r--  net/dsa/port.c  105
-rw-r--r--  net/dsa/slave.c  624
-rw-r--r--  net/dsa/switch.c  6
-rw-r--r--  net/dsa/tag_brcm.c  90
-rw-r--r--  net/dsa/tag_dsa.c  28
-rw-r--r--  net/dsa/tag_edsa.c  28
-rw-r--r--  net/dsa/tag_ksz.c  13
-rw-r--r--  net/dsa/tag_lan9303.c  55
-rw-r--r--  net/dsa/tag_mtk.c  20
-rw-r--r--  net/dsa/tag_qca.c  22
-rw-r--r--  net/dsa/tag_trailer.c  13
-rw-r--r--  net/hsr/hsr_device.c  9
-rw-r--r--  net/hsr/hsr_framereg.c  6
-rw-r--r--  net/hsr/hsr_framereg.h  2
-rw-r--r--  net/ieee802154/6lowpan/6lowpan_i.h  1
-rw-r--r--  net/ieee802154/6lowpan/reassembly.c  5
-rw-r--r--  net/ieee802154/Makefile  1
-rw-r--r--  net/ieee802154/core.h  1
-rw-r--r--  net/ieee802154/netlink.c  6
-rw-r--r--  net/ieee802154/nl802154.h  1
-rw-r--r--  net/ieee802154/rdev-ops.h  1
-rw-r--r--  net/ieee802154/sysfs.h  1
-rw-r--r--  net/ieee802154/trace.h  1
-rw-r--r--  net/ife/ife.c  2
-rw-r--r--  net/ipv4/Kconfig  8
-rw-r--r--  net/ipv4/Makefile  1
-rw-r--r--  net/ipv4/af_inet.c  24
-rw-r--r--  net/ipv4/ah4.c  2
-rw-r--r--  net/ipv4/arp.c  1
-rw-r--r--  net/ipv4/cipso_ipv4.c  24
-rw-r--r--  net/ipv4/devinet.c  43
-rw-r--r--  net/ipv4/esp4.c  2
-rw-r--r--  net/ipv4/fib_frontend.c  34
-rw-r--r--  net/ipv4/fib_lookup.h  1
-rw-r--r--  net/ipv4/fib_notifier.c  1
-rw-r--r--  net/ipv4/fib_semantics.c  35
-rw-r--r--  net/ipv4/fib_trie.c  42
-rw-r--r--  net/ipv4/gre_offload.c  2
-rw-r--r--  net/ipv4/icmp.c  20
-rw-r--r--  net/ipv4/igmp.c  20
-rw-r--r--  net/ipv4/inet_connection_sock.c  73
-rw-r--r--  net/ipv4/inet_fragment.c  6
-rw-r--r--  net/ipv4/inet_hashtables.c  5
-rw-r--r--  net/ipv4/inet_timewait_sock.c  10
-rw-r--r--  net/ipv4/inetpeer.c  15
-rw-r--r--  net/ipv4/ip_forward.c  1
-rw-r--r--  net/ipv4/ip_fragment.c  6
-rw-r--r--  net/ipv4/ip_gre.c  98
-rw-r--r--  net/ipv4/ip_input.c  25
-rw-r--r--  net/ipv4/ip_options.c  1
-rw-r--r--  net/ipv4/ip_sockglue.c  1
-rw-r--r--  net/ipv4/ip_tunnel.c  12
-rw-r--r--  net/ipv4/ip_vti.c  19
-rw-r--r--  net/ipv4/ipconfig.c  1
-rw-r--r--  net/ipv4/ipip.c  66
-rw-r--r--  net/ipv4/ipmr.c  271
-rw-r--r--  net/ipv4/netfilter/Makefile  1
-rw-r--r--  net/ipv4/netfilter/arp_tables.c  22
-rw-r--r--  net/ipv4/netfilter/ip_tables.c  23
-rw-r--r--  net/ipv4/netfilter/ipt_SYNPROXY.c  3
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c  2
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_proto_icmp.c  28
-rw-r--r--  net/ipv4/netfilter/nf_nat_l3proto_ipv4.c  3
-rw-r--r--  net/ipv4/netfilter/nf_reject_ipv4.c  2
-rw-r--r--  net/ipv4/proc.c  1
-rw-r--r--  net/ipv4/route.c  69
-rw-r--r--  net/ipv4/syncookies.c  4
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c  521
-rw-r--r--  net/ipv4/tcp.c  167
-rw-r--r--  net/ipv4/tcp_cdg.c  2
-rw-r--r--  net/ipv4/tcp_cong.c  76
-rw-r--r--  net/ipv4/tcp_fastopen.c  161
-rw-r--r--  net/ipv4/tcp_input.c  614
-rw-r--r--  net/ipv4/tcp_ipv4.c  100
-rw-r--r--  net/ipv4/tcp_metrics.c  23
-rw-r--r--  net/ipv4/tcp_minisocks.c  41
-rw-r--r--  net/ipv4/tcp_nv.c  47
-rw-r--r--  net/ipv4/tcp_offload.c  12
-rw-r--r--  net/ipv4/tcp_output.c  393
-rw-r--r--  net/ipv4/tcp_recovery.c  103
-rw-r--r--  net/ipv4/tcp_timer.c  77
-rw-r--r--  net/ipv4/tcp_vegas.c  2
-rw-r--r--  net/ipv4/tcp_vegas.h  1
-rw-r--r--  net/ipv4/udp.c  52
-rw-r--r--  net/ipv4/udp_impl.h  1
-rw-r--r--  net/ipv4/udp_offload.c  51
-rw-r--r--  net/ipv4/xfrm4_input.c  1
-rw-r--r--  net/ipv4/xfrm4_policy.c  1
-rw-r--r--  net/ipv4/xfrm4_state.c  1
-rw-r--r--  net/ipv6/Makefile  1
-rw-r--r--  net/ipv6/addrconf.c  425
-rw-r--r--  net/ipv6/addrconf_core.c  9
-rw-r--r--  net/ipv6/addrlabel.c  147
-rw-r--r--  net/ipv6/af_inet6.c  4
-rw-r--r--  net/ipv6/ah6.c  3
-rw-r--r--  net/ipv6/esp6.c  10
-rw-r--r--  net/ipv6/exthdrs.c  73
-rw-r--r--  net/ipv6/exthdrs_core.c  5
-rw-r--r--  net/ipv6/icmp.c  50
-rw-r--r--  net/ipv6/ila/ila.h  12
-rw-r--r--  net/ipv6/ila/ila_common.c  104
-rw-r--r--  net/ipv6/ila/ila_lwt.c  112
-rw-r--r--  net/ipv6/ila/ila_xlat.c  27
-rw-r--r--  net/ipv6/ip6_checksum.c  1
-rw-r--r--  net/ipv6/ip6_fib.c  677
-rw-r--r--  net/ipv6/ip6_flowlabel.c  7
-rw-r--r--  net/ipv6/ip6_gre.c  62
-rw-r--r--  net/ipv6/ip6_icmp.c  1
-rw-r--r--  net/ipv6/ip6_offload.c  2
-rw-r--r--  net/ipv6/ip6_output.c  4
-rw-r--r--  net/ipv6/ip6_tunnel.c  105
-rw-r--r--  net/ipv6/ip6_vti.c  26
-rw-r--r--  net/ipv6/ip6mr.c  14
-rw-r--r--  net/ipv6/ipv6_sockglue.c  12
-rw-r--r--  net/ipv6/mcast.c  33
-rw-r--r--  net/ipv6/ndisc.c  18
-rw-r--r--  net/ipv6/netfilter/Makefile  1
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c  22
-rw-r--r--  net/ipv6/netfilter/ip6t_SYNPROXY.c  2
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c  2
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c  24
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c  5
-rw-r--r--  net/ipv6/netfilter/nf_nat_l3proto_ipv6.c  3
-rw-r--r--  net/ipv6/output_core.c  6
-rw-r--r--  net/ipv6/ping.c  5
-rw-r--r--  net/ipv6/raw.c  4
-rw-r--r--  net/ipv6/reassembly.c  5
-rw-r--r--  net/ipv6/route.c  949
-rw-r--r--  net/ipv6/sit.c  40
-rw-r--r--  net/ipv6/syncookies.c  2
-rw-r--r--  net/ipv6/sysctl_net_ipv6.c  33
-rw-r--r--  net/ipv6/tcp_ipv6.c  17
-rw-r--r--  net/ipv6/udp.c  5
-rw-r--r--  net/ipv6/udp_impl.h  1
-rw-r--r--  net/ipv6/udp_offload.c  85
-rw-r--r--  net/ipv6/xfrm6_input.c  1
-rw-r--r--  net/ipv6/xfrm6_policy.c  2
-rw-r--r--  net/ipv6/xfrm6_state.c  1
-rw-r--r--  net/ipv6/xfrm6_tunnel.c  8
-rw-r--r--  net/ipx/af_ipx.c  1
-rw-r--r--  net/ipx/ipx_proc.c  1
-rw-r--r--  net/ipx/ipx_route.c  1
-rw-r--r--  net/ipx/pe2.c  1
-rw-r--r--  net/ipx/sysctl_net_ipx.c  1
-rw-r--r--  net/kcm/kcmproc.c  1
-rw-r--r--  net/kcm/kcmsock.c  2
-rw-r--r--  net/key/af_key.c  2
-rw-r--r--  net/l2tp/Makefile  1
-rw-r--r--  net/l2tp/l2tp_core.c  97
-rw-r--r--  net/l2tp/l2tp_core.h  43
-rw-r--r--  net/l2tp/l2tp_debugfs.c  4
-rw-r--r--  net/l2tp/l2tp_eth.c  153
-rw-r--r--  net/l2tp/l2tp_ip.c  28
-rw-r--r--  net/l2tp/l2tp_ip6.c  28
-rw-r--r--  net/l2tp/l2tp_netlink.c  24
-rw-r--r--  net/l2tp/l2tp_ppp.c  326
-rw-r--r--  net/lapb/lapb_iface.c  4
-rw-r--r--  net/lapb/lapb_timer.c  18
-rw-r--r--  net/llc/llc_c_ac.c  27
-rw-r--r--  net/llc/llc_conn.c  12
-rw-r--r--  net/llc/llc_input.c  4
-rw-r--r--  net/llc/sysctl_net_llc.c  1
-rw-r--r--  net/mac80211/Makefile  4
-rw-r--r--  net/mac80211/aead_api.c (renamed from net/mac80211/aes_ccm.c)  40
-rw-r--r--  net/mac80211/aead_api.h  27
-rw-r--r--  net/mac80211/aes_ccm.h  42
-rw-r--r--  net/mac80211/aes_gcm.c  109
-rw-r--r--  net/mac80211/aes_gcm.h  38
-rw-r--r--  net/mac80211/agg-rx.c  45
-rw-r--r--  net/mac80211/agg-tx.c  49
-rw-r--r--  net/mac80211/cfg.c  12
-rw-r--r--  net/mac80211/debug.h  1
-rw-r--r--  net/mac80211/debugfs.h  1
-rw-r--r--  net/mac80211/debugfs_key.h  1
-rw-r--r--  net/mac80211/debugfs_netdev.h  1
-rw-r--r--  net/mac80211/debugfs_sta.h  1
-rw-r--r--  net/mac80211/driver-ops.h  1
-rw-r--r--  net/mac80211/ht.c  12
-rw-r--r--  net/mac80211/ibss.c  7
-rw-r--r--  net/mac80211/ieee80211_i.h  5
-rw-r--r--  net/mac80211/iface.c  29
-rw-r--r--  net/mac80211/key.c  54
-rw-r--r--  net/mac80211/led.c  11
-rw-r--r--  net/mac80211/main.c  3
-rw-r--r--  net/mac80211/mesh.c  30
-rw-r--r--  net/mac80211/mesh.h  3
-rw-r--r--  net/mac80211/mesh_hwmp.c  12
-rw-r--r--  net/mac80211/mesh_pathtbl.c  3
-rw-r--r--  net/mac80211/mesh_plink.c  13
-rw-r--r--  net/mac80211/mlme.c  65
-rw-r--r--  net/mac80211/ocb.c  10
-rw-r--r--  net/mac80211/pm.c  1
-rw-r--r--  net/mac80211/scan.c  37
-rw-r--r--  net/mac80211/sta_info.c  78
-rw-r--r--  net/mac80211/sta_info.h  16
-rw-r--r--  net/mac80211/trace.c  1
-rw-r--r--  net/mac80211/trace.h  1
-rw-r--r--  net/mac80211/trace_msg.h  1
-rw-r--r--  net/mac80211/tx.c  34
-rw-r--r--  net/mac80211/util.c  25
-rw-r--r--  net/mac80211/vht.c  10
-rw-r--r--  net/mac80211/wpa.c  4
-rw-r--r--  net/mac802154/cfg.h  1
-rw-r--r--  net/mac802154/driver-ops.h  1
-rw-r--r--  net/mac802154/llsec.c  14
-rw-r--r--  net/mac802154/trace.c  1
-rw-r--r--  net/mac802154/trace.h  1
-rw-r--r--  net/mpls/Kconfig  1
-rw-r--r--  net/mpls/af_mpls.c  36
-rw-r--r--  net/mpls/internal.h  1
-rw-r--r--  net/ncsi/internal.h  1
-rw-r--r--  net/ncsi/ncsi-aen.c  17
-rw-r--r--  net/ncsi/ncsi-manage.c  145
-rw-r--r--  net/ncsi/ncsi-rsp.c  43
-rw-r--r--  net/netfilter/Makefile  1
-rw-r--r--  net/netfilter/ipset/Makefile  1
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_gen.h  10
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ip.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ipmac.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_port.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_core.c  29
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h  26
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ip.c  22
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipmark.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipport.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipportip.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipportnet.c  6
-rw-r--r--  net/netfilter/ipset/ip_set_hash_net.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netiface.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netnet.c  4
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netport.c  2
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netportnet.c  4
-rw-r--r--  net/netfilter/ipset/ip_set_list_set.c  17
-rw-r--r--  net/netfilter/ipset/pfxlen.c  395
-rw-r--r--  net/netfilter/ipvs/Makefile  1
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c  12
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c  15
-rw-r--r--  net/netfilter/ipvs/ip_vs_est.c  6
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblc.c  11
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblcr.c  11
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c  1
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c  3
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c  8
-rw-r--r--  net/netfilter/nf_conntrack_core.c  17
-rw-r--r--  net/netfilter/nf_conntrack_expect.c  7
-rw-r--r--  net/netfilter/nf_conntrack_h323_asn1.c  81
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c  12
-rw-r--r--  net/netfilter/nf_conntrack_proto.c  86
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c  21
-rw-r--r--  net/netfilter/nf_conntrack_proto_generic.c  1
-rw-r--r--  net/netfilter/nf_conntrack_proto_gre.c  1
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c  4
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c  62
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c  41
-rw-r--r--  net/netfilter/nf_internals.h  1
-rw-r--r--  net/netfilter/nf_nat_core.c  21
-rw-r--r--  net/netfilter/nf_nat_ftp.c  2
-rw-r--r--  net/netfilter/nf_nat_irc.c  2
-rw-r--r--  net/netfilter/nf_sockopt.c  1
-rw-r--r--  net/netfilter/nf_tables_api.c  205
-rw-r--r--  net/netfilter/nfnetlink_log.c  8
-rw-r--r--  net/netfilter/nfnetlink_queue.c  4
-rw-r--r--  net/netfilter/nft_ct.c  39
-rw-r--r--  net/netfilter/nft_set_bitmap.c  18
-rw-r--r--  net/netfilter/nft_set_hash.c  42
-rw-r--r--  net/netfilter/nft_set_rbtree.c  73
-rw-r--r--  net/netfilter/x_tables.c  25
-rw-r--r--  net/netfilter/xt_IDLETIMER.c  7
-rw-r--r--  net/netfilter/xt_LED.c  8
-rw-r--r--  net/netfilter/xt_bpf.c  22
-rw-r--r--  net/netfilter/xt_connlimit.c  55
-rw-r--r--  net/netfilter/xt_repldata.h  1
-rw-r--r--  net/netfilter/xt_socket.c  4
-rw-r--r--  net/netlabel/Makefile  1
-rw-r--r--  net/netlabel/netlabel_addrlist.h  4
-rw-r--r--  net/netlabel/netlabel_calipso.c  2
-rw-r--r--  net/netlink/af_netlink.c  64
-rw-r--r--  net/netlink/af_netlink.h  2
-rw-r--r--  net/netlink/genetlink.c  1
-rw-r--r--  net/netrom/af_netrom.c  4
-rw-r--r--  net/netrom/nr_in.c  2
-rw-r--r--  net/netrom/nr_loopback.c  6
-rw-r--r--  net/netrom/nr_route.c  62
-rw-r--r--  net/netrom/nr_timer.c  48
-rw-r--r--  net/nfc/Makefile  1
-rw-r--r--  net/nfc/core.c  11
-rw-r--r--  net/nfc/digital_core.c  1
-rw-r--r--  net/nfc/hci/core.c  8
-rw-r--r--  net/nfc/hci/llc_shdlc.c  26
-rw-r--r--  net/nfc/llcp_core.c  16
-rw-r--r--  net/nfc/nci/Makefile  1
-rw-r--r--  net/nfc/nci/core.c  14
-rw-r--r--  net/nfc/netlink.c  35
-rw-r--r--  net/nsh/nsh.c  60
-rw-r--r--  net/openvswitch/Kconfig  1
-rw-r--r--  net/openvswitch/Makefile  2
-rw-r--r--  net/openvswitch/actions.c  126
-rw-r--r--  net/openvswitch/conntrack.c  12
-rw-r--r--  net/openvswitch/conntrack.h  7
-rw-r--r--  net/openvswitch/datapath.c  104
-rw-r--r--  net/openvswitch/datapath.h  39
-rw-r--r--  net/openvswitch/dp_notify.c  4
-rw-r--r--  net/openvswitch/flow.c  57
-rw-r--r--  net/openvswitch/flow.h  7
-rw-r--r--  net/openvswitch/flow_netlink.c  405
-rw-r--r--  net/openvswitch/flow_netlink.h  5
-rw-r--r--  net/openvswitch/meter.c  597
-rw-r--r--  net/openvswitch/meter.h  54
-rw-r--r--  net/openvswitch/vport-netdev.c  3
-rw-r--r--  net/packet/af_packet.c  77
-rw-r--r--  net/packet/internal.h  1
-rw-r--r--  net/phonet/Makefile  1
-rw-r--r--  net/phonet/af_phonet.c  17
-rw-r--r--  net/phonet/datagram.c  2
-rw-r--r--  net/phonet/pep.c  2
-rw-r--r--  net/phonet/pn_dev.c  3
-rw-r--r--  net/psample/psample.c  2
-rw-r--r--  net/qrtr/qrtr.c  379
-rw-r--r--  net/qrtr/qrtr.h  1
-rw-r--r--  net/rds/Makefile  1
-rw-r--r--  net/rds/ib.c  11
-rw-r--r--  net/rds/ib.h  3
-rw-r--r--  net/rds/ib_fmr.c  4
-rw-r--r--  net/rds/ib_rdma.c  4
-rw-r--r--  net/rds/ib_recv.c  10
-rw-r--r--  net/rds/ib_send.c  16
-rw-r--r--  net/rds/info.h  1
-rw-r--r--  net/rds/loop.h  1
-rw-r--r--  net/rds/rdma_transport.h  1
-rw-r--r--  net/rds/rds.h  1
-rw-r--r--  net/rds/rds_single_path.h  1
-rw-r--r--  net/rds/tcp.h  1
-rw-r--r--  net/rose/af_rose.c  17
-rw-r--r--  net/rose/rose_in.c  1
-rw-r--r--  net/rose/rose_link.c  16
-rw-r--r--  net/rose/rose_loopback.c  9
-rw-r--r--  net/rose/rose_route.c  10
-rw-r--r--  net/rose/rose_timer.c  39
-rw-r--r--  net/rxrpc/Makefile  1
-rw-r--r--  net/rxrpc/af_rxrpc.c  47
-rw-r--r--  net/rxrpc/ar-internal.h  1
-rw-r--r--  net/rxrpc/call_event.c  2
-rw-r--r--  net/rxrpc/call_object.c  8
-rw-r--r--  net/rxrpc/input.c  3
-rw-r--r--  net/rxrpc/output.c  19
-rw-r--r--  net/rxrpc/peer_object.c  13
-rw-r--r--  net/rxrpc/recvmsg.c  7
-rw-r--r--  net/rxrpc/sendmsg.c  108
-rw-r--r--  net/sched/Kconfig  11
-rw-r--r--  net/sched/Makefile  2
-rw-r--r--  net/sched/act_api.c  223
-rw-r--r--  net/sched/act_bpf.c  4
-rw-r--r--  net/sched/act_csum.c  6
-rw-r--r--  net/sched/act_ife.c  153
-rw-r--r--  net/sched/act_meta_mark.c  2
-rw-r--r--  net/sched/act_meta_skbprio.c  2
-rw-r--r--  net/sched/act_meta_skbtcindex.c  2
-rw-r--r--  net/sched/act_mirred.c  13
-rw-r--r--  net/sched/act_sample.c  3
-rw-r--r--  net/sched/act_vlan.c  81
-rw-r--r--  net/sched/cls_api.c  374
-rw-r--r--  net/sched/cls_basic.c  72
-rw-r--r--  net/sched/cls_bpf.c  123
-rw-r--r--  net/sched/cls_cgroup.c  40
-rw-r--r--  net/sched/cls_flow.c  51
-rw-r--r--  net/sched/cls_flower.c  120
-rw-r--r--  net/sched/cls_fw.c  37
-rw-r--r--  net/sched/cls_matchall.c  89
-rw-r--r--  net/sched/cls_route.c  32
-rw-r--r--  net/sched/cls_rsvp.h  30
-rw-r--r--  net/sched/cls_tcindex.c  70
-rw-r--r--  net/sched/cls_u32.c  224
-rw-r--r--  net/sched/ematch.c  2
-rw-r--r--  net/sched/sch_api.c  11
-rw-r--r--  net/sched/sch_atm.c  4
-rw-r--r--  net/sched/sch_cbq.c  3
-rw-r--r--  net/sched/sch_cbs.c  373
-rw-r--r--  net/sched/sch_drr.c  3
-rw-r--r--  net/sched/sch_dsmark.c  2
-rw-r--r--  net/sched/sch_fq_codel.c  3
-rw-r--r--  net/sched/sch_generic.c  71
-rw-r--r--  net/sched/sch_hfsc.c  28
-rw-r--r--  net/sched/sch_htb.c  10
-rw-r--r--  net/sched/sch_ingress.c  49
-rw-r--r--  net/sched/sch_mq.c  10
-rw-r--r--  net/sched/sch_mqprio.c  273
-rw-r--r--  net/sched/sch_multiq.c  3
-rw-r--r--  net/sched/sch_netem.c  167
-rw-r--r--  net/sched/sch_pie.c  10
-rw-r--r--  net/sched/sch_prio.c  3
-rw-r--r--  net/sched/sch_qfq.c  3
-rw-r--r--  net/sched/sch_red.c  93
-rw-r--r--  net/sched/sch_sfb.c  3
-rw-r--r--  net/sched/sch_sfq.c  13
-rw-r--r--  net/sctp/Makefile  4
-rw-r--r--  net/sctp/associola.c  3
-rw-r--r--  net/sctp/chunk.c  6
-rw-r--r--  net/sctp/input.c  24
-rw-r--r--  net/sctp/ipv6.c  13
-rw-r--r--  net/sctp/outqueue.c  63
-rw-r--r--  net/sctp/protocol.c  7
-rw-r--r--  net/sctp/sctp_diag.c  4
-rw-r--r--  net/sctp/sm_make_chunk.c  11
-rw-r--r--  net/sctp/sm_sideeffect.c  96
-rw-r--r--  net/sctp/socket.c  282
-rw-r--r--  net/sctp/stream.c  260
-rw-r--r--  net/sctp/stream_sched.c  275
-rw-r--r--  net/sctp/stream_sched_prio.c  347
-rw-r--r--  net/sctp/stream_sched_rr.c  201
-rw-r--r--  net/sctp/transport.c  13
-rw-r--r--  net/sctp/ulpevent.c  2
-rw-r--r--  net/smc/af_smc.c  58
-rw-r--r--  net/smc/smc.h  3
-rw-r--r--  net/smc/smc_cdc.c  8
-rw-r--r--  net/smc/smc_cdc.h  4
-rw-r--r--  net/smc/smc_clc.c  11
-rw-r--r--  net/smc/smc_clc.h  4
-rw-r--r--  net/smc/smc_close.c  31
-rw-r--r--  net/smc/smc_close.h  1
-rw-r--r--  net/smc/smc_core.c  33
-rw-r--r--  net/smc/smc_core.h  1
-rw-r--r--  net/smc/smc_ib.c  26
-rw-r--r--  net/smc/smc_ib.h  1
-rw-r--r--  net/smc/smc_llc.c  1
-rw-r--r--  net/smc/smc_llc.h  1
-rw-r--r--  net/smc/smc_pnet.c  5
-rw-r--r--  net/smc/smc_pnet.h  1
-rw-r--r--  net/smc/smc_rx.c  3
-rw-r--r--  net/smc/smc_rx.h  1
-rw-r--r--  net/smc/smc_tx.c  19
-rw-r--r--  net/smc/smc_tx.h  1
-rw-r--r--  net/smc/smc_wr.c  3
-rw-r--r--  net/smc/smc_wr.h  1
-rw-r--r--  net/socket.c  1
-rw-r--r--  net/strparser/strparser.c  17
-rw-r--r--  net/sunrpc/Makefile  1
-rw-r--r--  net/sunrpc/auth_gss/Makefile  1
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c  14
-rw-r--r--  net/sunrpc/auth_null.c  1
-rw-r--r--  net/sunrpc/auth_unix.c  1
-rw-r--r--  net/sunrpc/clnt.c  14
-rw-r--r--  net/sunrpc/debugfs.c  1
-rw-r--r--  net/sunrpc/netns.h  1
-rw-r--r--  net/sunrpc/rpc_pipe.c  8
-rw-r--r--  net/sunrpc/rpcb_clnt.c  6
-rw-r--r--  net/sunrpc/sched.c  11
-rw-r--r--  net/sunrpc/sunrpc_syms.c  3
-rw-r--r--  net/sunrpc/svc.c  6
-rw-r--r--  net/sunrpc/svc_xprt.c  115
-rw-r--r--  net/sunrpc/xprt.c  46
-rw-r--r--  net/sunrpc/xprtmultipath.c  1
-rw-r--r--  net/sunrpc/xprtrdma/Makefile  1
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c  7
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c  20
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c  30
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c  363
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c  7
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_rw.c  1
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c  11
-rw-r--r--  net/sunrpc/xprtrdma/transport.c  19
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c  236
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h  119
-rw-r--r--  net/sunrpc/xprtsock.c  7
-rw-r--r--  net/switchdev/switchdev.c  2
-rw-r--r--  net/tipc/Makefile  3
-rw-r--r--  net/tipc/bcast.c  22
-rw-r--r--  net/tipc/core.h  5
-rw-r--r--  net/tipc/discover.c  6
-rw-r--r--  net/tipc/group.c  871
-rw-r--r--  net/tipc/group.h  73
-rw-r--r--  net/tipc/link.c  34
-rw-r--r--  net/tipc/monitor.c  17
-rw-r--r--  net/tipc/msg.c  41
-rw-r--r--  net/tipc/msg.h  135
-rw-r--r--  net/tipc/name_table.c  176
-rw-r--r--  net/tipc/name_table.h  28
-rw-r--r--  net/tipc/node.c  52
-rw-r--r--  net/tipc/node.h  5
-rw-r--r--  net/tipc/server.c  121
-rw-r--r--  net/tipc/server.h  5
-rw-r--r--  net/tipc/socket.c  845
-rw-r--r--  net/tipc/subscr.c  6
-rw-r--r--  net/tls/tls_main.c  96
-rw-r--r--  net/tls/tls_sw.c  24
-rw-r--r--  net/unix/Makefile  1
-rw-r--r--  net/unix/af_unix.c  1
-rw-r--r--  net/unix/diag.c  2
-rw-r--r--  net/vmw_vsock/Kconfig  10
-rw-r--r--  net/vmw_vsock/Makefile  4
-rw-r--r--  net/vmw_vsock/af_vsock.c  73
-rw-r--r--  net/vmw_vsock/diag.c  186
-rw-r--r--  net/vmw_vsock/hyperv_transport.c  33
-rw-r--r--  net/vmw_vsock/virtio_transport.c  2
-rw-r--r--  net/vmw_vsock/virtio_transport_common.c  22
-rw-r--r--  net/vmw_vsock/vmci_transport.c  35
-rw-r--r--  net/vmw_vsock/vmci_transport_notify.c  2
-rw-r--r--  net/vmw_vsock/vmci_transport_notify_qstate.c  2
-rw-r--r--  net/wimax/Makefile  1
-rw-r--r--  net/wireless/.gitignore  3
-rw-r--r--  net/wireless/Kconfig  58
-rw-r--r--  net/wireless/Makefile  25
-rw-r--r--  net/wireless/ap.c  1
-rw-r--r--  net/wireless/certs/sforshee.x509  bin 0 -> 680 bytes
-rw-r--r--  net/wireless/chan.c  5
-rw-r--r--  net/wireless/core.c  2
-rw-r--r--  net/wireless/core.h  6
-rw-r--r--  net/wireless/db.txt  17
-rw-r--r--  net/wireless/debugfs.h  1
-rw-r--r--  net/wireless/ethtool.c  1
-rw-r--r--  net/wireless/genregdb.awk  158
-rw-r--r--  net/wireless/ibss.c  1
-rw-r--r--  net/wireless/lib80211.c  11
-rw-r--r--  net/wireless/mesh.c  1
-rw-r--r--  net/wireless/mlme.c  1
-rw-r--r--  net/wireless/nl80211.c  251
-rw-r--r--  net/wireless/nl80211.h  3
-rw-r--r--  net/wireless/rdev-ops.h  1
-rw-r--r--  net/wireless/reg.c  492
-rw-r--r--  net/wireless/reg.h  14
-rw-r--r--  net/wireless/regdb.h  23
-rw-r--r--  net/wireless/scan.c  1
-rw-r--r--  net/wireless/sme.c  96
-rw-r--r--  net/wireless/sysfs.h  1
-rw-r--r--  net/wireless/trace.h  1
-rw-r--r--  net/wireless/util.c  203
-rw-r--r--  net/wireless/wext-compat.c  1
-rw-r--r--  net/wireless/wext-sme.c  1
-rw-r--r--  net/x25/Makefile  1
-rw-r--r--  net/x25/af_x25.c  7
-rw-r--r--  net/x25/sysctl_net_x25.c  1
-rw-r--r--  net/x25/x25_facilities.c  2
-rw-r--r--  net/x25/x25_in.c  1
-rw-r--r--  net/x25/x25_link.c  8
-rw-r--r--  net/x25/x25_timer.c  18
-rw-r--r--  net/xfrm/Makefile  1
-rw-r--r--  net/xfrm/xfrm_device.c  1
-rw-r--r--  net/xfrm/xfrm_hash.c  1
-rw-r--r--  net/xfrm/xfrm_hash.h  1
-rw-r--r--  net/xfrm/xfrm_input.c  11
-rw-r--r--  net/xfrm/xfrm_output.c  4
-rw-r--r--  net/xfrm/xfrm_policy.c  77
-rw-r--r--  net/xfrm/xfrm_state.c  14
-rw-r--r--  net/xfrm/xfrm_sysctl.c  1
-rw-r--r--  net/xfrm/xfrm_user.c  131
751 files changed, 20197 insertions, 9444 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index a67caee11929..53cf446ce2e3 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __6LOWPAN_I_H
 #define __6LOWPAN_I_H
 
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index 12d131ab2324..2247b96dbc75 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_6LOWPAN) += 6lowpan.o
 
 6lowpan-y := core.o iphc.o nhc.o ndisc.o
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
index 803041400136..67951c40734b 100644
--- a/net/6lowpan/nhc.h
+++ b/net/6lowpan/nhc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __6LOWPAN_NHC_H
 #define __6LOWPAN_NHC_H
 
diff --git a/net/802/Makefile b/net/802/Makefile
index 37e654d6615e..19406a87bdaa 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux 802.x protocol layers.
 #
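
Nearly all of the one-line hunks in this merge add an SPDX license identifier as the first line of a file, and the comment style tracks the file type: headers and Makefiles appear in the hunks above, while .c sources (e.g. net/8021q/vlan_core.c below) take the // form. Headers keep the C block-comment form because they can be pulled into assembly, where // is unsafe. A minimal sketch of the header variant (the file and guard names here are hypothetical):

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * example.h - a header carries the identifier in a C block comment;
 * a .c file would start with "// SPDX-License-Identifier: GPL-2.0"
 * and a Makefile with "# SPDX-License-Identifier: GPL-2.0".
 */
#ifndef _EXAMPLE_H
#define _EXAMPLE_H

#endif /* _EXAMPLE_H */
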
diff --git a/net/802/garp.c b/net/802/garp.c
index 2dac647ff420..7f50d47470bd 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -401,9 +401,9 @@ static void garp_join_timer_arm(struct garp_applicant *app)
 	mod_timer(&app->join_timer, jiffies + delay);
 }
 
-static void garp_join_timer(unsigned long data)
+static void garp_join_timer(struct timer_list *t)
 {
-	struct garp_applicant *app = (struct garp_applicant *)data;
+	struct garp_applicant *app = from_timer(app, t, join_timer);
 
 	spin_lock(&app->lock);
 	garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
@@ -584,7 +584,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
 	spin_lock_init(&app->lock);
 	skb_queue_head_init(&app->queue);
 	rcu_assign_pointer(dev->garp_port->applicants[appl->type], app);
-	setup_timer(&app->join_timer, garp_join_timer, (unsigned long)app);
+	timer_setup(&app->join_timer, garp_join_timer, 0);
 	garp_join_timer_arm(app);
 	return 0;
 
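
The garp.c hunks above, and the mrp.c, appletalk, atm and other timer hunks that follow, are instances of one tree-wide conversion: the callback signature changes from void (*)(unsigned long) to void (*)(struct timer_list *), setup_timer() becomes timer_setup(), and the callback recovers its containing object with from_timer() (a container_of() wrapper keyed on the timer member) instead of casting an opaque data cookie. A minimal sketch of the same pattern, with a hypothetical struct foo standing in for garp_applicant:

#include <linux/timer.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>

struct foo {				/* hypothetical container */
	spinlock_t lock;
	struct timer_list join_timer;
};

/* New-style callback: it receives the timer, not an unsigned long. */
static void foo_join_timer(struct timer_list *t)
{
	/* from_timer() maps the timer_list member back to its container. */
	struct foo *f = from_timer(f, t, join_timer);

	spin_lock(&f->lock);
	/* ... periodic work ... */
	spin_unlock(&f->lock);
	mod_timer(&f->join_timer, jiffies + HZ);
}

static void foo_init(struct foo *f)
{
	spin_lock_init(&f->lock);
	/* Replaces setup_timer(&f->join_timer, cb, (unsigned long)f). */
	timer_setup(&f->join_timer, foo_join_timer, 0);
	mod_timer(&f->join_timer, jiffies + HZ);
}

The gain is type safety: the old data cookie was an unsigned long that could hold anything, while from_timer() is checked against the member it names.
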
diff --git a/net/802/mrp.c b/net/802/mrp.c
index be4dd3165347..a808dd5bbb27 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -586,9 +586,9 @@ static void mrp_join_timer_arm(struct mrp_applicant *app)
 	mod_timer(&app->join_timer, jiffies + delay);
 }
 
-static void mrp_join_timer(unsigned long data)
+static void mrp_join_timer(struct timer_list *t)
 {
-	struct mrp_applicant *app = (struct mrp_applicant *)data;
+	struct mrp_applicant *app = from_timer(app, t, join_timer);
 
 	spin_lock(&app->lock);
 	mrp_mad_event(app, MRP_EVENT_TX);
@@ -605,9 +605,9 @@ static void mrp_periodic_timer_arm(struct mrp_applicant *app)
 		  jiffies + msecs_to_jiffies(mrp_periodic_time));
 }
 
-static void mrp_periodic_timer(unsigned long data)
+static void mrp_periodic_timer(struct timer_list *t)
 {
-	struct mrp_applicant *app = (struct mrp_applicant *)data;
+	struct mrp_applicant *app = from_timer(app, t, periodic_timer);
 
 	spin_lock(&app->lock);
 	mrp_mad_event(app, MRP_EVENT_PERIODIC);
@@ -865,10 +865,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
 	spin_lock_init(&app->lock);
 	skb_queue_head_init(&app->queue);
 	rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
-	setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
+	timer_setup(&app->join_timer, mrp_join_timer, 0);
 	mrp_join_timer_arm(app);
-	setup_timer(&app->periodic_timer, mrp_periodic_timer,
-		    (unsigned long)app);
+	timer_setup(&app->periodic_timer, mrp_periodic_timer, 0);
 	mrp_periodic_timer_arm(app);
 	return 0;
 
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 7bc8db08d7ef..9b703454b93e 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux VLAN layer.
 #
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9649579b5b9f..8dfdd94e430f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
 	return 0;
 }
 
-int register_vlan_dev(struct net_device *dev)
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
@@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev)
 	if (err < 0)
 		goto out_uninit_mvrp;
 
-	err = netdev_upper_dev_link(real_dev, dev);
+	err = netdev_upper_dev_link(real_dev, dev, extack);
 	if (err)
 		goto out_unregister_netdev;
 
@@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	vlan->flags = VLAN_FLAG_REORDER_HDR;
 
 	new_dev->rtnl_link_ops = &vlan_link_ops;
-	err = register_vlan_dev(new_dev);
+	err = register_vlan_dev(new_dev, NULL);
 	if (err < 0)
 		goto out_free_newdev;
 
@@ -328,6 +328,9 @@ static void vlan_transfer_features(struct net_device *dev,
 	vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
 #endif
 
+	vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
+
 	netdev_update_features(vlandev);
 }
 
@@ -376,6 +379,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			 dev->name);
 		vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
 	}
+	if (event == NETDEV_DOWN &&
+	    (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
 
 	vlan_info = rtnl_dereference(dev->vlan_info);
 	if (!vlan_info)
@@ -423,9 +429,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		struct net_device *tmp;
 		LIST_HEAD(close_list);
 
-		if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
-			vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
-
 		/* Put all VLANs for this dev in the down state too. */
 		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index df8bd65dd370..a8ba51030b75 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __BEN_VLAN_802_1Q_INC__
 #define __BEN_VLAN_802_1Q_INC__
 
@@ -107,7 +108,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 int vlan_check_real_dev(struct net_device *real_dev,
 			__be16 protocol, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
-int register_vlan_dev(struct net_device *dev);
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
 bool vlan_dev_inherit_address(struct net_device *dev,
 			      struct net_device *real_dev);
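
The register_vlan_dev() change above and in vlan.c threads a struct netlink_ext_ack * from the rtnetlink newlink path down to netdev_upper_dev_link(), so a failure while linking the VLAN to its lower device can return an extended-ack message to the requesting netlink socket; the legacy ioctl path has no netlink request to answer and passes NULL, as the register_vlan_device() hunk shows. A sketch of the pattern under those assumptions, with hypothetical my_* names:

#include <linux/netdevice.h>
#include <linux/netlink.h>

/* The shared registration helper gains an extack parameter... */
static int my_register(struct net_device *dev, struct net_device *lower,
		       struct netlink_ext_ack *extack)
{
	/* ...and forwards it; callees may attach a message on failure. */
	return netdev_upper_dev_link(lower, dev, extack);
}

/* A netlink caller passes the request's extack through... */
static int my_newlink(struct net_device *dev, struct net_device *lower,
		      struct netlink_ext_ack *extack)
{
	return my_register(dev, lower, extack);
}

/* ...while an ioctl caller, having none, passes NULL. */
static int my_ioctl_add(struct net_device *dev, struct net_device *lower)
{
	return my_register(dev, lower, NULL);
}
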
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index e2ed69850489..64aa9f755e1d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
@@ -21,6 +22,12 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	if (unlikely(!skb))
 		return false;
 
+	if (unlikely(!(vlan_dev->flags & IFF_UP))) {
+		kfree_skb(skb);
+		*skbp = NULL;
+		return false;
+	}
+
 	skb->dev = vlan_dev;
 	if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
 		/* Our lower layer thinks this is not local, let's make sure.
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 5e831de3103e..6689c0b272a7 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -143,6 +143,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	vlan->vlan_proto = proto;
 	vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
 	vlan->real_dev = real_dev;
+	dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
 	vlan->flags = VLAN_FLAG_REORDER_HDR;
 
 	err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
@@ -160,7 +161,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		return err;
 
-	return register_vlan_dev(dev);
+	return register_vlan_dev(dev, extack);
 }
 
 static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index 8838a2e92eb6..48cd4b4784e8 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __BEN_VLAN_PROC_INC__
 #define __BEN_VLAN_PROC_INC__
 
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 697ea7caf466..c0486cfc85d9 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_NET_9P) := 9pnet.o
 obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
 obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
diff --git a/net/9p/client.c b/net/9p/client.c
index 4674235b0d9b..b433aff5ff13 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -82,7 +82,7 @@ int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
 {
 	if (clnt->msize != 8192)
 		seq_printf(m, ",msize=%u", clnt->msize);
-	seq_printf(m, "trans=%s", clnt->trans_mod->name);
+	seq_printf(m, ",trans=%s", clnt->trans_mod->name);
 
 	switch (clnt->proto_version) {
 	case p9_proto_legacy:
@@ -773,8 +773,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	}
 again:
 	/* Wait for the response */
-	err = wait_event_interruptible(*req->wq,
-				       req->status >= REQ_STATUS_RCVD);
+	err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
 
 	/*
 	 * Make sure our req is coherent with regard to updates in other
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 903a190319b9..985046ae4231 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -724,12 +724,12 @@ static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt)
 {
 	if (clnt->trans_mod == &p9_tcp_trans) {
 		if (clnt->trans_opts.tcp.port != P9_PORT)
-			seq_printf(m, "port=%u", clnt->trans_opts.tcp.port);
+			seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port);
 	} else if (clnt->trans_mod == &p9_fd_trans) {
 		if (clnt->trans_opts.fd.rfd != ~0)
-			seq_printf(m, "rfd=%u", clnt->trans_opts.fd.rfd);
+			seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd);
 		if (clnt->trans_opts.fd.wfd != ~0)
-			seq_printf(m, "wfd=%u", clnt->trans_opts.fd.wfd);
+			seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd);
 	}
 	return 0;
 }
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index f24b25c25106..f3a4efcf1456 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -286,8 +286,8 @@ req_retry:
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;
 			spin_unlock_irqrestore(&chan->lock, flags);
-			err = wait_event_interruptible(*chan->vc_wq,
+			err = wait_event_killable(*chan->vc_wq,
 						  chan->ring_bufs_avail);
 			if (err == -ERESTARTSYS)
 				return err;
 
@@ -327,7 +327,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
 		 * Other zc request to finish here
 		 */
 		if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
-			err = wait_event_interruptible(vp_wq,
+			err = wait_event_killable(vp_wq,
 			    (atomic_read(&vp_pinned) < chan->p9_max_pages));
 			if (err == -ERESTARTSYS)
 				return err;
@@ -471,8 +471,8 @@ req_retry_pinned:
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;
 			spin_unlock_irqrestore(&chan->lock, flags);
-			err = wait_event_interruptible(*chan->vc_wq,
+			err = wait_event_killable(*chan->vc_wq,
 						  chan->ring_bufs_avail);
 			if (err == -ERESTARTSYS)
 				goto err_out;
 
@@ -489,8 +489,7 @@ req_retry_pinned:
 	virtqueue_kick(chan->vq);
 	spin_unlock_irqrestore(&chan->lock, flags);
 	p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
-	err = wait_event_interruptible(*req->wq,
-			       req->status >= REQ_STATUS_RCVD);
+	err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
 	/*
 	 * Non kernel buffers are pinned, unpin them
 	 */
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 6ad3e043c617..325c56043007 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -156,8 +156,8 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
 	ring = &priv->rings[num];
 
 again:
-	while (wait_event_interruptible(ring->wq,
+	while (wait_event_killable(ring->wq,
 				   p9_xen_write_todo(ring, size)) != 0)
 		;
 
 	spin_lock_irqsave(&ring->lock, flags);
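
Every 9p hunk above makes the same substitution: wait_event_interruptible() becomes wait_event_killable(). Both return -ERESTARTSYS when a signal ends the wait early, but the killable variant wakes only for fatal signals, so an ordinary signal can no longer abort a 9p request while it is mid-flight on the transport; that is also why each hunk touches only the wait call and leaves the -ERESTARTSYS handling alone. A minimal sketch of the calling convention (the names here are hypothetical):

#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(reply_wq);
static int reply_ready;

static int wait_for_reply(void)
{
	int err;

	/*
	 * Sleep until reply_ready becomes true. Only a fatal signal
	 * (e.g. SIGKILL) interrupts the wait; in that case err is
	 * -ERESTARTSYS, otherwise 0.
	 */
	err = wait_event_killable(reply_wq, reply_ready);
	if (err == -ERESTARTSYS)
		return err;
	return 0;
}
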
diff --git a/net/Makefile b/net/Makefile
index ae2fe2283d2f..14fede520840 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the linux networking.
 #
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 8ad3ec2610b6..309d7dbb36e8 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -310,7 +310,7 @@ static void __aarp_expire_device(struct aarp_entry **n, struct net_device *dev)
 }
 
 /* Handle the timer event */
-static void aarp_expire_timeout(unsigned long unused)
+static void aarp_expire_timeout(struct timer_list *unused)
 {
 	int ct;
 
@@ -884,7 +884,7 @@ void __init aarp_proto_init(void)
 	aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
 	if (!aarp_dl)
 		printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
-	setup_timer(&aarp_timer, aarp_expire_timeout, 0);
+	timer_setup(&aarp_timer, aarp_expire_timeout, 0);
 	aarp_timer.expires = jiffies + sysctl_aarp_expiry_time;
 	add_timer(&aarp_timer);
 	register_netdevice_notifier(&aarp_notifier);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 5d035c1f1156..03a9fc0771c0 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -158,9 +158,9 @@ found:
 	return s;
 }
 
-static void atalk_destroy_timer(unsigned long data)
+static void atalk_destroy_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 
 	if (sk_has_allocations(sk)) {
 		sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME;
@@ -175,8 +175,7 @@ static inline void atalk_destroy_socket(struct sock *sk)
 	skb_queue_purge(&sk->sk_receive_queue);
 
 	if (sk_has_allocations(sk)) {
-		setup_timer(&sk->sk_timer, atalk_destroy_timer,
-			    (unsigned long)sk);
+		timer_setup(&sk->sk_timer, atalk_destroy_timer, 0);
 		sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME;
 		add_timer(&sk->sk_timer);
 	} else
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
index e4158b8b926d..284c8e585533 100644
--- a/net/appletalk/dev.c
+++ b/net/appletalk/dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Moved here from drivers/net/net_init.c, which is:
  *	Written 1993,1994,1995 by Donald Becker.
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index ebb864361f7a..c744a853fa5f 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sysctl_net_atalk.c: sysctl interface to net AppleTalk subsystem.
  *
diff --git a/net/atm/Makefile b/net/atm/Makefile
index cc50bd1ff1de..bfec0f2d83b5 100644
--- a/net/atm/Makefile
+++ b/net/atm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the ATM Protocol Families.
 #
diff --git a/net/atm/addr.c b/net/atm/addr.c
index dcda35c66f15..0530b63f509a 100644
--- a/net/atm/addr.c
+++ b/net/atm/addr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/addr.c - Local ATM address registry */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/addr.h b/net/atm/addr.h
index 6837e9e7eb13..da3f848411a0 100644
--- a/net/atm/addr.h
+++ b/net/atm/addr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* net/atm/addr.h - Local ATM address registry */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
index 876fbe83e2e4..a30b83c1cb3f 100644
--- a/net/atm/atm_misc.c
+++ b/net/atm/atm_misc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/atm_misc.c - Various functions for use by ATM drivers */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL ICA */
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 350bf62b2ae3..5d2fed9f5710 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* ATM driver model support. */
 
 #include <linux/kernel.h>
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 65f706e4344c..d4f6029d5109 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -153,7 +153,7 @@ static int neigh_check_cb(struct neighbour *n)
 	return 1;
 }
 
-static void idle_timer_check(unsigned long dummy)
+static void idle_timer_check(struct timer_list *unused)
 {
 	write_lock(&arp_tbl.lock);
 	__neigh_for_each_release(&arp_tbl, neigh_check_cb);
@@ -887,7 +887,7 @@ static int __init atm_clip_init(void)
 	register_netdevice_notifier(&clip_dev_notifier);
 	register_inetaddr_notifier(&clip_inet_notifier);
 
-	setup_timer(&idle_timer, idle_timer_check, 0);
+	timer_setup(&idle_timer, idle_timer_check, 0);
 
 #ifdef CONFIG_PROC_FS
 	{
diff --git a/net/atm/common.h b/net/atm/common.h
index 959436b87182..d9d583712a91 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* net/atm/common.h - ATM sockets (common part for PVC and SVC) */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index bbd3b639992e..2ff0e5e470e3 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* ATM ioctl handling */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/lec.c b/net/atm/lec.c
index a3d93a1bb133..6676e3433261 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1232,7 +1232,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
 #define LEC_ARP_REFRESH_INTERVAL (3*HZ)
 
 static void lec_arp_check_expire(struct work_struct *work);
-static void lec_arp_expire_arp(unsigned long data);
+static void lec_arp_expire_arp(struct timer_list *t);
 
 /*
  * Arp table funcs
@@ -1559,8 +1559,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
 	}
 	ether_addr_copy(to_return->mac_addr, mac_addr);
 	INIT_HLIST_NODE(&to_return->next);
-	setup_timer(&to_return->timer, lec_arp_expire_arp,
-		    (unsigned long)to_return);
+	timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
 	to_return->last_used = jiffies;
 	to_return->priv = priv;
 	skb_queue_head_init(&to_return->tx_wait);
@@ -1569,11 +1568,11 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
 }
 
 /* Arp sent timer expired */
-static void lec_arp_expire_arp(unsigned long data)
+static void lec_arp_expire_arp(struct timer_list *t)
 {
 	struct lec_arp_table *entry;
 
-	entry = (struct lec_arp_table *)data;
+	entry = from_timer(entry, t, timer);
 
 	pr_debug("\n");
 	if (entry->status == ESI_ARP_PENDING) {
@@ -1591,10 +1590,10 @@ static void lec_arp_expire_arp(unsigned long data)
 }
 
 /* Unknown/unused vcc expire, remove associated entry */
-static void lec_arp_expire_vcc(unsigned long data)
+static void lec_arp_expire_vcc(struct timer_list *t)
 {
 	unsigned long flags;
-	struct lec_arp_table *to_remove = (struct lec_arp_table *)data;
+	struct lec_arp_table *to_remove = from_timer(to_remove, t, timer);
 	struct lec_priv *priv = to_remove->priv;
 
 	del_timer(&to_remove->timer);
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 4149db1b7885..be0e2667bd8c 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Lan Emulation client header file
  *
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
index d923f53812a3..1205d8792d28 100644
--- a/net/atm/lec_arpc.h
+++ b/net/atm/lec_arpc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Lec arp cache
  *
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 5677147209e8..7c6a1cc760a2 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -95,7 +95,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
 static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
 			       unsigned long event, void *dev);
 static void mpc_timer_refresh(void);
-static void mpc_cache_check(unsigned long checking_time);
+static void mpc_cache_check(struct timer_list *unused);
 
 static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
 	0xaa, 0xaa, 0x03,
@@ -121,7 +121,7 @@ static struct notifier_block mpoa_notifier = {
 
 struct mpoa_client *mpcs = NULL; /* FIXME */
 static struct atm_mpoa_qos *qos_head = NULL;
-static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
+static DEFINE_TIMER(mpc_timer, mpc_cache_check);
 
 
 static struct mpoa_client *find_mpc_by_itfnum(int itf)
@@ -799,7 +799,6 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg)
 	int err;
 
 	if (mpcs == NULL) {
-		init_timer(&mpc_timer);
 		mpc_timer_refresh();
 
 		/* This lets us now how our LECs are doing */
@@ -1408,15 +1407,16 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action)
 	msg_to_mpoad(msg, mpc);
 }
 
+static unsigned long checking_time;
+
 static void mpc_timer_refresh(void)
 {
 	mpc_timer.expires = jiffies + (MPC_P2 * HZ);
-	mpc_timer.data = mpc_timer.expires;
-	mpc_timer.function = mpc_cache_check;
+	checking_time = mpc_timer.expires;
 	add_timer(&mpc_timer);
 }
 
-static void mpc_cache_check(unsigned long checking_time)
+static void mpc_cache_check(struct timer_list *unused)
 {
 	struct mpoa_client *mpc = mpcs;
 	static unsigned long previous_resolving_check_time;
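
The mpc.c hunks also show the static-timer side of the timer conversion: DEFINE_TIMER() drops its expires and data arguments, a file-scope timer is declared with just its callback (so the separate init_timer() call disappears), and state that used to ride in timer.data, here checking_time, moves to an ordinary static variable. A sketch under the same assumptions, with hypothetical names:

#include <linux/timer.h>
#include <linux/jiffies.h>

static void cache_check(struct timer_list *unused);

/* Two-argument form: name and callback; no expires/data to fill in. */
static DEFINE_TIMER(cache_timer, cache_check);
static unsigned long checking_time;	/* replaces the old timer.data */

static void cache_timer_refresh(void)
{
	cache_timer.expires = jiffies + 10 * HZ;
	checking_time = cache_timer.expires;
	add_timer(&cache_timer);	/* no init_timer() needed first */
}

static void cache_check(struct timer_list *unused)
{
	/* ... scan caches, comparing timestamps against checking_time ... */
	cache_timer_refresh();
}
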
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index cfc7b745aa91..454abd07651a 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _MPC_H_
 #define _MPC_H_
 
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 4ccaa16b1eb1..e01450bb32d6 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
 #include <linux/atmmpc.h>
 #include <linux/slab.h>
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 30fe34841ced..6a266669ebf4 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef MPOA_CACHES_H 2#ifndef MPOA_CACHES_H
2#define MPOA_CACHES_H 3#define MPOA_CACHES_H
3 4
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 2df34eb5d65f..8a0c17e1c203 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ 2#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
2 3
3#ifdef CONFIG_PROC_FS 4#ifdef CONFIG_PROC_FS
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4caca2a90ec4..642f9272ab95 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* net/atm/proc.c - ATM /proc interface 2/* net/atm/proc.c - ATM /proc interface
2 * 3 *
3 * Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA 4 * Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA
diff --git a/net/atm/protocols.h b/net/atm/protocols.h
index acdfc856222d..18d4d008bac3 100644
--- a/net/atm/protocols.h
+++ b/net/atm/protocols.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* net/atm/protocols.h - ATM protocol handler entry points */ 2/* net/atm/protocols.h - ATM protocol handler entry points */
2 3
3/* Written 1995-1997 by Werner Almesberger, EPFL LRC */ 4/* Written 1995-1997 by Werner Almesberger, EPFL LRC */
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 040207ec399f..e1140b3bdcaa 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* net/atm/pvc.c - ATM PVC sockets */ 2/* net/atm/pvc.c - ATM PVC sockets */
2 3
3/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ 4/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 821c0797553d..ee10e8d46185 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* net/atm/raw.c - Raw AAL0 and AAL5 transports */ 2/* net/atm/raw.c - Raw AAL0 and AAL5 transports */
2 3
3/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ 4/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 918244757b7d..bada395ecdb1 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* net/atm/resources.c - Statically allocated resources */ 2/* net/atm/resources.c - Statically allocated resources */
2 3
3/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ 4/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 521431e30507..048232e4d4c6 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* net/atm/resources.h - ATM-related resources */ 2/* net/atm/resources.h - ATM-related resources */
2 3
3/* Written 1995-1998 by Werner Almesberger, EPFL LRC/ICA */ 4/* Written 1995-1998 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 0a20f6e953ac..6c11cdf4dd4c 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* net/atm/signaling.c - ATM signaling */ 2/* net/atm/signaling.c - ATM signaling */
2 3
3/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ 4/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/signaling.h b/net/atm/signaling.h
index 08b2a69cc572..2df8220f7ab5 100644
--- a/net/atm/signaling.h
+++ b/net/atm/signaling.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* net/atm/signaling.h - ATM signaling */ 2/* net/atm/signaling.h - ATM signaling */
2 3
3/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ 4/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 5589de7086af..c458adcbc177 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* net/atm/svc.c - ATM SVC sockets */ 2/* net/atm/svc.c - ATM SVC sockets */
2 3
3/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ 4/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/ax25/Makefile b/net/ax25/Makefile
index 43c46d2cafb6..2e53affc8568 100644
--- a/net/ax25/Makefile
+++ b/net/ax25/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux AX.25 layer. 3# Makefile for the Linux AX.25 layer.
3# 4#
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index f3f9d18891de..06eac1f50c5e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -268,9 +268,9 @@ void ax25_destroy_socket(ax25_cb *);
 /*
  * Handler for deferred kills.
  */
-static void ax25_destroy_timer(unsigned long data)
+static void ax25_destroy_timer(struct timer_list *t)
 {
-        ax25_cb *ax25=(ax25_cb *)data;
+        ax25_cb *ax25 = from_timer(ax25, t, dtimer);
         struct sock *sk;
 
         sk=ax25->sk;
@@ -326,8 +326,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
         if (ax25->sk != NULL) {
                 if (sk_has_allocations(ax25->sk)) {
                         /* Defer: outstanding buffers */
-                        setup_timer(&ax25->dtimer, ax25_destroy_timer,
-                                    (unsigned long)ax25);
+                        timer_setup(&ax25->dtimer, ax25_destroy_timer, 0);
                         ax25->dtimer.expires = jiffies + 2 * HZ;
                         add_timer(&ax25->dtimer);
                 } else {
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5fb2104b7304..e9d11313d45b 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -29,7 +29,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 
-static void ax25_ds_timeout(unsigned long);
+static void ax25_ds_timeout(struct timer_list *);
 
 /*
  * Add DAMA slave timeout timer to timer list.
@@ -41,8 +41,7 @@ static void ax25_ds_timeout(unsigned long);
 
 void ax25_ds_setup_timer(ax25_dev *ax25_dev)
 {
-        setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout,
-                    (unsigned long)ax25_dev);
+        timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0);
 }
 
 void ax25_ds_del_timer(ax25_dev *ax25_dev)
@@ -66,9 +65,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev)
  * Silently discard all (slave) connections in case our master forgot us...
  */
 
-static void ax25_ds_timeout(unsigned long arg)
+static void ax25_ds_timeout(struct timer_list *t)
 {
-        ax25_dev *ax25_dev = (struct ax25_dev *) arg;
+        ax25_dev *ax25_dev = from_timer(ax25_dev, t, dama.slave_timer);
         ax25_cb *ax25;
 
         if (ax25_dev == NULL || !ax25_dev->dama.slave)
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index 23a6f38a80bf..c47b7ee1e4da 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -33,20 +33,19 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 
-static void ax25_heartbeat_expiry(unsigned long);
-static void ax25_t1timer_expiry(unsigned long);
-static void ax25_t2timer_expiry(unsigned long);
-static void ax25_t3timer_expiry(unsigned long);
-static void ax25_idletimer_expiry(unsigned long);
+static void ax25_heartbeat_expiry(struct timer_list *);
+static void ax25_t1timer_expiry(struct timer_list *);
+static void ax25_t2timer_expiry(struct timer_list *);
+static void ax25_t3timer_expiry(struct timer_list *);
+static void ax25_idletimer_expiry(struct timer_list *);
 
 void ax25_setup_timers(ax25_cb *ax25)
 {
-        setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25);
-        setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25);
-        setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25);
-        setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25);
-        setup_timer(&ax25->idletimer, ax25_idletimer_expiry,
-                    (unsigned long)ax25);
+        timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0);
+        timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0);
+        timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0);
+        timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0);
+        timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0);
 }
 
 void ax25_start_heartbeat(ax25_cb *ax25)
@@ -120,10 +119,10 @@ unsigned long ax25_display_timer(struct timer_list *timer)
 
 EXPORT_SYMBOL(ax25_display_timer);
 
-static void ax25_heartbeat_expiry(unsigned long param)
+static void ax25_heartbeat_expiry(struct timer_list *t)
 {
         int proto = AX25_PROTO_STD_SIMPLEX;
-        ax25_cb *ax25 = (ax25_cb *)param;
+        ax25_cb *ax25 = from_timer(ax25, t, timer);
 
         if (ax25->ax25_dev)
                 proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL];
@@ -145,9 +144,9 @@ static void ax25_heartbeat_expiry(unsigned long param)
         }
 }
 
-static void ax25_t1timer_expiry(unsigned long param)
+static void ax25_t1timer_expiry(struct timer_list *t)
 {
-        ax25_cb *ax25 = (ax25_cb *)param;
+        ax25_cb *ax25 = from_timer(ax25, t, t1timer);
 
         switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
         case AX25_PROTO_STD_SIMPLEX:
@@ -164,9 +163,9 @@ static void ax25_t1timer_expiry(unsigned long param)
         }
 }
 
-static void ax25_t2timer_expiry(unsigned long param)
+static void ax25_t2timer_expiry(struct timer_list *t)
 {
-        ax25_cb *ax25 = (ax25_cb *)param;
+        ax25_cb *ax25 = from_timer(ax25, t, t2timer);
 
         switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
         case AX25_PROTO_STD_SIMPLEX:
@@ -183,9 +182,9 @@ static void ax25_t2timer_expiry(unsigned long param)
         }
 }
 
-static void ax25_t3timer_expiry(unsigned long param)
+static void ax25_t3timer_expiry(struct timer_list *t)
 {
-        ax25_cb *ax25 = (ax25_cb *)param;
+        ax25_cb *ax25 = from_timer(ax25, t, t3timer);
 
         switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
         case AX25_PROTO_STD_SIMPLEX:
@@ -204,9 +203,9 @@ static void ax25_t3timer_expiry(unsigned long param)
         }
 }
 
-static void ax25_idletimer_expiry(unsigned long param)
+static void ax25_idletimer_expiry(struct timer_list *t)
 {
-        ax25_cb *ax25 = (ax25_cb *)param;
+        ax25_cb *ax25 = from_timer(ax25, t, idletimer);
 
         switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
         case AX25_PROTO_STD_SIMPLEX:
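
In the ax25_timer.c conversion above, a single ax25_cb owns five timers, and each callback recovers its container with from_timer() by naming the exact timer_list member it was registered on. A hedged sketch of that container-resolution pattern, with a hypothetical struct my_conn standing in for ax25_cb:

#include <linux/timer.h>

struct my_conn {
        struct timer_list heartbeat;
        struct timer_list idle;
        int state;
};

static void heartbeat_expiry(struct timer_list *t)
{
        /* from_timer() is container_of() for timers; the third argument
         * must name the member this callback was registered on
         */
        struct my_conn *conn = from_timer(conn, t, heartbeat);

        conn->state++;
}

static void idle_expiry(struct timer_list *t)
{
        struct my_conn *conn = from_timer(conn, t, idle);

        conn->state = 0;
}

static void my_conn_init(struct my_conn *conn)
{
        timer_setup(&conn->heartbeat, heartbeat_expiry, 0);
        timer_setup(&conn->idle, idle_expiry, 0);
}
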
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 83ba5483455a..1b659ab652fb 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -916,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
         u16 tvlv_len = 0;
         unsigned long send_time;
 
-        if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
-            (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+        if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+            hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
                 return;
 
         /* the interface gets activated here to avoid race conditions between
@@ -1264,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
          * drops as they can't send and receive at the same time.
          */
         tq_iface_penalty = BATADV_TQ_MAX_VALUE;
-        if (if_outgoing && (if_incoming == if_outgoing) &&
+        if (if_outgoing && if_incoming == if_outgoing &&
             batadv_is_wifi_hardif(if_outgoing))
                 tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
                                                       bat_priv);
@@ -1369,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
                 ret = BATADV_NEIGH_DUP;
         } else {
                 set_mark = 0;
-                if (is_dup && (ret != BATADV_NEIGH_DUP))
+                if (is_dup && ret != BATADV_NEIGH_DUP)
                         ret = BATADV_ORIG_DUP;
         }
 
@@ -1515,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
         /* drop packet if sender is not a direct neighbor and if we
          * don't route towards it
          */
-        if (!is_single_hop_neigh && (!orig_neigh_router)) {
+        if (!is_single_hop_neigh && !orig_neigh_router) {
                 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
                            "Drop packet: OGM via unknown neighbor!\n");
                 goto out_neigh;
@@ -1535,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
         sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno);
         similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl;
 
-        if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+        if (is_bidirect && (dup_status == BATADV_NO_DUP ||
                             (sameseq && similar_ttl))) {
                 batadv_iv_ogm_orig_update(bat_priv, orig_node,
                                           orig_ifinfo, ethhdr,
@@ -1553,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
         /* OGMs from secondary interfaces should only scheduled once
          * per interface where it has been received, not multiple times
          */
-        if ((ogm_packet->ttl <= 2) &&
-            (if_incoming != if_outgoing)) {
+        if (ogm_packet->ttl <= 2 &&
+            if_incoming != if_outgoing) {
                 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
                            "Drop packet: OGM from secondary interface and wrong outgoing interface\n");
                 goto out_neigh;
@@ -1590,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
                                           if_incoming, if_outgoing);
 
 out_neigh:
-        if ((orig_neigh_node) && (!is_single_hop_neigh))
+        if (orig_neigh_node && !is_single_hop_neigh)
                 batadv_orig_node_put(orig_neigh_node);
 out:
         if (router_ifinfo)
@@ -2523,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
                         tmp_gw_factor *= 100 * 100;
                         tmp_gw_factor >>= 18;
 
-                        if ((tmp_gw_factor > max_gw_factor) ||
-                            ((tmp_gw_factor == max_gw_factor) &&
-                             (tq_avg > max_tq))) {
+                        if (tmp_gw_factor > max_gw_factor ||
+                            (tmp_gw_factor == max_gw_factor &&
+                             tq_avg > max_tq)) {
                                 if (curr_gw)
                                         batadv_gw_node_put(curr_gw);
                                 curr_gw = gw_node;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 4e2724c5b33d..341ceab8338d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -19,7 +19,6 @@
19#include "main.h" 19#include "main.h"
20 20
21#include <linux/atomic.h> 21#include <linux/atomic.h>
22#include <linux/bug.h>
23#include <linux/cache.h> 22#include <linux/cache.h>
24#include <linux/errno.h> 23#include <linux/errno.h>
25#include <linux/if_ether.h> 24#include <linux/if_ether.h>
@@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
         int ret = 0;
 
         ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-        if (WARN_ON(!ifinfo1))
+        if (!ifinfo1)
                 goto err_ifinfo1;
 
         ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
-        if (WARN_ON(!ifinfo2))
+        if (!ifinfo2)
                 goto err_ifinfo2;
 
         ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput;
@@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
         bool ret = false;
 
         ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-        if (WARN_ON(!ifinfo1))
+        if (!ifinfo1)
                 goto err_ifinfo1;
 
         ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
-        if (WARN_ON(!ifinfo2))
+        if (!ifinfo2)
                 goto err_ifinfo2;
 
         threshold = ifinfo1->bat_v.throughput / 4;
@@ -767,7 +766,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
                 if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
                         goto next;
 
-                if (curr_gw && (bw <= max_bw))
+                if (curr_gw && bw <= max_bw)
                         goto next;
 
                 if (curr_gw)
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index bd1064d98e16..1de992c58b35 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
                 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
 
                 throughput = link_settings.base.speed;
-                if (throughput && (throughput != SPEED_UNKNOWN))
+                if (throughput && throughput != SPEED_UNKNOWN)
                         return throughput * 10;
         }
 
@@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
                 goto out;
 
         /* we are in the process of shutting this interface down */
-        if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
-            (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+        if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+            hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
                 goto out;
 
         /* the interface was enabled but may not be ready yet */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 8be61734fc43..c251445a42a0 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
          * due to the store & forward characteristics of WIFI.
          * Very low throughput values are the exception.
          */
-        if ((throughput > 10) &&
-            (if_incoming == if_outgoing) &&
+        if (throughput > 10 &&
+            if_incoming == if_outgoing &&
             !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
                 return throughput / 2;
 
@@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
         /* drop packets with old seqnos, however accept the first packet after
          * a host has been rebooted.
          */
-        if ((seq_diff < 0) && !protection_started)
+        if (seq_diff < 0 && !protection_started)
                 goto out;
 
         neigh_node->last_seen = jiffies;
@@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
                 router_throughput = router_ifinfo->bat_v.throughput;
                 neigh_throughput = neigh_ifinfo->bat_v.throughput;
 
-                if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
-                    (router_throughput >= neigh_throughput))
+                if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF &&
+                    router_throughput >= neigh_throughput)
                         goto out;
         }
 
@@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
                 return;
 
         /* only unknown & newer OGMs contain TVLVs we are interested in */
-        if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT))
+        if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
                 batadv_tvlv_containers_process(bat_priv, true, orig_node,
                                                NULL, NULL,
                                                (unsigned char *)(ogm2 + 1),
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b6cfa78e9381..760c0de72582 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
         /* this is an hash collision with the temporary selected node. Choose
          * the one with the lowest address
          */
-        if ((tmp_max == max) && max_orig_node &&
-            (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
+        if (tmp_max == max && max_orig_node &&
+            batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
                 goto out;
 
         ret = true;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de9955d5224d..10d521f0b17f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
                 }
         }
 
-        if ((curr_gw) && (!next_gw)) {
+        if (curr_gw && !next_gw) {
                 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
                            "Removing selected gateway - no gateway in range\n");
                 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL,
                                     NULL);
-        } else if ((!curr_gw) && (next_gw)) {
+        } else if (!curr_gw && next_gw) {
                 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
                            "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
                            next_gw->orig_node->orig,
@@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
                 goto out;
         }
 
-        if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) &&
-            (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)))
+        if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) &&
+            gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))
                 goto out;
 
         batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 33940c5c74a8..2c26039c23fc 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
                 if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
                         bw_unit_type = BATADV_BW_UNIT_MBIT;
 
-                if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) ||
-                    (bw_unit_type == BATADV_BW_UNIT_MBIT))
+                if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
+                    bw_unit_type == BATADV_BW_UNIT_MBIT)
                         *tmp_ptr = '\0';
         }
 
@@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
         if (!up_new)
                 up_new = 1;
 
-        if ((down_curr == down_new) && (up_curr == up_new))
+        if (down_curr == down_new && up_curr == up_new)
                 return count;
 
         batadv_gw_reselect(bat_priv);
@@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
         /* only fetch the tvlv value if the handler wasn't called via the
          * CIFNOTFND flag and if there is data to fetch
          */
-        if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) ||
-            (tvlv_value_len < sizeof(gateway))) {
+        if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND ||
+            tvlv_value_len < sizeof(gateway)) {
                 gateway.bandwidth_down = 0;
                 gateway.bandwidth_up = 0;
         } else {
                 gateway_ptr = tvlv_value;
                 gateway.bandwidth_down = gateway_ptr->bandwidth_down;
                 gateway.bandwidth_up = gateway_ptr->bandwidth_up;
-                if ((gateway.bandwidth_down == 0) ||
-                    (gateway.bandwidth_up == 0)) {
+                if (gateway.bandwidth_down == 0 ||
+                    gateway.bandwidth_up == 0) {
                         gateway.bandwidth_down = 0;
                         gateway.bandwidth_up = 0;
                 }
@@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
         batadv_gw_node_update(bat_priv, orig, &gateway);
 
         /* restart gateway selection */
-        if ((gateway.bandwidth_down != 0) &&
-            (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
+        if (gateway.bandwidth_down != 0 &&
+            atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)
                 batadv_gw_check_election(bat_priv, orig);
 }
 
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e348f76ea8c1..4e3d5340ad96 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
 
         rcu_read_lock();
         list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
-                if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-                    (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+                if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+                    hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
                         continue;
 
                 if (hard_iface->net_dev == net_dev)
@@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
 
         rcu_read_lock();
         list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
-                if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-                    (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+                if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+                    hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
                         continue;
 
                 if (hard_iface->soft_iface != soft_iface)
@@ -654,8 +654,8 @@ out:
 static void
 batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
 {
-        if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-            (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+        if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+            hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
                 return;
 
         hard_iface->if_status = BATADV_IF_INACTIVE;
@@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
         bat_priv = netdev_priv(hard_iface->soft_iface);
 
         ret = netdev_master_upper_dev_link(hard_iface->net_dev,
-                                           soft_iface, NULL, NULL);
+                                           soft_iface, NULL, NULL, NULL);
         if (ret)
                 goto err_dev;
 
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 8ead292886d1..bded31121d12 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
         size_t packet_len;
         int error;
 
-        if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0))
+        if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0)
                 return -EAGAIN;
 
-        if ((!buf) || (count < sizeof(struct batadv_icmp_packet)))
+        if (!buf || count < sizeof(struct batadv_icmp_packet))
                 return -EINVAL;
 
         if (!access_ok(VERIFY_WRITE, buf, count))
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fb381fb26a66..4daed7ad46f2 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -73,8 +73,8 @@
  * list traversals just rcu-locked
  */
 struct list_head batadv_hardif_list;
-static int (*batadv_rx_handler[256])(struct sk_buff *,
-                                     struct batadv_hard_iface *);
+static int (*batadv_rx_handler[256])(struct sk_buff *skb,
+                                     struct batadv_hard_iface *recv_if);
 
 unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
@@ -540,12 +540,12 @@ batadv_recv_handler_register(u8 packet_type,
                             int (*recv_handler)(struct sk_buff *,
                                                 struct batadv_hard_iface *))
 {
-        int (*curr)(struct sk_buff *,
-                    struct batadv_hard_iface *);
+        int (*curr)(struct sk_buff *skb,
+                    struct batadv_hard_iface *recv_if);
         curr = batadv_rx_handler[packet_type];
 
-        if ((curr != batadv_recv_unhandled_packet) &&
-            (curr != batadv_recv_unhandled_unicast_packet))
+        if (curr != batadv_recv_unhandled_packet &&
+            curr != batadv_recv_unhandled_unicast_packet)
                 return -EBUSY;
 
         batadv_rx_handler[packet_type] = recv_handler;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 05cc7637c064..edb2f239d04d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.3"
+#define BATADV_SOURCE_VERSION "2017.4"
 #endif
 
 /* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d327670641ac..e553a8770a89 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
         bool orig_initialized;
 
         if (orig_mcast_enabled && tvlv_value &&
-            (tvlv_value_len >= sizeof(mcast_flags)))
+            tvlv_value_len >= sizeof(mcast_flags))
                 mcast_flags = *(u8 *)tvlv_value;
 
         spin_lock_bh(&orig->mcast_handler_lock);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8e2a4b205257..2967b86c13da 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
                         continue;
 
                 /* don't purge if the interface is not (going) down */
-                if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
-                    (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
-                    (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+                if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+                    if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+                    if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
                         continue;
 
                 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
                         continue;
 
                 /* don't purge if the interface is not (going) down */
-                if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
-                    (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
-                    (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+                if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+                    if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+                    if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
                         continue;
 
                 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
                 last_seen = neigh_node->last_seen;
                 if_incoming = neigh_node->if_incoming;
 
-                if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) ||
-                    (if_incoming->if_status == BATADV_IF_INACTIVE) ||
-                    (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
-                    (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) {
-                        if ((if_incoming->if_status == BATADV_IF_INACTIVE) ||
-                            (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
-                            (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED))
+                if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) ||
+                    if_incoming->if_status == BATADV_IF_INACTIVE ||
+                    if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+                    if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) {
+                        if (if_incoming->if_status == BATADV_IF_INACTIVE ||
+                            if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+                            if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)
                                 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
                                            "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n",
                                            orig_node->orig, neigh_node->addr,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f10e3ff26f9d..40d9bf3e5bfe 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
                 batadv_orig_ifinfo_put(orig_ifinfo);
 
         /* route deleted */
-        if ((curr_router) && (!neigh_node)) {
+        if (curr_router && !neigh_node) {
                 batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
                            "Deleting route towards: %pM\n", orig_node->orig);
                 batadv_tt_global_del_orig(bat_priv, orig_node, -1,
                                           "Deleted route towards originator");
 
         /* route added */
-        } else if ((!curr_router) && (neigh_node)) {
+        } else if (!curr_router && neigh_node) {
                 batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
                            "Adding route towards: %pM (via %pM)\n",
                            orig_node->orig, neigh_node->addr);
@@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
         /* add record route information if not full */
         if ((icmph->msg_type == BATADV_ECHO_REPLY ||
              icmph->msg_type == BATADV_ECHO_REQUEST) &&
-            (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
+            skb->len >= sizeof(struct batadv_icmp_packet_rr)) {
                 if (skb_linearize(skb) < 0)
                         goto free_skb;
 
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 054a65e6eb68..7895323fd2a7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
         hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
 
-        if ((hardif_neigh) && (ret != NET_XMIT_DROP))
+        if (hardif_neigh && ret != NET_XMIT_DROP)
                 hardif_neigh->bat_v.last_unicast_tx = jiffies;
 
         if (hardif_neigh)
@@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
                  * we delete only packets belonging to the given interface
                  */
                 if (hard_iface &&
-                    (forw_packet->if_incoming != hard_iface) &&
-                    (forw_packet->if_outgoing != hard_iface))
+                    forw_packet->if_incoming != hard_iface &&
+                    forw_packet->if_outgoing != hard_iface)
                         continue;
 
                 hlist_del(&forw_packet->list);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 10f7edfb176e..9f673cdfecf8 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
         int result;
 
         /* TODO: We must check if we can release all references to non-payload
-         * data using skb_header_release in our skbs to allow skb_cow_header to
-         * work optimally. This means that those skbs are not allowed to read
+         * data using __skb_header_release in our skbs to allow skb_cow_header
+         * to work optimally. This means that those skbs are not allowed to read
          * or write any data which is before the current position of skb->data
          * after that call and thus allow other skbs with the same data buffer
          * to write freely in that area.
@@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
 {
         /* check ranges */
-        if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
+        if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
                 return -EINVAL;
 
         dev->mtu = new_mtu;
@@ -863,11 +863,13 @@ free_bat_counters:
  * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
  * @dev: batadv_soft_interface used as master interface
  * @slave_dev: net_device which should become the slave interface
+ * @extack: extended ACK report struct
  *
  * Return: 0 if successful or error otherwise.
  */
 static int batadv_softif_slave_add(struct net_device *dev,
-                                   struct net_device *slave_dev)
+                                   struct net_device *slave_dev,
+                                   struct netlink_ext_ack *extack)
 {
         struct batadv_hard_iface *hard_iface;
         struct net *net = dev_net(dev);
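
The hard-interface.c and soft-interface.c hunks above track the 4.15 netdev change that threads a struct netlink_ext_ack * through ndo_add_slave and netdev_master_upper_dev_link(), so enslaving failures can carry a textual reason back over netlink. A hedged sketch of how an implementation can use it; my_slave_add is an illustrative name and the loopback check is an assumption, not batman-adv's logic:

#include <linux/netdevice.h>
#include <linux/netlink.h>

static int my_slave_add(struct net_device *dev, struct net_device *slave_dev,
                        struct netlink_ext_ack *extack)
{
        if (slave_dev->flags & IFF_LOOPBACK) {
                /* the message is carried back to userspace with the error */
                NL_SET_ERR_MSG(extack, "loopback device cannot be enslaved");
                return -EINVAL;
        }

        /* forward extack so lower layers can report their own reasons */
        return netdev_master_upper_dev_link(slave_dev, dev, NULL, NULL, extack);
}
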
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 0ae8b30e4eaa..aa187fd42475 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
         if (hard_iface->if_status == status_tmp)
                 goto out;
 
-        if ((hard_iface->soft_iface) &&
-            (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
+        if (hard_iface->soft_iface &&
+            strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)
                 goto out;
 
         if (status_tmp == BATADV_IF_NOT_IN_USE) {
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bfe8effe9238..15cd2139381e 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -488,9 +488,9 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
  * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
  * reset the cwnd to 3*MSS
  */
-static void batadv_tp_sender_timeout(unsigned long arg)
+static void batadv_tp_sender_timeout(struct timer_list *t)
 {
-        struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+        struct batadv_tp_vars *tp_vars = from_timer(tp_vars, t, timer);
         struct batadv_priv *bat_priv = tp_vars->bat_priv;
 
         if (atomic_read(&tp_vars->sending) == 0)
@@ -1020,8 +1020,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
         atomic64_set(&tp_vars->tot_sent, 0);
 
         kref_get(&tp_vars->refcount);
-        setup_timer(&tp_vars->timer, batadv_tp_sender_timeout,
-                    (unsigned long)tp_vars);
+        timer_setup(&tp_vars->timer, batadv_tp_sender_timeout, 0);
 
         tp_vars->bat_priv = bat_priv;
         tp_vars->start_time = jiffies;
@@ -1109,9 +1108,9 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
  * reached without received ack
  * @arg: address of the related tp_vars
  */
-static void batadv_tp_receiver_shutdown(unsigned long arg)
+static void batadv_tp_receiver_shutdown(struct timer_list *t)
 {
-        struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+        struct batadv_tp_vars *tp_vars = from_timer(tp_vars, t, timer);
         struct batadv_tp_unacked *un, *safe;
         struct batadv_priv *bat_priv;
 
@@ -1206,7 +1205,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
 
         /* send the ack */
         r = batadv_send_skb_to_orig(skb, orig_node, NULL);
-        if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+        if (unlikely(r < 0) || r == NET_XMIT_DROP) {
                 ret = BATADV_TP_REASON_DST_UNREACHABLE;
                 goto out;
         }
@@ -1373,8 +1372,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv,
         hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
 
         kref_get(&tp_vars->refcount);
-        setup_timer(&tp_vars->timer, batadv_tp_receiver_shutdown,
-                    (unsigned long)tp_vars);
+        timer_setup(&tp_vars->timer, batadv_tp_receiver_shutdown, 0);
 
         batadv_tp_reset_receiver_timer(tp_vars);
 
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index c18115d22f00..db82a40875e8 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -126,14 +126,4 @@ config BT_DEBUGFS
           Provide extensive information about internal Bluetooth states
           in debugfs.
 
-config BT_LEGACY_IOCTL
-        bool "Enable legacy ioctl interfaces"
-        depends on BT && BT_BREDR
-        default y
-        help
-          Enable support for legacy ioctl interfaces. This is only needed
-          for old and deprecated applications using direct ioctl calls for
-          controller management. Since Linux 3.4 all configuration and
-          setup is done via mgmt interface and this is no longer needed.
-
 source "drivers/bluetooth/Kconfig"
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 5d0a113e2e40..fda41c0b4781 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux Bluetooth subsystem.
 #
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index aad994edd3bb..51c2cf2d8923 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -573,7 +573,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
         hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK,
                                        &mgr->l2cap_conn->hcon->dst);
         if (!hcon) {
-                BT_ERR("No phys link exist");
+                bt_dev_err(hdev, "no phys link exist");
                 rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS;
                 goto clean;
         }
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index ebcab5bbadd7..78bec8df8525 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -187,7 +187,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
 
         /* Legacy key */
         if (conn->key_type < 3) {
-                BT_ERR("Legacy key type %d", conn->key_type);
+                bt_dev_err(hdev, "legacy key type %d", conn->key_type);
                 return -EACCES;
         }
 
@@ -207,7 +207,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
         /* Derive Generic AMP Link Key (gamp) */
         err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key);
         if (err) {
-                BT_ERR("Could not derive Generic AMP Key: err %d", err);
+                bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err);
                 return err;
         }
 
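
bt_dev_err(), used in the conversions above, is the device-aware counterpart of BT_ERR(): it takes the hci_dev as its first argument and prefixes the message with the controller name. A small sketch, with report_key_problem as a hypothetical caller:

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

static void report_key_problem(struct hci_dev *hdev, int key_type)
{
        /* logs e.g. "Bluetooth: hci0: legacy key type 2", while plain
         * BT_ERR() could not identify the controller
         */
        bt_dev_err(hdev, "legacy key type %d", key_type);
}
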
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index c7b1a9aee579..2155ce802877 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -23,7 +23,6 @@
23#include "ecdh_helper.h" 23#include "ecdh_helper.h"
24 24
25#include <linux/scatterlist.h> 25#include <linux/scatterlist.h>
26#include <crypto/kpp.h>
27#include <crypto/ecdh.h> 26#include <crypto/ecdh.h>
28 27
29struct ecdh_completion { 28struct ecdh_completion {
@@ -50,55 +49,35 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
                 out[i] = __swab64(in[ndigits - 1 - i]);
 }
 
-bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
-                         u8 secret[32])
+/* compute_ecdh_secret() - function assumes that the private key was
+ * already set.
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: pair's ecc public key.
+ * secret: memory where the ecdh computed shared secret will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
+                        u8 secret[32])
 {
-        struct crypto_kpp *tfm;
         struct kpp_request *req;
-        struct ecdh p;
+        u8 *tmp;
         struct ecdh_completion result;
         struct scatterlist src, dst;
-        u8 *tmp, *buf;
-        unsigned int buf_len;
-        int err = -ENOMEM;
+        int err;
 
         tmp = kmalloc(64, GFP_KERNEL);
         if (!tmp)
-                return false;
+                return -ENOMEM;
 
-        tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
-        if (IS_ERR(tfm)) {
-                pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
-                       PTR_ERR(tfm));
+        req = kpp_request_alloc(tfm, GFP_KERNEL);
+        if (!req) {
+                err = -ENOMEM;
                 goto free_tmp;
         }
 
-        req = kpp_request_alloc(tfm, GFP_KERNEL);
-        if (!req)
-                goto free_kpp;
-
         init_completion(&result.completion);
 
-        /* Security Manager Protocol holds digits in litte-endian order
-         * while ECC API expect big-endian data
-         */
-        swap_digits((u64 *)private_key, (u64 *)tmp, 4);
-        p.key = (char *)tmp;
-        p.key_size = 32;
-        /* Set curve_id */
-        p.curve_id = ECC_CURVE_NIST_P256;
-        buf_len = crypto_ecdh_key_len(&p);
-        buf = kmalloc(buf_len, GFP_KERNEL);
-        if (!buf)
-                goto free_req;
-
-        crypto_ecdh_encode_key(buf, buf_len, &p);
-
-        /* Set A private Key */
-        err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len);
-        if (err)
-                goto free_all;
-
         swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
         swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
 
@@ -123,104 +102,129 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
123 memcpy(secret, tmp, 32); 102 memcpy(secret, tmp, 32);
124 103
125free_all: 104free_all:
126 kzfree(buf);
127free_req:
128 kpp_request_free(req); 105 kpp_request_free(req);
129free_kpp:
130 crypto_free_kpp(tfm);
131free_tmp: 106free_tmp:
132 kfree(tmp); 107 kzfree(tmp);
133 return (err == 0); 108 return err;
134} 109}
135 110
136bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]) 111/* set_ecdh_privkey() - set or generate ecc private key.
112 *
113 * Function generates an ecc private key in the crypto subsystem when receiving
114 * a NULL private key or sets the received key when not NULL.
115 *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @private_key: user's ecc private key. When not NULL, the key is expected
+ *		 in little endian format.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32])
+{
+	u8 *buf, *tmp = NULL;
+	unsigned int buf_len;
+	int err;
+	struct ecdh p = {0};
+
+	p.curve_id = ECC_CURVE_NIST_P256;
+
+	if (private_key) {
+		tmp = kmalloc(32, GFP_KERNEL);
+		if (!tmp)
+			return -ENOMEM;
+		swap_digits((u64 *)private_key, (u64 *)tmp, 4);
+		p.key = tmp;
+		p.key_size = 32;
+	}
+
+	buf_len = crypto_ecdh_key_len(&p);
+	buf = kmalloc(buf_len, GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto free_tmp;
+	}
+
+	err = crypto_ecdh_encode_key(buf, buf_len, &p);
+	if (err)
+		goto free_all;
+
+	err = crypto_kpp_set_secret(tfm, buf, buf_len);
+	/* fall through */
+free_all:
+	kzfree(buf);
+free_tmp:
+	kzfree(tmp);
+	return err;
+}
+
+/* generate_ecdh_public_key() - function assumes that the private key was
+ * already set.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
 {
-	struct crypto_kpp *tfm;
 	struct kpp_request *req;
-	struct ecdh p;
+	u8 *tmp;
 	struct ecdh_completion result;
 	struct scatterlist dst;
-	u8 *tmp, *buf;
-	unsigned int buf_len;
-	int err = -ENOMEM;
-	const unsigned short max_tries = 16;
-	unsigned short tries = 0;
+	int err;
 
 	tmp = kmalloc(64, GFP_KERNEL);
 	if (!tmp)
-		return false;
+		return -ENOMEM;
 
-	tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
-	if (IS_ERR(tfm)) {
-		pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
-		       PTR_ERR(tfm));
+	req = kpp_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
+		err = -ENOMEM;
 		goto free_tmp;
 	}
 
-	req = kpp_request_alloc(tfm, GFP_KERNEL);
-	if (!req)
-		goto free_kpp;
-
 	init_completion(&result.completion);
+	sg_init_one(&dst, tmp, 64);
+	kpp_request_set_input(req, NULL, 0);
+	kpp_request_set_output(req, &dst, 64);
+	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				 ecdh_complete, &result);
 
-	/* Set curve_id */
-	p.curve_id = ECC_CURVE_NIST_P256;
-	p.key_size = 32;
-	buf_len = crypto_ecdh_key_len(&p);
-	buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!buf)
-		goto free_req;
+	err = crypto_kpp_generate_public_key(req);
+	if (err == -EINPROGRESS) {
+		wait_for_completion(&result.completion);
+		err = result.err;
+	}
+	if (err < 0)
+		goto free_all;
 
-	do {
-		if (tries++ >= max_tries)
-			goto free_all;
-
-		/* Set private Key */
-		p.key = (char *)private_key;
-		crypto_ecdh_encode_key(buf, buf_len, &p);
-		err = crypto_kpp_set_secret(tfm, buf, buf_len);
-		if (err)
-			goto free_all;
-
-		sg_init_one(&dst, tmp, 64);
-		kpp_request_set_input(req, NULL, 0);
-		kpp_request_set_output(req, &dst, 64);
-		kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-					 ecdh_complete, &result);
-
-		err = crypto_kpp_generate_public_key(req);
-
-		if (err == -EINPROGRESS) {
-			wait_for_completion(&result.completion);
-			err = result.err;
-		}
-
-		/* Private key is not valid. Regenerate */
-		if (err == -EINVAL)
-			continue;
-
-		if (err < 0)
-			goto free_all;
-		else
-			break;
-
-	} while (true);
-
-	/* Keys are handed back in little endian as expected by Security
-	 * Manager Protocol
+	/* The public key is handed back in little endian as expected by
+	 * the Security Manager Protocol.
 	 */
 	swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */
 	swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */
-	swap_digits((u64 *)private_key, (u64 *)tmp, 4);
-	memcpy(private_key, tmp, 32);
 
 free_all:
-	kzfree(buf);
-free_req:
 	kpp_request_free(req);
-free_kpp:
-	crypto_free_kpp(tfm);
free_tmp:
 	kfree(tmp);
-	return (err == 0);
+	return err;
+}
+
+/* generate_ecdh_keys() - generate ecc key pair.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64])
+{
+	int err;
+
+	err = set_ecdh_privkey(tfm, NULL);
+	if (err)
+		return err;
+
+	return generate_ecdh_public_key(tfm, public_key);
 }
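The request built above completes asynchronously, so ecdh_helper.c pairs it with a small completion wrapper. The callback and its context struct are referenced by kpp_request_set_callback() here but defined outside this hunk; a hedged sketch of what they presumably look like, based on the call sites rather than a quote of the file:

#include <linux/completion.h>
#include <crypto/kpp.h>

struct ecdh_completion {
	struct completion completion;
	int err;
};

static void ecdh_complete(struct crypto_async_request *req, int err)
{
	struct ecdh_completion *res = req->data;

	/* -EINPROGRESS only means the request was queued; keep waiting */
	if (err == -EINPROGRESS)
		return;

	res->err = err;
	complete(&res->completion);
}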
diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
index 7a423faf76e5..a6f8d03d4aaf 100644
--- a/net/bluetooth/ecdh_helper.h
+++ b/net/bluetooth/ecdh_helper.h
@@ -20,8 +20,11 @@
  * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
  * SOFTWARE IS DISCLAIMED.
  */
+#include <crypto/kpp.h>
 #include <linux/types.h>
 
-bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32],
-			 u8 secret[32]);
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]);
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64],
+			u8 secret[32]);
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]);
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]);
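Taken together, the reworked header splits ECDH handling into explicit steps on one long-lived tfm: allocate it once, load (or generate) a private key, derive the public key, then compute shared secrets. A minimal usage sketch, assuming a kernel context where the "ecdh" KPP algorithm is available; the function name and the simplified error handling are illustrative, not part of the patch:

#include <linux/err.h>
#include <crypto/kpp.h>
#include "ecdh_helper.h"

static int ecdh_roundtrip_sketch(const u8 remote_pk[64], u8 secret[32])
{
	struct crypto_kpp *tfm;
	u8 local_pk[64];
	int err;

	tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* a NULL private key asks the ecdh driver to generate one */
	err = set_ecdh_privkey(tfm, NULL);
	if (!err)
		err = generate_ecdh_public_key(tfm, local_pk);
	if (!err)
		err = compute_ecdh_secret(tfm, remote_pk, secret);

	crypto_free_kpp(tfm);
	return err;
}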
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index dc59eae54717..a9682534c377 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -729,8 +729,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 		goto done;
 	}
 
-	BT_ERR("HCI request failed to create LE connection: status 0x%2.2x",
-	       status);
+	bt_dev_err(hdev, "request failed to create LE connection: "
+		   "status 0x%2.2x", status);
 
 	if (!conn)
 		goto done;
@@ -907,7 +907,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 	 */
 	if (hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
 	    hdev->le_scan_type == LE_SCAN_ACTIVE) {
-		skb_queue_purge(&req.cmd_q);
+		hci_req_purge(&req);
 		hci_conn_del(conn);
 		return ERR_PTR(-EBUSY);
 	}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 6bc679cd3481..40d260f2bea5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -267,7 +267,7 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt)
 		amp_init1(req);
 		break;
 	default:
-		BT_ERR("Unknown device type %d", hdev->dev_type);
+		bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type);
 		break;
 	}
 
@@ -2150,8 +2150,7 @@ static void hci_error_reset(struct work_struct *work)
 	if (hdev->hw_error)
 		hdev->hw_error(hdev, hdev->hw_error_code);
 	else
-		BT_ERR("%s hardware error 0x%2.2x", hdev->name,
-		       hdev->hw_error_code);
+		bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
 
 	if (hci_dev_do_close(hdev))
 		return;
@@ -2524,9 +2523,9 @@ static void hci_cmd_timeout(struct work_struct *work)
 		struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
 		u16 opcode = __le16_to_cpu(sent->opcode);
 
-		BT_ERR("%s command 0x%4.4x tx timeout", hdev->name, opcode);
+		bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode);
 	} else {
-		BT_ERR("%s command tx timeout", hdev->name);
+		bt_dev_err(hdev, "command tx timeout");
 	}
 
 	atomic_set(&hdev->cmd_cnt, 1);
@@ -2858,7 +2857,7 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
 
 	params = kzalloc(sizeof(*params), GFP_KERNEL);
 	if (!params) {
-		BT_ERR("Out of memory");
+		bt_dev_err(hdev, "out of memory");
 		return NULL;
 	}
 
@@ -3393,7 +3392,7 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 
 	err = hdev->send(hdev, skb);
 	if (err < 0) {
-		BT_ERR("%s sending frame failed (%d)", hdev->name, err);
+		bt_dev_err(hdev, "sending frame failed (%d)", err);
 		kfree_skb(skb);
 	}
 }
@@ -3408,7 +3407,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
 
 	skb = hci_prepare_cmd(hdev, opcode, plen, param);
 	if (!skb) {
-		BT_ERR("%s no memory for command", hdev->name);
+		bt_dev_err(hdev, "no memory for command");
 		return -ENOMEM;
 	}
 
@@ -3493,7 +3492,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
 		hci_add_acl_hdr(skb, chan->handle, flags);
 		break;
 	default:
-		BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type);
+		bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
 		return;
 	}
 
@@ -3618,7 +3617,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
 		break;
 	default:
 		cnt = 0;
-		BT_ERR("Unknown link type");
+		bt_dev_err(hdev, "unknown link type %d", conn->type);
 	}
 
 	q = cnt / num;
@@ -3635,15 +3634,15 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
 	struct hci_conn_hash *h = &hdev->conn_hash;
 	struct hci_conn *c;
 
-	BT_ERR("%s link tx timeout", hdev->name);
+	bt_dev_err(hdev, "link tx timeout");
 
 	rcu_read_lock();
 
 	/* Kill stalled connections */
 	list_for_each_entry_rcu(c, &h->list, list) {
 		if (c->type == type && c->sent) {
-			BT_ERR("%s killing stalled connection %pMR",
-			       hdev->name, &c->dst);
+			bt_dev_err(hdev, "killing stalled connection %pMR",
+				   &c->dst);
 			hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
 		}
 	}
@@ -3724,7 +3723,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
 		break;
 	default:
 		cnt = 0;
-		BT_ERR("Unknown link type");
+		bt_dev_err(hdev, "unknown link type %d", chan->conn->type);
 	}
 
 	q = cnt / num;
@@ -4066,8 +4065,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		l2cap_recv_acldata(conn, skb, flags);
 		return;
 	} else {
-		BT_ERR("%s ACL packet for unknown connection handle %d",
-		       hdev->name, handle);
+		bt_dev_err(hdev, "ACL packet for unknown connection handle %d",
+			   handle);
 	}
 
 	kfree_skb(skb);
@@ -4097,8 +4096,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		sco_recv_scodata(conn, skb);
 		return;
 	} else {
-		BT_ERR("%s SCO packet for unknown connection handle %d",
-		       hdev->name, handle);
+		bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
+			   handle);
 	}
 
 	kfree_skb(skb);
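All of the hci_core.c conversions above follow one pattern: drop the hand-rolled "%s"/hdev->name prefix and let the logging helper derive the device name itself. The helper is not part of this diff; judging by the call sites it presumably reduces to something like the following (an assumption, not a quote of include/net/bluetooth/bluetooth.h):

/* assumed shape of the helper used by these hunks */
#define bt_dev_err(hdev, fmt, ...) \
	BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__)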
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0b4dba08a14e..cd3bbb766c24 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1188,7 +1188,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 		break;
 
 	default:
-		BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable);
+		bt_dev_err(hdev, "use of reserved LE_Scan_Enable param %d",
+			   cp->enable);
 		break;
 	}
 
@@ -1485,7 +1486,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
 		conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr,
 				    HCI_ROLE_MASTER);
 		if (!conn)
-			BT_ERR("No memory for new connection");
+			bt_dev_err(hdev, "no memory for new connection");
 	}
 }
 
@@ -2269,7 +2270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
 				    HCI_ROLE_SLAVE);
 		if (!conn) {
-			BT_ERR("No memory for new connection");
+			bt_dev_err(hdev, "no memory for new connection");
 			hci_dev_unlock(hdev);
 			return;
 		}
@@ -2431,7 +2432,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	if (!hci_conn_ssp_enabled(conn) &&
 	    test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
-		BT_INFO("re-auth of legacy device is not possible.");
+		bt_dev_info(hdev, "re-auth of legacy device is not possible.");
 	} else {
 		set_bit(HCI_CONN_AUTH, &conn->flags);
 		conn->sec_level = conn->pending_sec_level;
@@ -2535,8 +2536,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
 	BT_DBG("%s status 0x%02x", hdev->name, status);
 
 	if (!skb || skb->len < sizeof(*rp)) {
-		BT_ERR("%s invalid HCI Read Encryption Key Size response",
-		       hdev->name);
+		bt_dev_err(hdev, "invalid read key size response");
 		return;
 	}
 
@@ -2554,8 +2554,8 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
 	 * supported.
 	 */
 	if (rp->status) {
-		BT_ERR("%s failed to read key size for handle %u", hdev->name,
-		       handle);
+		bt_dev_err(hdev, "failed to read key size for handle %u",
+			   handle);
 		conn->enc_key_size = HCI_LINK_KEY_SIZE;
 	} else {
 		conn->enc_key_size = rp->key_size;
@@ -2664,7 +2664,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
 
 	if (hci_req_run_skb(&req, read_enc_key_size_complete)) {
-		BT_ERR("Sending HCI Read Encryption Key Size failed");
+		bt_dev_err(hdev, "sending read key size failed");
 		conn->enc_key_size = HCI_LINK_KEY_SIZE;
 		goto notify;
 	}
@@ -3197,7 +3197,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	int i;
 
 	if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
-		BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
+		bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
 		return;
 	}
 
@@ -3249,7 +3249,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		break;
 
 	default:
-		BT_ERR("Unknown type %d conn %p", conn->type, conn);
+		bt_dev_err(hdev, "unknown type %d conn %p",
+			   conn->type, conn);
 		break;
 	}
 }
@@ -3271,7 +3272,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
 			return chan->conn;
 		break;
 	default:
-		BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type);
+		bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
 		break;
 	}
 
@@ -3284,7 +3285,7 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	int i;
 
 	if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) {
-		BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
+		bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
 		return;
 	}
 
@@ -3320,7 +3321,8 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		break;
 
 	default:
-		BT_ERR("Unknown type %d conn %p", conn->type, conn);
+		bt_dev_err(hdev, "unknown type %d conn %p",
+			   conn->type, conn);
 		break;
 	}
 }
@@ -4479,7 +4481,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!conn) {
 		conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role);
 		if (!conn) {
-			BT_ERR("No memory for new connection");
+			bt_dev_err(hdev, "no memory for new connection");
 			goto unlock;
 		}
 
@@ -4749,8 +4751,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 	case LE_ADV_SCAN_RSP:
 		break;
 	default:
-		BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
-				   type);
+		bt_dev_err_ratelimited(hdev, "unknown advertising packet "
+				       "type: 0x%02x", type);
 		return;
 	}
 
4756 4758
@@ -4769,8 +4771,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
4769 4771
4770 /* Adjust for actual length */ 4772 /* Adjust for actual length */
4771 if (len != real_len) { 4773 if (len != real_len) {
4772 BT_ERR_RATELIMITED("%s advertising data length corrected", 4774 bt_dev_err_ratelimited(hdev, "advertising data len corrected");
4773 hdev->name);
4774 len = real_len; 4775 len = real_len;
4775 } 4776 }
4776 4777
@@ -5192,7 +5193,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
5192 return false; 5193 return false;
5193 5194
5194 if (skb->len < sizeof(*hdr)) { 5195 if (skb->len < sizeof(*hdr)) {
5195 BT_ERR("Too short HCI event"); 5196 bt_dev_err(hdev, "too short HCI event");
5196 return false; 5197 return false;
5197 } 5198 }
5198 5199
@@ -5206,12 +5207,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
5206 } 5207 }
5207 5208
5208 if (hdr->evt != HCI_EV_CMD_COMPLETE) { 5209 if (hdr->evt != HCI_EV_CMD_COMPLETE) {
5209 BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); 5210 bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)",
5211 hdr->evt);
5210 return false; 5212 return false;
5211 } 5213 }
5212 5214
5213 if (skb->len < sizeof(*ev)) { 5215 if (skb->len < sizeof(*ev)) {
5214 BT_ERR("Too short cmd_complete event"); 5216 bt_dev_err(hdev, "too short cmd_complete event");
5215 return false; 5217 return false;
5216 } 5218 }
5217 5219
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b73ac149de34..abc0f3224dd1 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -41,6 +41,11 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
 	req->err = 0;
 }
 
+void hci_req_purge(struct hci_request *req)
+{
+	skb_queue_purge(&req->cmd_q);
+}
+
 static int req_run(struct hci_request *req, hci_req_complete_t complete,
 		   hci_req_complete_skb_t complete_skb)
 {
@@ -331,8 +336,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
 
 	skb = hci_prepare_cmd(hdev, opcode, plen, param);
 	if (!skb) {
-		BT_ERR("%s no memory for command (opcode 0x%4.4x)",
-		       hdev->name, opcode);
+		bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)",
+			   opcode);
 		req->err = -ENOMEM;
 		return;
 	}
@@ -1421,7 +1426,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 
 		err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
 		if (err < 0) {
-			BT_ERR("%s failed to generate new RPA", hdev->name);
+			bt_dev_err(hdev, "failed to generate new RPA");
 			return err;
 		}
 
@@ -1783,7 +1788,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
 
 	err = hci_req_run(&req, abort_conn_complete);
 	if (err && err != -ENODATA) {
-		BT_ERR("Failed to run HCI request: err %d", err);
+		bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err);
 		return err;
 	}
 
@@ -1867,7 +1872,8 @@ static void le_scan_disable_work(struct work_struct *work)
 
 	hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status);
 	if (status) {
-		BT_ERR("Failed to disable LE scan: status 0x%02x", status);
+		bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x",
+			   status);
 		return;
 	}
 
@@ -1898,7 +1904,7 @@ static void le_scan_disable_work(struct work_struct *work)
 	hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN,
 		     HCI_CMD_TIMEOUT, &status);
 	if (status) {
-		BT_ERR("Inquiry failed: status 0x%02x", status);
+		bt_dev_err(hdev, "inquiry failed: status 0x%02x", status);
 		goto discov_stopped;
 	}
 
@@ -1940,7 +1946,8 @@ static void le_scan_restart_work(struct work_struct *work)
 
 	hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
 	if (status) {
-		BT_ERR("Failed to restart LE scan: status %d", status);
+		bt_dev_err(hdev, "failed to restart LE scan: status %d",
+			   status);
 		return;
 	}
 
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index dde77bd59f91..702beb140d9f 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -36,6 +36,7 @@ struct hci_request {
 };
 
 void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
+void hci_req_purge(struct hci_request *req);
 int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
 int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
 void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
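hci_req_purge() gives callers a sanctioned way to discard a request that was built but never run, as the hci_conn.c and mgmt.c hunks in this patch do. A hedged sketch of the intended pattern; the predicate and opcode here are placeholders, not code from the patch:

static int build_or_bail(struct hci_dev *hdev, bool abort)
{
	struct hci_request req;

	hci_req_init(&req, hdev);
	hci_req_add(&req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);

	if (abort) {
		/* never ran: drop the queued commands instead of leaking them */
		hci_req_purge(&req);
		return -EBUSY;
	}

	return hci_req_run(&req, NULL);
}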
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 0bad296fe0af..923e9a271872 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 }
 
 /* Send frame to sockets with specific channel */
-void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
-			 int flag, struct sock *skip_sk)
+static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+				  int flag, struct sock *skip_sk)
 {
 	struct sock *sk;
 
 	BT_DBG("channel %u len %d", channel, skb->len);
 
-	read_lock(&hci_sk_list.lock);
-
 	sk_for_each(sk, &hci_sk_list.head) {
 		struct sk_buff *nskb;
 
@@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
 		kfree_skb(nskb);
 	}
 
+}
+
+void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+			 int flag, struct sock *skip_sk)
+{
+	read_lock(&hci_sk_list.lock);
+	__hci_send_to_channel(channel, skb, flag, skip_sk);
 	read_unlock(&hci_sk_list.lock);
 }
 
@@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
 		hdr->index = index;
 		hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
 
-		hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
-				    HCI_SOCK_TRUSTED, NULL);
+		__hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+				      HCI_SOCK_TRUSTED, NULL);
 		kfree_skb(skb);
 	}
 
@@ -878,7 +883,6 @@ static int hci_sock_release(struct socket *sock)
 	return 0;
 }
 
-#ifdef CONFIG_BT_LEGACY_IOCTL
 static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
 {
 	bdaddr_t bdaddr;
@@ -1050,7 +1054,6 @@ done:
 	release_sock(sk);
 	return err;
 }
-#endif
 
 static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 			 int addr_len)
@@ -1971,11 +1974,7 @@ static const struct proto_ops hci_sock_ops = {
 	.getname	= hci_sock_getname,
 	.sendmsg	= hci_sock_sendmsg,
 	.recvmsg	= hci_sock_recvmsg,
-#ifdef CONFIG_BT_LEGACY_IOCTL
 	.ioctl		= hci_sock_ioctl,
-#else
-	.ioctl		= sock_no_ioctl,
-#endif
 	.poll		= datagram_poll,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
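The hci_sock.c split is the usual locked/__unlocked refactor: hci_send_monitor_ctrl_event() appears to run while hci_sk_list.lock is already held, so calling the public hci_send_to_channel() from there would take the read lock twice. A sketch of the idiom, with hypothetical names:

static void __do_send(struct sk_buff *skb)
{
	/* caller must hold read_lock(&hci_sk_list.lock) */
	struct sock *sk;

	sk_for_each(sk, &hci_sk_list.head) {
		/* deliver a clone of skb to each eligible socket */
	}
}

static void do_send(struct sk_buff *skb)
{
	read_lock(&hci_sk_list.lock);
	__do_send(skb);
	read_unlock(&hci_sk_list.lock);
}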
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index aa300f3a0d51..9874844a95a9 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Bluetooth HCI driver model support. */
 
 #include <linux/module.h>
@@ -50,7 +51,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
 	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
 
 	if (device_add(&conn->dev) < 0) {
-		BT_ERR("Failed to register connection device");
+		bt_dev_err(hdev, "failed to register connection device");
 		return;
 	}
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 8112893037bd..f2cec70d520c 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -398,9 +398,9 @@ static int hidp_raw_request(struct hid_device *hid, unsigned char reportnum,
 	}
 }
 
-static void hidp_idle_timeout(unsigned long arg)
+static void hidp_idle_timeout(struct timer_list *t)
 {
-	struct hidp_session *session = (struct hidp_session *) arg;
+	struct hidp_session *session = from_timer(session, t, timer);
 
 	/* The HIDP user-space API only contains calls to add and remove
 	 * devices. There is no way to forward events of any kind. Therefore,
@@ -944,8 +944,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
 
 	/* device management */
 	INIT_WORK(&session->dev_init, hidp_session_dev_work);
-	setup_timer(&session->timer, hidp_idle_timeout,
-		    (unsigned long)session);
+	timer_setup(&session->timer, hidp_idle_timeout, 0);
 
 	/* session data */
 	mutex_init(&session->report_mutex);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1fba2a03f8ae..6e9fc86d8daf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2159,8 +2159,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	key_count = __le16_to_cpu(cp->key_count);
 	if (key_count > max_key_count) {
-		BT_ERR("load_link_keys: too big key_count value %u",
-		       key_count);
+		bt_dev_err(hdev, "load_link_keys: too big key_count value %u",
+			   key_count);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -2168,8 +2168,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 	expected_len = sizeof(*cp) + key_count *
 					sizeof(struct mgmt_link_key_info);
 	if (expected_len != len) {
-		BT_ERR("load_link_keys: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -2561,7 +2561,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data,
 
 		memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr));
 
-		BT_ERR("PIN code is not 16 bytes long");
+		bt_dev_err(hdev, "PIN code is not 16 bytes long");
 
 		err = send_pin_code_neg_reply(sk, hdev, &ncp);
 		if (err >= 0)
@@ -3391,7 +3391,8 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
 					MGMT_OP_ADD_REMOTE_OOB_DATA,
 					status, &cp->addr, sizeof(cp->addr));
 	} else {
-		BT_ERR("add_remote_oob_data: invalid length of %u bytes", len);
+		bt_dev_err(hdev, "add_remote_oob_data: invalid len of %u bytes",
+			   len);
 		err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA,
 				      MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -3604,8 +3605,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
 
 	uuid_count = __le16_to_cpu(cp->uuid_count);
 	if (uuid_count > max_uuid_count) {
-		BT_ERR("service_discovery: too big uuid_count value %u",
-		       uuid_count);
+		bt_dev_err(hdev, "service_discovery: too big uuid_count value %u",
+			   uuid_count);
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_START_SERVICE_DISCOVERY,
 					MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3615,8 +3616,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
 
 	expected_len = sizeof(*cp) + uuid_count * 16;
 	if (expected_len != len) {
-		BT_ERR("service_discovery: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "service_discovery: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_START_SERVICE_DISCOVERY,
 					MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3943,7 +3944,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
 		err = hci_req_run(&req, enable_advertising_instance);
 
 	if (err)
-		BT_ERR("Failed to re-configure advertising");
+		bt_dev_err(hdev, "failed to re-configure advertising");
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -4664,15 +4665,16 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 
 	irk_count = __le16_to_cpu(cp->irk_count);
 	if (irk_count > max_irk_count) {
-		BT_ERR("load_irks: too big irk_count value %u", irk_count);
+		bt_dev_err(hdev, "load_irks: too big irk_count value %u",
+			   irk_count);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
 	expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
 	if (expected_len != len) {
-		BT_ERR("load_irks: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -4745,7 +4747,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 
 	key_count = __le16_to_cpu(cp->key_count);
 	if (key_count > max_key_count) {
-		BT_ERR("load_ltks: too big key_count value %u", key_count);
+		bt_dev_err(hdev, "load_ltks: too big key_count value %u",
+			   key_count);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -4753,8 +4756,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 	expected_len = sizeof(*cp) + key_count *
 					sizeof(struct mgmt_ltk_info);
 	if (expected_len != len) {
-		BT_ERR("load_keys: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -4873,14 +4876,15 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status,
 	}
 
 	if (!cp) {
-		BT_ERR("invalid sent_cmd in conn_info response");
+		bt_dev_err(hdev, "invalid sent_cmd in conn_info response");
 		goto unlock;
 	}
 
 	handle = __le16_to_cpu(cp->handle);
 	conn = hci_conn_hash_lookup_handle(hdev, handle);
 	if (!conn) {
-		BT_ERR("unknown handle (%d) in conn_info response", handle);
+		bt_dev_err(hdev, "unknown handle (%d) in conn_info response",
+			   handle);
 		goto unlock;
 	}
 
@@ -5477,8 +5481,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	param_count = __le16_to_cpu(cp->param_count);
 	if (param_count > max_param_count) {
-		BT_ERR("load_conn_param: too big param_count value %u",
-		       param_count);
+		bt_dev_err(hdev, "load_conn_param: too big param_count value %u",
+			   param_count);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -5486,8 +5490,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 	expected_len = sizeof(*cp) + param_count *
 					sizeof(struct mgmt_conn_param);
 	if (expected_len != len) {
-		BT_ERR("load_conn_param: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -5512,7 +5516,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 		} else if (param->addr.type == BDADDR_LE_RANDOM) {
 			addr_type = ADDR_LE_DEV_RANDOM;
 		} else {
-			BT_ERR("Ignoring invalid connection parameters");
+			bt_dev_err(hdev, "ignoring invalid connection parameters");
 			continue;
 		}
 
@@ -5525,14 +5529,14 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 			min, max, latency, timeout);
 
 		if (hci_check_conn_params(min, max, latency, timeout) < 0) {
-			BT_ERR("Ignoring invalid connection parameters");
+			bt_dev_err(hdev, "ignoring invalid connection parameters");
 			continue;
 		}
 
 		hci_param = hci_conn_params_add(hdev, &param->addr.bdaddr,
 						addr_type);
 		if (!hci_param) {
-			BT_ERR("Failed to add connection parameters");
+			bt_dev_err(hdev, "failed to add connection parameters");
 			continue;
 		}
 
@@ -6383,6 +6387,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
 	if (skb_queue_empty(&req.cmd_q) ||
 	    !hdev_is_powered(hdev) ||
 	    hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
+		hci_req_purge(&req);
 		rp.instance = cp->instance;
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_REMOVE_ADVERTISING,
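The mgmt.c hunks above all touch the same validation idiom: bound the element count first, then require the buffer length to match exactly. A runnable userspace sketch of that check, with hypothetical types standing in for the mgmt structures:

#include <stddef.h>
#include <stdint.h>

struct cmd { uint16_t key_count; /* key_count elements follow */ };
struct key_info { uint8_t val[16]; };

static int validate(const struct cmd *cp, size_t len, size_t max_count)
{
	size_t expected_len;

	if (cp->key_count > max_count)
		return -1;	/* too many elements */

	expected_len = sizeof(*cp) + cp->key_count * sizeof(struct key_info);
	if (expected_len != len)
		return -1;	/* truncated or padded buffer */

	return 0;
}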
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 4a0b41d75c84..b98225d65e87 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -233,9 +233,9 @@ static int rfcomm_check_security(struct rfcomm_dlc *d)
 					 d->out);
 }
 
-static void rfcomm_session_timeout(unsigned long arg)
+static void rfcomm_session_timeout(struct timer_list *t)
 {
-	struct rfcomm_session *s = (void *) arg;
+	struct rfcomm_session *s = from_timer(s, t, timer);
 
 	BT_DBG("session %p state %ld", s, s->state);
 
@@ -258,9 +258,9 @@ static void rfcomm_session_clear_timer(struct rfcomm_session *s)
 }
 
 /* ---- RFCOMM DLCs ---- */
-static void rfcomm_dlc_timeout(unsigned long arg)
+static void rfcomm_dlc_timeout(struct timer_list *t)
 {
-	struct rfcomm_dlc *d = (void *) arg;
+	struct rfcomm_dlc *d = from_timer(d, t, timer);
 
 	BT_DBG("dlc %p state %ld", d, d->state);
 
@@ -307,7 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
 	if (!d)
 		return NULL;
 
-	setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d);
+	timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
 
 	skb_queue_head_init(&d->tx_queue);
 	mutex_init(&d->lock);
@@ -650,7 +650,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
 
 	BT_DBG("session %p sock %p", s, sock);
 
-	setup_timer(&s->timer, rfcomm_session_timeout, (unsigned long) s);
+	timer_setup(&s->timer, rfcomm_session_timeout, 0);
 
 	INIT_LIST_HEAD(&s->dlcs);
 	s->state = state;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 795e920a3281..08df57665e1f 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -73,9 +73,9 @@ struct sco_pinfo {
 #define SCO_CONN_TIMEOUT	(HZ * 40)
 #define SCO_DISCONN_TIMEOUT	(HZ * 2)
 
-static void sco_sock_timeout(unsigned long arg)
+static void sco_sock_timeout(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)arg;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 
 	BT_DBG("sock %p state %d", sk, sk->sk_state);
 
@@ -487,7 +487,7 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
 
 	sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
 
-	setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk);
+	timer_setup(&sk->sk_timer, sco_sock_timeout, 0);
 
 	bt_sock_link(&sco_sk_list, sk);
 	return sk;
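The hidp, rfcomm and sco changes above are all instances of the same timer API conversion: the callback now receives the timer_list pointer and recovers its container with from_timer() instead of casting an unsigned long. A minimal sketch of the pattern, with a made-up session type:

#include <linux/timer.h>
#include <linux/jiffies.h>

struct my_session {
	struct timer_list timer;
	int state;
};

static void my_timeout(struct timer_list *t)
{
	struct my_session *s = from_timer(s, t, timer);

	s->state = 0;	/* timer expired */
}

static void my_session_init(struct my_session *s)
{
	timer_setup(&s->timer, my_timeout, 0);
	mod_timer(&s->timer, jiffies + HZ);
}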
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index 34a1227f4391..03e3c89c3046 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -138,12 +138,12 @@ static const u8 dhkey_3[32] __initconst = {
 	0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70,
 };
 
-static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
-				   const u8 pub_a[64], const u8 pub_b[64],
-				   const u8 dhkey[32])
+static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32],
+				   const u8 priv_b[32], const u8 pub_a[64],
+				   const u8 pub_b[64], const u8 dhkey[32])
 {
 	u8 *tmp, *dhkey_a, *dhkey_b;
-	int ret = 0;
+	int ret;
 
 	tmp = kmalloc(64, GFP_KERNEL);
 	if (!tmp)
@@ -152,17 +152,30 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
 	dhkey_a = &tmp[0];
 	dhkey_b = &tmp[32];
 
-	compute_ecdh_secret(pub_b, priv_a, dhkey_a);
-	compute_ecdh_secret(pub_a, priv_b, dhkey_b);
+	ret = set_ecdh_privkey(tfm, priv_a);
+	if (ret)
+		goto out;
+
+	ret = compute_ecdh_secret(tfm, pub_b, dhkey_a);
+	if (ret)
+		goto out;
 
 	if (memcmp(dhkey_a, dhkey, 32)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
+	ret = set_ecdh_privkey(tfm, priv_b);
+	if (ret)
+		goto out;
+
+	ret = compute_ecdh_secret(tfm, pub_a, dhkey_b);
+	if (ret)
+		goto out;
+
 	if (memcmp(dhkey_b, dhkey, 32))
 		ret = -EINVAL;
-
+	/* fall through*/
 out:
 	kfree(tmp);
 	return ret;
@@ -185,30 +198,43 @@ static const struct file_operations test_ecdh_fops = {
 
 static int __init test_ecdh(void)
 {
+	struct crypto_kpp *tfm;
 	ktime_t calltime, delta, rettime;
-	unsigned long long duration;
+	unsigned long long duration = 0;
 	int err;
 
 	calltime = ktime_get();
 
-	err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1);
+	tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+	if (IS_ERR(tfm)) {
+		BT_ERR("Unable to create ECDH crypto context");
+		err = PTR_ERR(tfm);
+		goto done;
+	}
+
+	err = test_ecdh_sample(tfm, priv_a_1, priv_b_1, pub_a_1, pub_b_1,
+			       dhkey_1);
 	if (err) {
 		BT_ERR("ECDH sample 1 failed");
 		goto done;
 	}
 
-	err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2);
+	err = test_ecdh_sample(tfm, priv_a_2, priv_b_2, pub_a_2, pub_b_2,
+			       dhkey_2);
 	if (err) {
 		BT_ERR("ECDH sample 2 failed");
 		goto done;
 	}
 
-	err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3);
+	err = test_ecdh_sample(tfm, priv_a_3, priv_a_3, pub_a_3, pub_a_3,
+			       dhkey_3);
 	if (err) {
 		BT_ERR("ECDH sample 3 failed");
 		goto done;
 	}
 
+	crypto_free_kpp(tfm);
+
 	rettime = ktime_get();
 	delta = ktime_sub(rettime, calltime);
 	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a0ef89772c36..01117ae84f1d 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -26,6 +26,7 @@
 #include <crypto/algapi.h>
 #include <crypto/b128ops.h>
 #include <crypto/hash.h>
+#include <crypto/kpp.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -83,7 +84,6 @@ enum {
 struct smp_dev {
 	/* Secure Connections OOB data */
 	u8			local_pk[64];
-	u8			local_sk[32];
 	u8			local_rand[16];
 	bool			debug_key;
 
@@ -92,6 +92,7 @@ struct smp_dev {
 
 	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
+	struct crypto_kpp	*tfm_ecdh;
 };
 
 struct smp_chan {
@@ -124,13 +125,13 @@ struct smp_chan {
 
 	/* Secure Connections variables */
 	u8			local_pk[64];
-	u8			local_sk[32];
 	u8			remote_pk[64];
 	u8			dhkey[32];
 	u8			mackey[16];
 
 	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
+	struct crypto_kpp	*tfm_ecdh;
 };
 
 /* These debug key values are defined in the SMP section of the core
@@ -565,22 +566,22 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
 
 	if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
 		BT_DBG("Using debug keys");
+		err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk);
+		if (err)
+			return err;
 		memcpy(smp->local_pk, debug_pk, 64);
-		memcpy(smp->local_sk, debug_sk, 32);
 		smp->debug_key = true;
 	} else {
 		while (true) {
-			/* Seed private key with random number */
-			get_random_bytes(smp->local_sk, 32);
-
-			/* Generate local key pair for Secure Connections */
-			if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
-				return -EIO;
+			/* Generate key pair for Secure Connections */
+			err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk);
+			if (err)
+				return err;
 
 			/* This is unlikely, but we need to check that
 			 * we didn't accidentially generate a debug key.
 			 */
-			if (crypto_memneq(smp->local_sk, debug_sk, 32))
+			if (crypto_memneq(smp->local_pk, debug_pk, 64))
 				break;
 		}
 		smp->debug_key = false;
@@ -588,7 +589,6 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
 
 	SMP_DBG("OOB Public Key X: %32phN", smp->local_pk);
 	SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32);
-	SMP_DBG("OOB Private Key:  %32phN", smp->local_sk);
 
 	get_random_bytes(smp->local_rand, 16);
 
@@ -771,6 +771,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
 
 	crypto_free_cipher(smp->tfm_aes);
 	crypto_free_shash(smp->tfm_cmac);
+	crypto_free_kpp(smp->tfm_ecdh);
 
 	/* Ensure that we don't leave any debug key around if debug key
 	 * support hasn't been explicitly enabled.
@@ -995,7 +996,8 @@ static u8 smp_random(struct smp_chan *smp)
 		return SMP_UNSPECIFIED;
 
 	if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) {
-		BT_ERR("Pairing failed (confirmation values mismatch)");
+		bt_dev_err(hcon->hdev, "pairing failed "
+			   "(confirmation values mismatch)");
 		return SMP_CONFIRM_FAILED;
 	}
 
@@ -1209,7 +1211,7 @@ static void sc_generate_ltk(struct smp_chan *smp)
 
 	key = hci_find_link_key(hdev, &hcon->dst);
 	if (!key) {
-		BT_ERR("%s No Link Key found to generate LTK", hdev->name);
+		bt_dev_err(hdev, "no Link Key found to generate LTK");
 		return;
 	}
 
@@ -1391,16 +1393,19 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 	smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(smp->tfm_aes)) {
 		BT_ERR("Unable to create AES crypto context");
-		kzfree(smp);
-		return NULL;
+		goto zfree_smp;
 	}
 
 	smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(smp->tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_cipher(smp->tfm_aes);
-		kzfree(smp);
-		return NULL;
+		goto free_cipher;
+	}
+
+	smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+	if (IS_ERR(smp->tfm_ecdh)) {
+		BT_ERR("Unable to create ECDH crypto context");
+		goto free_shash;
 	}
 
 	smp->conn = conn;
@@ -1413,6 +1418,14 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 	hci_conn_hold(conn->hcon);
 
 	return smp;
+
+free_shash:
+	crypto_free_shash(smp->tfm_cmac);
+free_cipher:
+	crypto_free_cipher(smp->tfm_aes);
+zfree_smp:
+	kzfree(smp);
+	return NULL;
 }
 
 static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16])
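The reworked smp_chan_create() error path uses the standard kernel unwind idiom: each later failure jumps to a label that releases everything allocated so far, in reverse order. A runnable userspace sketch of the same shape (names hypothetical):

#include <stdlib.h>

struct ctx { void *a, *b, *c; };

static struct ctx *ctx_create(void)
{
	struct ctx *ctx = calloc(1, sizeof(*ctx));

	if (!ctx)
		return NULL;
	if (!(ctx->a = malloc(16)))
		goto free_ctx;
	if (!(ctx->b = malloc(16)))
		goto free_a;
	if (!(ctx->c = malloc(16)))
		goto free_b;
	return ctx;

free_b:
	free(ctx->b);
free_a:
	free(ctx->a);
free_ctx:
	free(ctx);
	return NULL;
}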
@@ -1883,7 +1896,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 	smp_dev = chan->data;
 
 	memcpy(smp->local_pk, smp_dev->local_pk, 64);
-	memcpy(smp->local_sk, smp_dev->local_sk, 32);
 	memcpy(smp->lr, smp_dev->local_rand, 16);
 
 	if (smp_dev->debug_key)
@@ -1894,22 +1906,20 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 
 	if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
 		BT_DBG("Using debug keys");
+		if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk))
+			return SMP_UNSPECIFIED;
 		memcpy(smp->local_pk, debug_pk, 64);
-		memcpy(smp->local_sk, debug_sk, 32);
 		set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
 	} else {
 		while (true) {
-			/* Seed private key with random number */
-			get_random_bytes(smp->local_sk, 32);
-
-			/* Generate local key pair for Secure Connections */
-			if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
+			/* Generate key pair for Secure Connections */
+			if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk))
 				return SMP_UNSPECIFIED;
 
 			/* This is unlikely, but we need to check that
 			 * we didn't accidentially generate a debug key.
 			 */
-			if (crypto_memneq(smp->local_sk, debug_sk, 32))
+			if (crypto_memneq(smp->local_pk, debug_pk, 64))
 				break;
 		}
 	}
@@ -1917,7 +1927,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 done:
 	SMP_DBG("Local Public Key X: %32phN", smp->local_pk);
 	SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32);
-	SMP_DBG("Local Private Key:  %32phN", smp->local_sk);
 
 	smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk);
 
@@ -2059,11 +2068,11 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
 		return SMP_UNSPECIFIED;
 
 	if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
-		BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode");
+		bt_dev_err(hdev, "refusing legacy fallback in SC-only mode");
 		return SMP_UNSPECIFIED;
 	}
 
-	BT_ERR("Trying to fall back to legacy SMP");
+	bt_dev_err(hdev, "trying to fall back to legacy SMP");
 
 	req = (void *) &smp->preq[1];
 	rsp = (void *) &smp->prsp[1];
@@ -2074,7 +2083,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
 	auth = req->auth_req & AUTH_REQ_MASK(hdev);
 
 	if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) {
-		BT_ERR("Failed to fall back to legacy SMP");
+		bt_dev_err(hdev, "failed to fall back to legacy SMP");
 		return SMP_UNSPECIFIED;
 	}
 
@@ -2347,7 +2356,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
 
 	chan = conn->smp;
 	if (!chan) {
-		BT_ERR("SMP security requested but not available");
+		bt_dev_err(hcon->hdev, "security requested but not available");
 		return 1;
 	}
 
@@ -2540,7 +2549,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
 	 */
 	if (!bacmp(&info->bdaddr, BDADDR_ANY) ||
 	    !hci_is_identity_address(&info->bdaddr, info->addr_type)) {
-		BT_ERR("Ignoring IRK with no identity address");
+		bt_dev_err(hcon->hdev, "ignoring IRK with no identity address");
 		goto distribute;
 	}
 
@@ -2645,6 +2654,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct l2cap_chan *chan = conn->smp;
 	struct smp_chan *smp = chan->data;
 	struct hci_dev *hdev = hcon->hdev;
+	struct crypto_kpp *tfm_ecdh;
 	struct smp_cmd_pairing_confirm cfm;
 	int err;
 
@@ -2677,7 +2687,18 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
 	SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk);
 	SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32);
 
-	if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey))
+	/* Compute the shared secret on the same crypto tfm on which the private
+	 * key was set/generated.
+	 */
+	if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) {
+		struct smp_dev *smp_dev = chan->data;
+
+		tfm_ecdh = smp_dev->tfm_ecdh;
+	} else {
+		tfm_ecdh = smp->tfm_ecdh;
+	}
+
+	if (compute_ecdh_secret(tfm_ecdh, smp->remote_pk, smp->dhkey))
 		return SMP_UNSPECIFIED;
 
 	SMP_DBG("DHKey %32phN", smp->dhkey);
@@ -2933,8 +2954,8 @@ done:
2933 return err; 2954 return err;
2934 2955
2935drop: 2956drop:
2936 BT_ERR("%s unexpected SMP command 0x%02x from %pMR", hcon->hdev->name, 2957 bt_dev_err(hcon->hdev, "unexpected SMP command 0x%02x from %pMR",
2937 code, &hcon->dst); 2958 code, &hcon->dst);
2938 kfree_skb(skb); 2959 kfree_skb(skb);
2939 return 0; 2960 return 0;
2940} 2961}
@@ -3001,8 +3022,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
3001 3022
3002 smp = smp_chan_create(conn); 3023 smp = smp_chan_create(conn);
3003 if (!smp) { 3024 if (!smp) {
3004 BT_ERR("%s unable to create SMP context for BR/EDR", 3025 bt_dev_err(hdev, "unable to create SMP context for BR/EDR");
3005 hdev->name);
3006 return; 3026 return;
3007 } 3027 }
3008 3028
@@ -3169,6 +3189,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3169 struct smp_dev *smp; 3189 struct smp_dev *smp;
3170 struct crypto_cipher *tfm_aes; 3190 struct crypto_cipher *tfm_aes;
3171 struct crypto_shash *tfm_cmac; 3191 struct crypto_shash *tfm_cmac;
3192 struct crypto_kpp *tfm_ecdh;
3172 3193
3173 if (cid == L2CAP_CID_SMP_BREDR) { 3194 if (cid == L2CAP_CID_SMP_BREDR) {
3174 smp = NULL; 3195 smp = NULL;
@@ -3194,8 +3215,18 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3194 return ERR_CAST(tfm_cmac); 3215 return ERR_CAST(tfm_cmac);
3195 } 3216 }
3196 3217
3218 tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
3219 if (IS_ERR(tfm_ecdh)) {
3220 BT_ERR("Unable to create ECDH crypto context");
3221 crypto_free_shash(tfm_cmac);
3222 crypto_free_cipher(tfm_aes);
3223 kzfree(smp);
3224 return ERR_CAST(tfm_ecdh);
3225 }
3226
3197 smp->tfm_aes = tfm_aes; 3227 smp->tfm_aes = tfm_aes;
3198 smp->tfm_cmac = tfm_cmac; 3228 smp->tfm_cmac = tfm_cmac;
3229 smp->tfm_ecdh = tfm_ecdh;
3199 smp->min_key_size = SMP_MIN_ENC_KEY_SIZE; 3230 smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
3200 smp->max_key_size = SMP_MAX_ENC_KEY_SIZE; 3231 smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
3201 3232
@@ -3205,6 +3236,7 @@ create_chan:
3205 if (smp) { 3236 if (smp) {
3206 crypto_free_cipher(smp->tfm_aes); 3237 crypto_free_cipher(smp->tfm_aes);
3207 crypto_free_shash(smp->tfm_cmac); 3238 crypto_free_shash(smp->tfm_cmac);
3239 crypto_free_kpp(smp->tfm_ecdh);
3208 kzfree(smp); 3240 kzfree(smp);
3209 } 3241 }
3210 return ERR_PTR(-ENOMEM); 3242 return ERR_PTR(-ENOMEM);
@@ -3252,6 +3284,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
3252 chan->data = NULL; 3284 chan->data = NULL;
3253 crypto_free_cipher(smp->tfm_aes); 3285 crypto_free_cipher(smp->tfm_aes);
3254 crypto_free_shash(smp->tfm_cmac); 3286 crypto_free_shash(smp->tfm_cmac);
3287 crypto_free_kpp(smp->tfm_ecdh);
3255 kzfree(smp); 3288 kzfree(smp);
3256 } 3289 }
3257 3290
@@ -3490,25 +3523,18 @@ void smp_unregister(struct hci_dev *hdev)
3490 3523
3491#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) 3524#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
3492 3525
3493static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) 3526static int __init test_debug_key(struct crypto_kpp *tfm_ecdh)
3494{ 3527{
3495 int i; 3528 u8 pk[64];
3496 3529 int err;
3497 for (i = 0; i < ndigits; i++)
3498 out[i] = __swab64(in[ndigits - 1 - i]);
3499}
3500
3501static int __init test_debug_key(void)
3502{
3503 u8 pk[64], sk[32];
3504
3505 swap_digits((u64 *)debug_sk, (u64 *)sk, 4);
3506 3530
3507 if (!generate_ecdh_keys(pk, sk)) 3531 err = set_ecdh_privkey(tfm_ecdh, debug_sk);
3508 return -EINVAL; 3532 if (err)
3533 return err;
3509 3534
3510 if (crypto_memneq(sk, debug_sk, 32)) 3535 err = generate_ecdh_public_key(tfm_ecdh, pk);
3511 return -EINVAL; 3536 if (err)
3537 return err;
3512 3538
3513 if (crypto_memneq(pk, debug_pk, 64)) 3539 if (crypto_memneq(pk, debug_pk, 64))
3514 return -EINVAL; 3540 return -EINVAL;
@@ -3763,7 +3789,8 @@ static const struct file_operations test_smp_fops = {
3763}; 3789};
3764 3790
3765static int __init run_selftests(struct crypto_cipher *tfm_aes, 3791static int __init run_selftests(struct crypto_cipher *tfm_aes,
3766 struct crypto_shash *tfm_cmac) 3792 struct crypto_shash *tfm_cmac,
3793 struct crypto_kpp *tfm_ecdh)
3767{ 3794{
3768 ktime_t calltime, delta, rettime; 3795 ktime_t calltime, delta, rettime;
3769 unsigned long long duration; 3796 unsigned long long duration;
@@ -3771,7 +3798,7 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes,
3771 3798
3772 calltime = ktime_get(); 3799 calltime = ktime_get();
3773 3800
3774 err = test_debug_key(); 3801 err = test_debug_key(tfm_ecdh);
3775 if (err) { 3802 if (err) {
3776 BT_ERR("debug_key test failed"); 3803 BT_ERR("debug_key test failed");
3777 goto done; 3804 goto done;
@@ -3848,6 +3875,7 @@ int __init bt_selftest_smp(void)
3848{ 3875{
3849 struct crypto_cipher *tfm_aes; 3876 struct crypto_cipher *tfm_aes;
3850 struct crypto_shash *tfm_cmac; 3877 struct crypto_shash *tfm_cmac;
3878 struct crypto_kpp *tfm_ecdh;
3851 int err; 3879 int err;
3852 3880
3853 tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 3881 tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
@@ -3863,10 +3891,19 @@ int __init bt_selftest_smp(void)
3863 return PTR_ERR(tfm_cmac); 3891 return PTR_ERR(tfm_cmac);
3864 } 3892 }
3865 3893
3866 err = run_selftests(tfm_aes, tfm_cmac); 3894 tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
3895 if (IS_ERR(tfm_ecdh)) {
3896 BT_ERR("Unable to create ECDH crypto context");
3897 crypto_free_shash(tfm_cmac);
3898 crypto_free_cipher(tfm_aes);
3899 return PTR_ERR(tfm_ecdh);
3900 }
3901
3902 err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh);
3867 3903
3868 crypto_free_shash(tfm_cmac); 3904 crypto_free_shash(tfm_cmac);
3869 crypto_free_cipher(tfm_aes); 3905 crypto_free_cipher(tfm_aes);
3906 crypto_free_kpp(tfm_ecdh);
3870 3907
3871 return err; 3908 return err;
3872} 3909}
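
Editor's note: the smp.c hunks above all serve one rule, stated in the new comment in smp_cmd_public_key(): the ECDH private key is set (or generated) on a crypto_kpp transform, and the shared secret must later be derived on that same transform, which is why the local-OOB case picks smp_dev->tfm_ecdh and the normal case smp->tfm_ecdh. A minimal sketch of that lifecycle, assuming only the helper signatures visible in these hunks (set_ecdh_privkey(), generate_ecdh_public_key(), compute_ecdh_secret() from net/bluetooth/ecdh_helper.c):

	/* Sketch only: bind a private key to a tfm, then derive the public
	 * key and the shared secret on that same tfm. Needs <crypto/kpp.h>
	 * plus the bluetooth ecdh_helper declarations.
	 */
	static int ecdh_roundtrip(const u8 priv[32], const u8 remote_pk[64],
				  u8 pub[64], u8 secret[32])
	{
		struct crypto_kpp *tfm;
		int err;

		tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = set_ecdh_privkey(tfm, priv);
		if (!err)
			err = generate_ecdh_public_key(tfm, pub);
		if (!err)
			err = compute_ecdh_secret(tfm, remote_pk, secret);

		crypto_free_kpp(tfm);
		return err;
	}
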
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6be41a44d688..a86e6687026e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
133 if (is_l2) 133 if (is_l2)
134 __skb_push(skb, ETH_HLEN); 134 __skb_push(skb, ETH_HLEN);
135 if (is_direct_pkt_access) 135 if (is_direct_pkt_access)
136 bpf_compute_data_end(skb); 136 bpf_compute_data_pointers(skb);
137 retval = bpf_test_run(prog, skb, repeat, &duration); 137 retval = bpf_test_run(prog, skb, repeat, &duration);
138 if (!is_l2) 138 if (!is_l2)
139 __skb_push(skb, ETH_HLEN); 139 __skb_push(skb, ETH_HLEN);
@@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
162 162
163 xdp.data_hard_start = data; 163 xdp.data_hard_start = data;
164 xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN; 164 xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
165 xdp.data_meta = xdp.data;
165 xdp.data_end = xdp.data + size; 166 xdp.data_end = xdp.data + size;
166 167
167 retval = bpf_test_run(prog, &xdp, repeat, &duration); 168 retval = bpf_test_run(prog, &xdp, repeat, &duration);
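
Editor's note: both test_run.c fixups track the new XDP metadata support: the skb path switches to bpf_compute_data_pointers() so data_meta is set up alongside data_end, and the XDP path starts every test frame with an empty metadata area. A program may later grow that area downward with the bpf_xdp_adjust_meta() helper; the invariant the harness establishes, as a sketch (data and size as in the hunk above):

	struct xdp_buff xdp;

	xdp.data_hard_start = data;
	xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
	xdp.data_meta = xdp.data;	/* zero-length metadata area */
	xdp.data_end = xdp.data + size;
	/* invariant: data_hard_start <= data_meta <= data <= data_end */
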
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 40b1ede527ca..ac9ef337f0fa 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the IEEE 802.1d ethernet bridging layer. 3# Makefile for the IEEE 802.1d ethernet bridging layer.
3# 4#
@@ -7,7 +8,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o
7bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ 8bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
8 br_ioctl.o br_stp.o br_stp_bpdu.o \ 9 br_ioctl.o br_stp.o br_stp_bpdu.o \
9 br_stp_if.o br_stp_timer.o br_netlink.o \ 10 br_stp_if.o br_stp_timer.o br_netlink.o \
10 br_netlink_tunnel.o 11 br_netlink_tunnel.o br_arp_nd_proxy.o
11 12
12bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o 13bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
13 14
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1407d1ba7577..6bf06e756df2 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -112,7 +112,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
112 /* Events that may cause spanning tree to refresh */ 112 /* Events that may cause spanning tree to refresh */
113 if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || 113 if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
114 event == NETDEV_CHANGE || event == NETDEV_DOWN) 114 event == NETDEV_CHANGE || event == NETDEV_DOWN)
115 br_ifinfo_notify(RTM_NEWLINK, p); 115 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
116 116
117 return NOTIFY_DONE; 117 return NOTIFY_DONE;
118} 118}
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
new file mode 100644
index 000000000000..2cf7716254be
--- /dev/null
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -0,0 +1,469 @@
1/*
2 * Handle bridge arp/nd proxy/suppress
3 *
4 * Copyright (C) 2017 Cumulus Networks
5 * Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com>
6 *
7 * Authors:
8 * Roopa Prabhu <roopa@cumulusnetworks.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/kernel.h>
17#include <linux/netdevice.h>
18#include <linux/etherdevice.h>
19#include <linux/neighbour.h>
20#include <net/arp.h>
21#include <linux/if_vlan.h>
22#include <linux/inetdevice.h>
23#include <net/addrconf.h>
24#if IS_ENABLED(CONFIG_IPV6)
25#include <net/ip6_checksum.h>
26#endif
27
28#include "br_private.h"
29
30void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
31{
32 struct net_bridge_port *p;
33 bool neigh_suppress = false;
34
35 list_for_each_entry(p, &br->port_list, list) {
36 if (p->flags & BR_NEIGH_SUPPRESS) {
37 neigh_suppress = true;
38 break;
39 }
40 }
41
42 br->neigh_suppress_enabled = neigh_suppress;
43}
44
45#if IS_ENABLED(CONFIG_INET)
46static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
47 struct net_device *dev, __be32 dest_ip, __be32 src_ip,
48 const unsigned char *dest_hw,
49 const unsigned char *src_hw,
50 const unsigned char *target_hw,
51 __be16 vlan_proto, u16 vlan_tci)
52{
53 struct net_bridge_vlan_group *vg;
54 struct sk_buff *skb;
55 u16 pvid;
56
57 netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n",
58 dev->name, &dest_ip, dest_hw, &src_ip, src_hw);
59
60 if (!vlan_tci) {
61 arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
62 dest_hw, src_hw, target_hw);
63 return;
64 }
65
66 skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
67 dest_hw, src_hw, target_hw);
68 if (!skb)
69 return;
70
71 if (p)
72 vg = nbp_vlan_group_rcu(p);
73 else
74 vg = br_vlan_group_rcu(br);
75 pvid = br_get_pvid(vg);
76 if (pvid == (vlan_tci & VLAN_VID_MASK))
77 vlan_tci = 0;
78
79 if (vlan_tci)
80 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
81
82 if (p) {
83 arp_xmit(skb);
84 } else {
85 skb_reset_mac_header(skb);
86 __skb_pull(skb, skb_network_offset(skb));
87 skb->ip_summed = CHECKSUM_UNNECESSARY;
88 skb->pkt_type = PACKET_HOST;
89
90 netif_rx_ni(skb);
91 }
92}
93
94static int br_chk_addr_ip(struct net_device *dev, void *data)
95{
96 __be32 ip = *(__be32 *)data;
97 struct in_device *in_dev;
98 __be32 addr = 0;
99
100 in_dev = __in_dev_get_rcu(dev);
101 if (in_dev)
102 addr = inet_confirm_addr(dev_net(dev), in_dev, 0, ip,
103 RT_SCOPE_HOST);
104
105 if (addr == ip)
106 return 1;
107
108 return 0;
109}
110
111static bool br_is_local_ip(struct net_device *dev, __be32 ip)
112{
113 if (br_chk_addr_ip(dev, &ip))
114 return true;
115
116 /* check if ip is configured on upper dev */
117 if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip))
118 return true;
119
120 return false;
121}
122
123void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
124 u16 vid, struct net_bridge_port *p)
125{
126 struct net_device *dev = br->dev;
127 struct net_device *vlandev = dev;
128 struct neighbour *n;
129 struct arphdr *parp;
130 u8 *arpptr, *sha;
131 __be32 sip, tip;
132
133 BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
134
135 if ((dev->flags & IFF_NOARP) ||
136 !pskb_may_pull(skb, arp_hdr_len(dev)))
137 return;
138
139 parp = arp_hdr(skb);
140
141 if (parp->ar_pro != htons(ETH_P_IP) ||
142 parp->ar_hln != dev->addr_len ||
143 parp->ar_pln != 4)
144 return;
145
146 arpptr = (u8 *)parp + sizeof(struct arphdr);
147 sha = arpptr;
148 arpptr += dev->addr_len; /* sha */
149 memcpy(&sip, arpptr, sizeof(sip));
150 arpptr += sizeof(sip);
151 arpptr += dev->addr_len; /* tha */
152 memcpy(&tip, arpptr, sizeof(tip));
153
154 if (ipv4_is_loopback(tip) ||
155 ipv4_is_multicast(tip))
156 return;
157
158 if (br->neigh_suppress_enabled) {
159 if (p && (p->flags & BR_NEIGH_SUPPRESS))
160 return;
161 if (ipv4_is_zeronet(sip) || sip == tip) {
162 /* prevent flooding to neigh suppress ports */
163 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
164 return;
165 }
166 }
167
168 if (parp->ar_op != htons(ARPOP_REQUEST))
169 return;
170
171 if (vid != 0) {
172 vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
173 vid);
174 if (!vlandev)
175 return;
176 }
177
178 if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) {
 179		/* it's our local IP, so don't proxy reply
180 * and don't forward to neigh suppress ports
181 */
182 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
183 return;
184 }
185
186 n = neigh_lookup(&arp_tbl, &tip, vlandev);
187 if (n) {
188 struct net_bridge_fdb_entry *f;
189
190 if (!(n->nud_state & NUD_VALID)) {
191 neigh_release(n);
192 return;
193 }
194
195 f = br_fdb_find_rcu(br, n->ha, vid);
196 if (f) {
197 bool replied = false;
198
199 if ((p && (p->flags & BR_PROXYARP)) ||
200 (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
201 BR_NEIGH_SUPPRESS)))) {
202 if (!vid)
203 br_arp_send(br, p, skb->dev, sip, tip,
204 sha, n->ha, sha, 0, 0);
205 else
206 br_arp_send(br, p, skb->dev, sip, tip,
207 sha, n->ha, sha,
208 skb->vlan_proto,
209 skb_vlan_tag_get(skb));
210 replied = true;
211 }
212
 213			/* If we have replied, or as long as we know the
 214			 * mac, mark the ARP as replied to
215 */
216 if (replied || br->neigh_suppress_enabled)
217 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
218 }
219
220 neigh_release(n);
221 }
222}
223#endif
224
225#if IS_ENABLED(CONFIG_IPV6)
226struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg)
227{
228 struct nd_msg *m;
229
230 m = skb_header_pointer(skb, skb_network_offset(skb) +
231 sizeof(struct ipv6hdr), sizeof(*msg), msg);
232 if (!m)
233 return NULL;
234
235 if (m->icmph.icmp6_code != 0 ||
236 (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION &&
237 m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT))
238 return NULL;
239
240 return m;
241}
242
243static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
244 struct sk_buff *request, struct neighbour *n,
245 __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns)
246{
247 struct net_device *dev = request->dev;
248 struct net_bridge_vlan_group *vg;
249 struct sk_buff *reply;
250 struct nd_msg *na;
251 struct ipv6hdr *pip6;
252 int na_olen = 8; /* opt hdr + ETH_ALEN for target */
253 int ns_olen;
254 int i, len;
255 u8 *daddr;
256 u16 pvid;
257
258 if (!dev)
259 return;
260
261 len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
262 sizeof(*na) + na_olen + dev->needed_tailroom;
263
264 reply = alloc_skb(len, GFP_ATOMIC);
265 if (!reply)
266 return;
267
268 reply->protocol = htons(ETH_P_IPV6);
269 reply->dev = dev;
270 skb_reserve(reply, LL_RESERVED_SPACE(dev));
271 skb_push(reply, sizeof(struct ethhdr));
272 skb_set_mac_header(reply, 0);
273
274 daddr = eth_hdr(request)->h_source;
275
 276	/* Do we need option processing? */
277 ns_olen = request->len - (skb_network_offset(request) +
278 sizeof(struct ipv6hdr)) - sizeof(*ns);
279 for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) {
280 if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
281 daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
282 break;
283 }
284 }
285
286 /* Ethernet header */
287 ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
288 ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
289 eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
290 reply->protocol = htons(ETH_P_IPV6);
291
292 skb_pull(reply, sizeof(struct ethhdr));
293 skb_set_network_header(reply, 0);
294 skb_put(reply, sizeof(struct ipv6hdr));
295
296 /* IPv6 header */
297 pip6 = ipv6_hdr(reply);
298 memset(pip6, 0, sizeof(struct ipv6hdr));
299 pip6->version = 6;
300 pip6->priority = ipv6_hdr(request)->priority;
301 pip6->nexthdr = IPPROTO_ICMPV6;
302 pip6->hop_limit = 255;
303 pip6->daddr = ipv6_hdr(request)->saddr;
304 pip6->saddr = *(struct in6_addr *)n->primary_key;
305
306 skb_pull(reply, sizeof(struct ipv6hdr));
307 skb_set_transport_header(reply, 0);
308
309 na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
310
311 /* Neighbor Advertisement */
312 memset(na, 0, sizeof(*na) + na_olen);
313 na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
314 na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */
315 na->icmph.icmp6_override = 1;
316 na->icmph.icmp6_solicited = 1;
317 na->target = ns->target;
318 ether_addr_copy(&na->opt[2], n->ha);
319 na->opt[0] = ND_OPT_TARGET_LL_ADDR;
320 na->opt[1] = na_olen >> 3;
321
322 na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
323 &pip6->daddr,
324 sizeof(*na) + na_olen,
325 IPPROTO_ICMPV6,
326 csum_partial(na, sizeof(*na) + na_olen, 0));
327
328 pip6->payload_len = htons(sizeof(*na) + na_olen);
329
330 skb_push(reply, sizeof(struct ipv6hdr));
331 skb_push(reply, sizeof(struct ethhdr));
332
333 reply->ip_summed = CHECKSUM_UNNECESSARY;
334
335 if (p)
336 vg = nbp_vlan_group_rcu(p);
337 else
338 vg = br_vlan_group_rcu(br);
339 pvid = br_get_pvid(vg);
340 if (pvid == (vlan_tci & VLAN_VID_MASK))
341 vlan_tci = 0;
342
343 if (vlan_tci)
344 __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci);
345
346 netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n",
347 dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha);
348
349 if (p) {
350 dev_queue_xmit(reply);
351 } else {
352 skb_reset_mac_header(reply);
353 __skb_pull(reply, skb_network_offset(reply));
354 reply->ip_summed = CHECKSUM_UNNECESSARY;
355 reply->pkt_type = PACKET_HOST;
356
357 netif_rx_ni(reply);
358 }
359}
360
361static int br_chk_addr_ip6(struct net_device *dev, void *data)
362{
363 struct in6_addr *addr = (struct in6_addr *)data;
364
365 if (ipv6_chk_addr(dev_net(dev), addr, dev, 0))
366 return 1;
367
368 return 0;
369}
370
371static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr)
372
373{
374 if (br_chk_addr_ip6(dev, addr))
375 return true;
376
377 /* check if ip is configured on upper dev */
378 if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr))
379 return true;
380
381 return false;
382}
383
384void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
385 u16 vid, struct net_bridge_port *p, struct nd_msg *msg)
386{
387 struct net_device *dev = br->dev;
388 struct net_device *vlandev = NULL;
389 struct in6_addr *saddr, *daddr;
390 struct ipv6hdr *iphdr;
391 struct neighbour *n;
392
393 BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
394
395 if (p && (p->flags & BR_NEIGH_SUPPRESS))
396 return;
397
398 if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
399 !msg->icmph.icmp6_solicited) {
400 /* prevent flooding to neigh suppress ports */
401 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
402 return;
403 }
404
405 if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
406 return;
407
408 iphdr = ipv6_hdr(skb);
409 saddr = &iphdr->saddr;
410 daddr = &iphdr->daddr;
411
412 if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) {
413 /* prevent flooding to neigh suppress ports */
414 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
415 return;
416 }
417
418 if (vid != 0) {
 419		/* do the neigh table lookup on the vlan device */
420 vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
421 vid);
422 if (!vlandev)
423 return;
424 } else {
425 vlandev = dev;
426 }
427
428 if (br_is_local_ip6(vlandev, &msg->target)) {
 429		/* it's our own IP, so don't proxy reply
 430		 * and don't forward to neigh suppress ports
431 */
432 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
433 return;
434 }
435
436 n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev);
437 if (n) {
438 struct net_bridge_fdb_entry *f;
439
440 if (!(n->nud_state & NUD_VALID)) {
441 neigh_release(n);
442 return;
443 }
444
445 f = br_fdb_find_rcu(br, n->ha, vid);
446 if (f) {
447 bool replied = false;
448
449 if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
450 if (vid != 0)
451 br_nd_send(br, p, skb, n,
452 skb->vlan_proto,
453 skb_vlan_tag_get(skb), msg);
454 else
455 br_nd_send(br, p, skb, n, 0, 0, msg);
456 replied = true;
457 }
458
 459			/* If we have replied, or as long as we know the
460 * mac, indicate to NEIGH_SUPPRESS ports that we
461 * have replied
462 */
463 if (replied || br->neigh_suppress_enabled)
464 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
465 }
466 neigh_release(n);
467 }
468}
469#endif
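
Editor's note: one detail worth calling out in the new file: br_arp_send() and br_nd_send() end the same way. Called with a port, the reply is an ordinary transmit; called with p == NULL (the bridge device itself originated the request, see the br_dev_xmit() hunk below), the reply is injected straight back into the local receive path. Condensed from the two functions above:

	if (p) {
		arp_xmit(skb);			/* reply goes out a port */
	} else {
		/* reply to the local stack: make it look freshly received */
		skb_reset_mac_header(skb);
		__skb_pull(skb, skb_network_offset(skb));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->pkt_type = PACKET_HOST;
		netif_rx_ni(skb);
	}
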
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f6b6a92f1c48..af5b8c87f590 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
39 struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); 39 struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
40 const struct nf_br_ops *nf_ops; 40 const struct nf_br_ops *nf_ops;
41 const unsigned char *dest; 41 const unsigned char *dest;
42 struct ethhdr *eth;
42 u16 vid = 0; 43 u16 vid = 0;
43 44
44 rcu_read_lock(); 45 rcu_read_lock();
@@ -57,11 +58,30 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
57 BR_INPUT_SKB_CB(skb)->brdev = dev; 58 BR_INPUT_SKB_CB(skb)->brdev = dev;
58 59
59 skb_reset_mac_header(skb); 60 skb_reset_mac_header(skb);
61 eth = eth_hdr(skb);
60 skb_pull(skb, ETH_HLEN); 62 skb_pull(skb, ETH_HLEN);
61 63
62 if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) 64 if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
63 goto out; 65 goto out;
64 66
67 if (IS_ENABLED(CONFIG_INET) &&
68 (eth->h_proto == htons(ETH_P_ARP) ||
69 eth->h_proto == htons(ETH_P_RARP)) &&
70 br->neigh_suppress_enabled) {
71 br_do_proxy_suppress_arp(skb, br, vid, NULL);
72 } else if (IS_ENABLED(CONFIG_IPV6) &&
73 skb->protocol == htons(ETH_P_IPV6) &&
74 br->neigh_suppress_enabled &&
75 pskb_may_pull(skb, sizeof(struct ipv6hdr) +
76 sizeof(struct nd_msg)) &&
77 ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
78 struct nd_msg *msg, _msg;
79
80 msg = br_is_nd_neigh_msg(skb, &_msg);
81 if (msg)
82 br_do_suppress_nd(skb, br, vid, NULL, msg);
83 }
84
65 dest = eth_hdr(skb)->h_dest; 85 dest = eth_hdr(skb)->h_dest;
66 if (is_broadcast_ether_addr(dest)) { 86 if (is_broadcast_ether_addr(dest)) {
67 br_flood(br, skb, BR_PKT_BROADCAST, false, true); 87 br_flood(br, skb, BR_PKT_BROADCAST, false, true);
@@ -320,12 +340,13 @@ void br_netpoll_disable(struct net_bridge_port *p)
320 340
321#endif 341#endif
322 342
323static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) 343static int br_add_slave(struct net_device *dev, struct net_device *slave_dev,
344 struct netlink_ext_ack *extack)
324 345
325{ 346{
326 struct net_bridge *br = netdev_priv(dev); 347 struct net_bridge *br = netdev_priv(dev);
327 348
328 return br_add_if(br, slave_dev); 349 return br_add_if(br, slave_dev, extack);
329} 350}
330 351
331static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) 352static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
@@ -400,7 +421,7 @@ void br_dev_setup(struct net_device *dev)
400 br->bridge_id.prio[0] = 0x80; 421 br->bridge_id.prio[0] = 0x80;
401 br->bridge_id.prio[1] = 0x00; 422 br->bridge_id.prio[1] = 0x00;
402 423
403 ether_addr_copy(br->group_addr, eth_reserved_addr_base); 424 ether_addr_copy(br->group_addr, eth_stp_addr);
404 425
405 br->stp_enabled = BR_NO_STP; 426 br->stp_enabled = BR_NO_STP;
406 br->group_fwd_mask = BR_GROUPFWD_DEFAULT; 427 br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
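
Editor's note: the new br_dev_xmit() branches are guarded with IS_ENABLED() rather than #ifdef, so each arm is type-checked and then compiled out entirely when CONFIG_INET or CONFIG_IPV6 is off. The shape, with hypothetical stand-in helpers for brevity:

	if (IS_ENABLED(CONFIG_INET) &&
	    (eth->h_proto == htons(ETH_P_ARP) ||
	     eth->h_proto == htons(ETH_P_RARP)) &&
	    br->neigh_suppress_enabled)
		handle_arp(skb);	/* stand-in, not a kernel function */
	else if (IS_ENABLED(CONFIG_IPV6) &&
		 skb->protocol == htons(ETH_P_IPV6) &&
		 br->neigh_suppress_enabled)
		handle_nd(skb);		/* stand-in, not a kernel function */
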
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 48fb17417fac..b4eed113d2ec 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -204,7 +204,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
204 /* Do not flood to ports that enable proxy ARP */ 204 /* Do not flood to ports that enable proxy ARP */
205 if (p->flags & BR_PROXYARP) 205 if (p->flags & BR_PROXYARP)
206 continue; 206 continue;
207 if ((p->flags & BR_PROXYARP_WIFI) && 207 if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
208 BR_INPUT_SKB_CB(skb)->proxyarp_replied) 208 BR_INPUT_SKB_CB(skb)->proxyarp_replied)
209 continue; 209 continue;
210 210
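
Editor's note: this is the consumer of the proxyarp_replied flag set in br_arp_nd_proxy.c: BR_PROXYARP ports are never flooded to, and BR_PROXYARP_WIFI or BR_NEIGH_SUPPRESS ports are skipped once a reply has been generated. The per-port test, extracted as a sketch (types from br_private.h):

	static bool skip_flood(struct net_bridge_port *p, struct sk_buff *skb)
	{
		if (p->flags & BR_PROXYARP)
			return true;
		if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
		    BR_INPUT_SKB_CB(skb)->proxyarp_replied)
			return true;
		return false;
	}
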
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3aef22931ab..9ba4ed65c52b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -271,7 +271,7 @@ static void del_nbp(struct net_bridge_port *p)
271 br_stp_disable_port(p); 271 br_stp_disable_port(p);
272 spin_unlock_bh(&br->lock); 272 spin_unlock_bh(&br->lock);
273 273
274 br_ifinfo_notify(RTM_DELLINK, p); 274 br_ifinfo_notify(RTM_DELLINK, NULL, p);
275 275
276 list_del_rcu(&p->list); 276 list_del_rcu(&p->list);
277 if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom) 277 if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
@@ -310,6 +310,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
310 del_nbp(p); 310 del_nbp(p);
311 } 311 }
312 312
313 br_recalculate_neigh_suppress_enabled(br);
314
313 br_fdb_delete_by_port(br, NULL, 0, 1); 315 br_fdb_delete_by_port(br, NULL, 0, 1);
314 316
315 cancel_delayed_work_sync(&br->gc_work); 317 cancel_delayed_work_sync(&br->gc_work);
@@ -480,7 +482,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
480} 482}
481 483
482/* called with RTNL */ 484/* called with RTNL */
483int br_add_if(struct net_bridge *br, struct net_device *dev) 485int br_add_if(struct net_bridge *br, struct net_device *dev,
486 struct netlink_ext_ack *extack)
484{ 487{
485 struct net_bridge_port *p; 488 struct net_bridge_port *p;
486 int err = 0; 489 int err = 0;
@@ -500,16 +503,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
500 return -EINVAL; 503 return -EINVAL;
501 504
502 /* No bridging of bridges */ 505 /* No bridging of bridges */
503 if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) 506 if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
507 NL_SET_ERR_MSG(extack,
508 "Can not enslave a bridge to a bridge");
504 return -ELOOP; 509 return -ELOOP;
510 }
505 511
506 /* Device is already being bridged */ 512 /* Device is already being bridged */
507 if (br_port_exists(dev)) 513 if (br_port_exists(dev))
508 return -EBUSY; 514 return -EBUSY;
509 515
510 /* No bridging devices that dislike that (e.g. wireless) */ 516 /* No bridging devices that dislike that (e.g. wireless) */
511 if (dev->priv_flags & IFF_DONT_BRIDGE) 517 if (dev->priv_flags & IFF_DONT_BRIDGE) {
518 NL_SET_ERR_MSG(extack,
519 "Device does not allow enslaving to a bridge");
512 return -EOPNOTSUPP; 520 return -EOPNOTSUPP;
521 }
513 522
514 p = new_nbp(br, dev); 523 p = new_nbp(br, dev);
515 if (IS_ERR(p)) 524 if (IS_ERR(p))
@@ -540,7 +549,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
540 549
541 dev->priv_flags |= IFF_BRIDGE_PORT; 550 dev->priv_flags |= IFF_BRIDGE_PORT;
542 551
543 err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); 552 err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
544 if (err) 553 if (err)
545 goto err5; 554 goto err5;
546 555
@@ -580,7 +589,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
580 br_stp_enable_port(p); 589 br_stp_enable_port(p);
581 spin_unlock_bh(&br->lock); 590 spin_unlock_bh(&br->lock);
582 591
583 br_ifinfo_notify(RTM_NEWLINK, p); 592 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
584 593
585 if (changed_addr) 594 if (changed_addr)
586 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); 595 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
@@ -653,4 +662,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
653 662
654 if (mask & BR_AUTO_MASK) 663 if (mask & BR_AUTO_MASK)
655 nbp_update_port_count(br); 664 nbp_update_port_count(br);
665
666 if (mask & BR_NEIGH_SUPPRESS)
667 br_recalculate_neigh_suppress_enabled(br);
656} 668}
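
Editor's note: br_add_if() now threads a struct netlink_ext_ack through to netdev_master_upper_dev_link(), so enslave failures carry a human-readable message back to the netlink caller instead of a bare errno; the ioctl path (br_ioctl.c below) passes NULL, which NL_SET_ERR_MSG() tolerates. The pattern in isolation:

	static int enslave_check(struct net_device *dev,
				 struct netlink_ext_ack *extack)
	{
		if (dev->priv_flags & IFF_DONT_BRIDGE) {
			/* recorded in extack when non-NULL, otherwise a no-op */
			NL_SET_ERR_MSG(extack,
				       "Device does not allow enslaving to a bridge");
			return -EOPNOTSUPP;
		}
		return 0;
	}
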
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7637f58c1226..7f98a7d25866 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -71,62 +71,6 @@ static int br_pass_frame_up(struct sk_buff *skb)
71 br_netif_receive_skb); 71 br_netif_receive_skb);
72} 72}
73 73
74static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
75 u16 vid, struct net_bridge_port *p)
76{
77 struct net_device *dev = br->dev;
78 struct neighbour *n;
79 struct arphdr *parp;
80 u8 *arpptr, *sha;
81 __be32 sip, tip;
82
83 BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
84
85 if ((dev->flags & IFF_NOARP) ||
86 !pskb_may_pull(skb, arp_hdr_len(dev)))
87 return;
88
89 parp = arp_hdr(skb);
90
91 if (parp->ar_pro != htons(ETH_P_IP) ||
92 parp->ar_op != htons(ARPOP_REQUEST) ||
93 parp->ar_hln != dev->addr_len ||
94 parp->ar_pln != 4)
95 return;
96
97 arpptr = (u8 *)parp + sizeof(struct arphdr);
98 sha = arpptr;
99 arpptr += dev->addr_len; /* sha */
100 memcpy(&sip, arpptr, sizeof(sip));
101 arpptr += sizeof(sip);
102 arpptr += dev->addr_len; /* tha */
103 memcpy(&tip, arpptr, sizeof(tip));
104
105 if (ipv4_is_loopback(tip) ||
106 ipv4_is_multicast(tip))
107 return;
108
109 n = neigh_lookup(&arp_tbl, &tip, dev);
110 if (n) {
111 struct net_bridge_fdb_entry *f;
112
113 if (!(n->nud_state & NUD_VALID)) {
114 neigh_release(n);
115 return;
116 }
117
118 f = br_fdb_find_rcu(br, n->ha, vid);
119 if (f && ((p->flags & BR_PROXYARP) ||
120 (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
121 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
122 sha, n->ha, sha);
123 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
124 }
125
126 neigh_release(n);
127 }
128}
129
130/* note: already called with rcu_read_lock */ 74/* note: already called with rcu_read_lock */
131int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 75int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
132{ 76{
@@ -171,15 +115,29 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
171 115
172 BR_INPUT_SKB_CB(skb)->brdev = br->dev; 116 BR_INPUT_SKB_CB(skb)->brdev = br->dev;
173 117
174 if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) 118 if (IS_ENABLED(CONFIG_INET) &&
175 br_do_proxy_arp(skb, br, vid, p); 119 (skb->protocol == htons(ETH_P_ARP) ||
120 skb->protocol == htons(ETH_P_RARP))) {
121 br_do_proxy_suppress_arp(skb, br, vid, p);
122 } else if (IS_ENABLED(CONFIG_IPV6) &&
123 skb->protocol == htons(ETH_P_IPV6) &&
124 br->neigh_suppress_enabled &&
125 pskb_may_pull(skb, sizeof(struct ipv6hdr) +
126 sizeof(struct nd_msg)) &&
127 ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
128 struct nd_msg *msg, _msg;
129
130 msg = br_is_nd_neigh_msg(skb, &_msg);
131 if (msg)
132 br_do_suppress_nd(skb, br, vid, p, msg);
133 }
176 134
177 switch (pkt_type) { 135 switch (pkt_type) {
178 case BR_PKT_MULTICAST: 136 case BR_PKT_MULTICAST:
179 mdst = br_mdb_get(br, skb, vid); 137 mdst = br_mdb_get(br, skb, vid);
180 if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && 138 if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
181 br_multicast_querier_exists(br, eth_hdr(skb))) { 139 br_multicast_querier_exists(br, eth_hdr(skb))) {
182 if ((mdst && mdst->mglist) || 140 if ((mdst && mdst->host_joined) ||
183 br_multicast_is_router(br)) { 141 br_multicast_is_router(br)) {
184 local_rcv = true; 142 local_rcv = true;
185 br->dev->stats.multicast++; 143 br->dev->stats.multicast++;
@@ -289,6 +247,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
289 * 247 *
290 * Others reserved for future standardization 248 * Others reserved for future standardization
291 */ 249 */
250 fwd_mask |= p->group_fwd_mask;
292 switch (dest[5]) { 251 switch (dest[5]) {
293 case 0x00: /* Bridge Group Address */ 252 case 0x00: /* Bridge Group Address */
294 /* If STP is turned off, 253 /* If STP is turned off,
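
Editor's note: besides routing ARP/RARP and ND through the shared br_do_proxy_suppress_arp()/br_do_suppress_nd() helpers, br_handle_frame() gains a per-port opt-in for forwarding 01:80:C2:00:00:0X link-local frames: the port's group_fwd_mask is OR'd into the mask consulted before the switch on dest[5]. Roughly, with the surrounding cases omitted (a sketch, assuming the existing fwd_mask initialization from group_fwd_mask_required):

	u16 fwd_mask = p->br->group_fwd_mask_required;

	fwd_mask |= p->group_fwd_mask;	/* new: per-port opt-in */
	if (fwd_mask & (1u << dest[5]))
		goto forward;		/* handle like ordinary traffic */
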
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 7970f8540cbb..73b957fd639d 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -98,7 +98,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
98 return -EINVAL; 98 return -EINVAL;
99 99
100 if (isadd) 100 if (isadd)
101 ret = br_add_if(br, dev); 101 ret = br_add_if(br, dev, NULL);
102 else 102 else
103 ret = br_del_if(br, dev); 103 ret = br_del_if(br, dev);
104 104
@@ -293,7 +293,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
293 293
294 if (!ret) { 294 if (!ret) {
295 if (p) 295 if (p)
296 br_ifinfo_notify(RTM_NEWLINK, p); 296 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
297 else 297 else
298 netdev_state_change(br->dev); 298 netdev_state_change(br->dev);
299 } 299 }
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index ca01def49af0..b0f4c734900b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/err.h> 2#include <linux/err.h>
2#include <linux/igmp.h> 3#include <linux/igmp.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
@@ -291,6 +292,46 @@ err:
291 kfree(priv); 292 kfree(priv);
292} 293}
293 294
295static void br_mdb_switchdev_host_port(struct net_device *dev,
296 struct net_device *lower_dev,
297 struct br_mdb_entry *entry, int type)
298{
299 struct switchdev_obj_port_mdb mdb = {
300 .obj = {
301 .id = SWITCHDEV_OBJ_ID_HOST_MDB,
302 .flags = SWITCHDEV_F_DEFER,
303 },
304 .vid = entry->vid,
305 };
306
307 if (entry->addr.proto == htons(ETH_P_IP))
308 ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
309#if IS_ENABLED(CONFIG_IPV6)
310 else
311 ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
312#endif
313
314 mdb.obj.orig_dev = dev;
315 switch (type) {
316 case RTM_NEWMDB:
317 switchdev_port_obj_add(lower_dev, &mdb.obj);
318 break;
319 case RTM_DELMDB:
320 switchdev_port_obj_del(lower_dev, &mdb.obj);
321 break;
322 }
323}
324
325static void br_mdb_switchdev_host(struct net_device *dev,
326 struct br_mdb_entry *entry, int type)
327{
328 struct net_device *lower_dev;
329 struct list_head *iter;
330
331 netdev_for_each_lower_dev(dev, lower_dev, iter)
332 br_mdb_switchdev_host_port(dev, lower_dev, entry, type);
333}
334
294static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, 335static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
295 struct br_mdb_entry *entry, int type) 336 struct br_mdb_entry *entry, int type)
296{ 337{
@@ -316,7 +357,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
316#endif 357#endif
317 358
318 mdb.obj.orig_dev = port_dev; 359 mdb.obj.orig_dev = port_dev;
319 if (port_dev && type == RTM_NEWMDB) { 360 if (p && port_dev && type == RTM_NEWMDB) {
320 complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); 361 complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
321 if (complete_info) { 362 if (complete_info) {
322 complete_info->port = p; 363 complete_info->port = p;
@@ -326,10 +367,13 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
326 if (switchdev_port_obj_add(port_dev, &mdb.obj)) 367 if (switchdev_port_obj_add(port_dev, &mdb.obj))
327 kfree(complete_info); 368 kfree(complete_info);
328 } 369 }
329 } else if (port_dev && type == RTM_DELMDB) { 370 } else if (p && port_dev && type == RTM_DELMDB) {
330 switchdev_port_obj_del(port_dev, &mdb.obj); 371 switchdev_port_obj_del(port_dev, &mdb.obj);
331 } 372 }
332 373
374 if (!p)
375 br_mdb_switchdev_host(dev, entry, type);
376
333 skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); 377 skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
334 if (!skb) 378 if (!skb)
335 goto errout; 379 goto errout;
@@ -352,7 +396,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
352 struct br_mdb_entry entry; 396 struct br_mdb_entry entry;
353 397
354 memset(&entry, 0, sizeof(entry)); 398 memset(&entry, 0, sizeof(entry));
355 entry.ifindex = port->dev->ifindex; 399 if (port)
400 entry.ifindex = port->dev->ifindex;
401 else
402 entry.ifindex = dev->ifindex;
356 entry.addr.proto = group->proto; 403 entry.addr.proto = group->proto;
357 entry.addr.u.ip4 = group->u.ip4; 404 entry.addr.u.ip4 = group->u.ip4;
358#if IS_ENABLED(CONFIG_IPV6) 405#if IS_ENABLED(CONFIG_IPV6)
@@ -654,7 +701,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
654 call_rcu_bh(&p->rcu, br_multicast_free_pg); 701 call_rcu_bh(&p->rcu, br_multicast_free_pg);
655 err = 0; 702 err = 0;
656 703
657 if (!mp->ports && !mp->mglist && 704 if (!mp->ports && !mp->host_joined &&
658 netif_running(br->dev)) 705 netif_running(br->dev))
659 mod_timer(&mp->timer, jiffies); 706 mod_timer(&mp->timer, jiffies);
660 break; 707 break;
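
Editor's note: host-joined groups (port == NULL) are now notified too, so __br_mdb_notify() must not dereference p unconditionally; for offload, a host entry is expressed as a SWITCHDEV_OBJ_ID_HOST_MDB object replicated across all of the bridge's lower devices, with the bridge's own ifindex in the netlink entry. The fan-out, condensed from br_mdb_switchdev_host() above (dev, lower_dev, iter, mdb and type as in the hunk):

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		if (type == RTM_NEWMDB)
			switchdev_port_obj_add(lower_dev, &mdb.obj);
		else if (type == RTM_DELMDB)
			switchdev_port_obj_del(lower_dev, &mdb.obj);
	}
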
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8dc5c8d69bcd..cb4729539b82 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -239,9 +239,9 @@ static void br_multicast_free_group(struct rcu_head *head)
239 kfree(mp); 239 kfree(mp);
240} 240}
241 241
242static void br_multicast_group_expired(unsigned long data) 242static void br_multicast_group_expired(struct timer_list *t)
243{ 243{
244 struct net_bridge_mdb_entry *mp = (void *)data; 244 struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
245 struct net_bridge *br = mp->br; 245 struct net_bridge *br = mp->br;
246 struct net_bridge_mdb_htable *mdb; 246 struct net_bridge_mdb_htable *mdb;
247 247
@@ -249,7 +249,8 @@ static void br_multicast_group_expired(unsigned long data)
249 if (!netif_running(br->dev) || timer_pending(&mp->timer)) 249 if (!netif_running(br->dev) || timer_pending(&mp->timer))
250 goto out; 250 goto out;
251 251
252 mp->mglist = false; 252 mp->host_joined = false;
253 br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
253 254
254 if (mp->ports) 255 if (mp->ports)
255 goto out; 256 goto out;
@@ -292,7 +293,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
292 p->flags); 293 p->flags);
293 call_rcu_bh(&p->rcu, br_multicast_free_pg); 294 call_rcu_bh(&p->rcu, br_multicast_free_pg);
294 295
295 if (!mp->ports && !mp->mglist && 296 if (!mp->ports && !mp->host_joined &&
296 netif_running(br->dev)) 297 netif_running(br->dev))
297 mod_timer(&mp->timer, jiffies); 298 mod_timer(&mp->timer, jiffies);
298 299
@@ -302,9 +303,9 @@ static void br_multicast_del_pg(struct net_bridge *br,
302 WARN_ON(1); 303 WARN_ON(1);
303} 304}
304 305
305static void br_multicast_port_group_expired(unsigned long data) 306static void br_multicast_port_group_expired(struct timer_list *t)
306{ 307{
307 struct net_bridge_port_group *pg = (void *)data; 308 struct net_bridge_port_group *pg = from_timer(pg, t, timer);
308 struct net_bridge *br = pg->port->br; 309 struct net_bridge *br = pg->port->br;
309 310
310 spin_lock(&br->multicast_lock); 311 spin_lock(&br->multicast_lock);
@@ -701,8 +702,7 @@ rehash:
701 702
702 mp->br = br; 703 mp->br = br;
703 mp->addr = *group; 704 mp->addr = *group;
704 setup_timer(&mp->timer, br_multicast_group_expired, 705 timer_setup(&mp->timer, br_multicast_group_expired, 0);
705 (unsigned long)mp);
706 706
707 hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); 707 hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
708 mdb->size++; 708 mdb->size++;
@@ -729,8 +729,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
729 p->flags = flags; 729 p->flags = flags;
730 rcu_assign_pointer(p->next, next); 730 rcu_assign_pointer(p->next, next);
731 hlist_add_head(&p->mglist, &port->mglist); 731 hlist_add_head(&p->mglist, &port->mglist);
732 setup_timer(&p->timer, br_multicast_port_group_expired, 732 timer_setup(&p->timer, br_multicast_port_group_expired, 0);
733 (unsigned long)p);
734 733
735 if (src) 734 if (src)
736 memcpy(p->eth_addr, src, ETH_ALEN); 735 memcpy(p->eth_addr, src, ETH_ALEN);
@@ -775,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br,
775 goto err; 774 goto err;
776 775
777 if (!port) { 776 if (!port) {
778 mp->mglist = true; 777 if (!mp->host_joined) {
778 mp->host_joined = true;
779 br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
780 }
779 mod_timer(&mp->timer, now + br->multicast_membership_interval); 781 mod_timer(&mp->timer, now + br->multicast_membership_interval);
780 goto out; 782 goto out;
781 } 783 }
@@ -843,9 +845,10 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
843} 845}
844#endif 846#endif
845 847
846static void br_multicast_router_expired(unsigned long data) 848static void br_multicast_router_expired(struct timer_list *t)
847{ 849{
848 struct net_bridge_port *port = (void *)data; 850 struct net_bridge_port *port =
851 from_timer(port, t, multicast_router_timer);
849 struct net_bridge *br = port->br; 852 struct net_bridge *br = port->br;
850 853
851 spin_lock(&br->multicast_lock); 854 spin_lock(&br->multicast_lock);
@@ -859,8 +862,32 @@ out:
859 spin_unlock(&br->multicast_lock); 862 spin_unlock(&br->multicast_lock);
860} 863}
861 864
862static void br_multicast_local_router_expired(unsigned long data) 865static void br_mc_router_state_change(struct net_bridge *p,
866 bool is_mc_router)
867{
868 struct switchdev_attr attr = {
869 .orig_dev = p->dev,
870 .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
871 .flags = SWITCHDEV_F_DEFER,
872 .u.mrouter = is_mc_router,
873 };
874
875 switchdev_port_attr_set(p->dev, &attr);
876}
877
878static void br_multicast_local_router_expired(struct timer_list *t)
863{ 879{
880 struct net_bridge *br = from_timer(br, t, multicast_router_timer);
881
882 spin_lock(&br->multicast_lock);
883 if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
884 br->multicast_router == MDB_RTR_TYPE_PERM ||
885 timer_pending(&br->multicast_router_timer))
886 goto out;
887
888 br_mc_router_state_change(br, false);
889out:
890 spin_unlock(&br->multicast_lock);
864} 891}
865 892
866static void br_multicast_querier_expired(struct net_bridge *br, 893static void br_multicast_querier_expired(struct net_bridge *br,
@@ -876,17 +903,17 @@ out:
876 spin_unlock(&br->multicast_lock); 903 spin_unlock(&br->multicast_lock);
877} 904}
878 905
879static void br_ip4_multicast_querier_expired(unsigned long data) 906static void br_ip4_multicast_querier_expired(struct timer_list *t)
880{ 907{
881 struct net_bridge *br = (void *)data; 908 struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
882 909
883 br_multicast_querier_expired(br, &br->ip4_own_query); 910 br_multicast_querier_expired(br, &br->ip4_own_query);
884} 911}
885 912
886#if IS_ENABLED(CONFIG_IPV6) 913#if IS_ENABLED(CONFIG_IPV6)
887static void br_ip6_multicast_querier_expired(unsigned long data) 914static void br_ip6_multicast_querier_expired(struct timer_list *t)
888{ 915{
889 struct net_bridge *br = (void *)data; 916 struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
890 917
891 br_multicast_querier_expired(br, &br->ip6_own_query); 918 br_multicast_querier_expired(br, &br->ip6_own_query);
892} 919}
@@ -987,17 +1014,17 @@ out:
987 spin_unlock(&br->multicast_lock); 1014 spin_unlock(&br->multicast_lock);
988} 1015}
989 1016
990static void br_ip4_multicast_port_query_expired(unsigned long data) 1017static void br_ip4_multicast_port_query_expired(struct timer_list *t)
991{ 1018{
992 struct net_bridge_port *port = (void *)data; 1019 struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
993 1020
994 br_multicast_port_query_expired(port, &port->ip4_own_query); 1021 br_multicast_port_query_expired(port, &port->ip4_own_query);
995} 1022}
996 1023
997#if IS_ENABLED(CONFIG_IPV6) 1024#if IS_ENABLED(CONFIG_IPV6)
998static void br_ip6_multicast_port_query_expired(unsigned long data) 1025static void br_ip6_multicast_port_query_expired(struct timer_list *t)
999{ 1026{
1000 struct net_bridge_port *port = (void *)data; 1027 struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
1001 1028
1002 br_multicast_port_query_expired(port, &port->ip6_own_query); 1029 br_multicast_port_query_expired(port, &port->ip6_own_query);
1003} 1030}
@@ -1019,13 +1046,13 @@ int br_multicast_add_port(struct net_bridge_port *port)
1019{ 1046{
1020 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; 1047 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
1021 1048
1022 setup_timer(&port->multicast_router_timer, br_multicast_router_expired, 1049 timer_setup(&port->multicast_router_timer,
1023 (unsigned long)port); 1050 br_multicast_router_expired, 0);
1024 setup_timer(&port->ip4_own_query.timer, 1051 timer_setup(&port->ip4_own_query.timer,
1025 br_ip4_multicast_port_query_expired, (unsigned long)port); 1052 br_ip4_multicast_port_query_expired, 0);
1026#if IS_ENABLED(CONFIG_IPV6) 1053#if IS_ENABLED(CONFIG_IPV6)
1027 setup_timer(&port->ip6_own_query.timer, 1054 timer_setup(&port->ip6_own_query.timer,
1028 br_ip6_multicast_port_query_expired, (unsigned long)port); 1055 br_ip6_multicast_port_query_expired, 0);
1029#endif 1056#endif
1030 br_mc_disabled_update(port->dev, port->br->multicast_disabled); 1057 br_mc_disabled_update(port->dev, port->br->multicast_disabled);
1031 1058
@@ -1364,9 +1391,12 @@ static void br_multicast_mark_router(struct net_bridge *br,
1364 unsigned long now = jiffies; 1391 unsigned long now = jiffies;
1365 1392
1366 if (!port) { 1393 if (!port) {
1367 if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) 1394 if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
1395 if (!timer_pending(&br->multicast_router_timer))
1396 br_mc_router_state_change(br, true);
1368 mod_timer(&br->multicast_router_timer, 1397 mod_timer(&br->multicast_router_timer,
1369 now + br->multicast_querier_interval); 1398 now + br->multicast_querier_interval);
1399 }
1370 return; 1400 return;
1371 } 1401 }
1372 1402
@@ -1451,7 +1481,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1451 1481
1452 max_delay *= br->multicast_last_member_count; 1482 max_delay *= br->multicast_last_member_count;
1453 1483
1454 if (mp->mglist && 1484 if (mp->host_joined &&
1455 (timer_pending(&mp->timer) ? 1485 (timer_pending(&mp->timer) ?
1456 time_after(mp->timer.expires, now + max_delay) : 1486 time_after(mp->timer.expires, now + max_delay) :
1457 try_to_del_timer_sync(&mp->timer) >= 0)) 1487 try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1535,7 +1565,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1535 goto out; 1565 goto out;
1536 1566
1537 max_delay *= br->multicast_last_member_count; 1567 max_delay *= br->multicast_last_member_count;
1538 if (mp->mglist && 1568 if (mp->host_joined &&
1539 (timer_pending(&mp->timer) ? 1569 (timer_pending(&mp->timer) ?
1540 time_after(mp->timer.expires, now + max_delay) : 1570 time_after(mp->timer.expires, now + max_delay) :
1541 try_to_del_timer_sync(&mp->timer) >= 0)) 1571 try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1596,7 +1626,7 @@ br_multicast_leave_group(struct net_bridge *br,
1596 br_mdb_notify(br->dev, port, group, RTM_DELMDB, 1626 br_mdb_notify(br->dev, port, group, RTM_DELMDB,
1597 p->flags); 1627 p->flags);
1598 1628
1599 if (!mp->ports && !mp->mglist && 1629 if (!mp->ports && !mp->host_joined &&
1600 netif_running(br->dev)) 1630 netif_running(br->dev))
1601 mod_timer(&mp->timer, jiffies); 1631 mod_timer(&mp->timer, jiffies);
1602 } 1632 }
@@ -1636,7 +1666,7 @@ br_multicast_leave_group(struct net_bridge *br,
1636 br->multicast_last_member_interval; 1666 br->multicast_last_member_interval;
1637 1667
1638 if (!port) { 1668 if (!port) {
1639 if (mp->mglist && 1669 if (mp->host_joined &&
1640 (timer_pending(&mp->timer) ? 1670 (timer_pending(&mp->timer) ?
1641 time_after(mp->timer.expires, time) : 1671 time_after(mp->timer.expires, time) :
1642 try_to_del_timer_sync(&mp->timer) >= 0)) { 1672 try_to_del_timer_sync(&mp->timer) >= 0)) {
@@ -1906,17 +1936,17 @@ static void br_multicast_query_expired(struct net_bridge *br,
1906 spin_unlock(&br->multicast_lock); 1936 spin_unlock(&br->multicast_lock);
1907} 1937}
1908 1938
1909static void br_ip4_multicast_query_expired(unsigned long data) 1939static void br_ip4_multicast_query_expired(struct timer_list *t)
1910{ 1940{
1911 struct net_bridge *br = (void *)data; 1941 struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
1912 1942
1913 br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); 1943 br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
1914} 1944}
1915 1945
1916#if IS_ENABLED(CONFIG_IPV6) 1946#if IS_ENABLED(CONFIG_IPV6)
1917static void br_ip6_multicast_query_expired(unsigned long data) 1947static void br_ip6_multicast_query_expired(struct timer_list *t)
1918{ 1948{
1919 struct net_bridge *br = (void *)data; 1949 struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
1920 1950
1921 br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); 1951 br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
1922} 1952}
@@ -1951,17 +1981,17 @@ void br_multicast_init(struct net_bridge *br)
1951 br->has_ipv6_addr = 1; 1981 br->has_ipv6_addr = 1;
1952 1982
1953 spin_lock_init(&br->multicast_lock); 1983 spin_lock_init(&br->multicast_lock);
1954 setup_timer(&br->multicast_router_timer, 1984 timer_setup(&br->multicast_router_timer,
1955 br_multicast_local_router_expired, 0); 1985 br_multicast_local_router_expired, 0);
1956 setup_timer(&br->ip4_other_query.timer, 1986 timer_setup(&br->ip4_other_query.timer,
1957 br_ip4_multicast_querier_expired, (unsigned long)br); 1987 br_ip4_multicast_querier_expired, 0);
1958 setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired, 1988 timer_setup(&br->ip4_own_query.timer,
1959 (unsigned long)br); 1989 br_ip4_multicast_query_expired, 0);
1960#if IS_ENABLED(CONFIG_IPV6) 1990#if IS_ENABLED(CONFIG_IPV6)
1961 setup_timer(&br->ip6_other_query.timer, 1991 timer_setup(&br->ip6_other_query.timer,
1962 br_ip6_multicast_querier_expired, (unsigned long)br); 1992 br_ip6_multicast_querier_expired, 0);
1963 setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired, 1993 timer_setup(&br->ip6_own_query.timer,
1964 (unsigned long)br); 1994 br_ip6_multicast_query_expired, 0);
1965#endif 1995#endif
1966} 1996}
1967 1997
@@ -2042,9 +2072,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
2042 switch (val) { 2072 switch (val) {
2043 case MDB_RTR_TYPE_DISABLED: 2073 case MDB_RTR_TYPE_DISABLED:
2044 case MDB_RTR_TYPE_PERM: 2074 case MDB_RTR_TYPE_PERM:
2075 br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
2045 del_timer(&br->multicast_router_timer); 2076 del_timer(&br->multicast_router_timer);
2046 /* fall through */ 2077 br->multicast_router = val;
2078 err = 0;
2079 break;
2047 case MDB_RTR_TYPE_TEMP_QUERY: 2080 case MDB_RTR_TYPE_TEMP_QUERY:
2081 if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
2082 br_mc_router_state_change(br, false);
2048 br->multicast_router = val; 2083 br->multicast_router = val;
2049 err = 0; 2084 err = 0;
2050 break; 2085 break;
@@ -2184,6 +2219,18 @@ bool br_multicast_enabled(const struct net_device *dev)
2184} 2219}
2185EXPORT_SYMBOL_GPL(br_multicast_enabled); 2220EXPORT_SYMBOL_GPL(br_multicast_enabled);
2186 2221
2222bool br_multicast_router(const struct net_device *dev)
2223{
2224 struct net_bridge *br = netdev_priv(dev);
2225 bool is_router;
2226
2227 spin_lock_bh(&br->multicast_lock);
2228 is_router = br_multicast_is_router(br);
2229 spin_unlock_bh(&br->multicast_lock);
2230 return is_router;
2231}
2232EXPORT_SYMBOL_GPL(br_multicast_router);
2233
2187int br_multicast_set_querier(struct net_bridge *br, unsigned long val) 2234int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
2188{ 2235{
2189 unsigned long max_delay; 2236 unsigned long max_delay;
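
Editor's note: most of the churn in br_multicast.c is the v4.15 timer API conversion: expiry callbacks take the struct timer_list itself and recover their containing object with from_timer() (a container_of() wrapper), so timer_setup() drops the old (unsigned long) data cast. The pattern, isolated (needs <linux/timer.h>):

	struct foo {
		struct timer_list timer;
		/* ... owner state ... */
	};

	static void foo_expired(struct timer_list *t)
	{
		struct foo *f = from_timer(f, t, timer);

		/* handle expiry via f */
	}

	/* at init time: no more data argument smuggled through a cast */
	timer_setup(&f->timer, foo_expired, 0);
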
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 3bc890716c89..d0ef0a8e8831 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void)
138 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ 138 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
139 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ 139 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
140 + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ 140 + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
141 + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
141 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ 142 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
142 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ 143 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
143 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ 144 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -152,6 +153,7 @@ static inline size_t br_port_info_size(void)
152#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 153#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
153 + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ 154 + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
154#endif 155#endif
156 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
155 + 0; 157 + 0;
156} 158}
157 159
@@ -208,7 +210,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
208 p->topology_change_ack) || 210 p->topology_change_ack) ||
209 nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || 211 nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
210 nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & 212 nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
211 BR_VLAN_TUNNEL))) 213 BR_VLAN_TUNNEL)) ||
214 nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
215 nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
216 !!(p->flags & BR_NEIGH_SUPPRESS)))
212 return -EMSGSIZE; 217 return -EMSGSIZE;
213 218
214 timerval = br_timer_value(&p->message_age_timer); 219 timerval = br_timer_value(&p->message_age_timer);
@@ -356,14 +361,14 @@ nla_put_failure:
356 * Contains port and master info as well as carrier and bridge state. 361 * Contains port and master info as well as carrier and bridge state.
357 */ 362 */
358static int br_fill_ifinfo(struct sk_buff *skb, 363static int br_fill_ifinfo(struct sk_buff *skb,
359 struct net_bridge_port *port, 364 const struct net_bridge_port *port,
360 u32 pid, u32 seq, int event, unsigned int flags, 365 u32 pid, u32 seq, int event, unsigned int flags,
361 u32 filter_mask, const struct net_device *dev) 366 u32 filter_mask, const struct net_device *dev)
362{ 367{
368 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
363 struct net_bridge *br; 369 struct net_bridge *br;
364 struct ifinfomsg *hdr; 370 struct ifinfomsg *hdr;
365 struct nlmsghdr *nlh; 371 struct nlmsghdr *nlh;
366 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
367 372
368 if (port) 373 if (port)
369 br = port->br; 374 br = port->br;
@@ -449,28 +454,36 @@ nla_put_failure:
449 return -EMSGSIZE; 454 return -EMSGSIZE;
450} 455}
451 456
452/* 457/* Notify listeners of a change in bridge or port information */
453 * Notify listeners of a change in port information 458void br_ifinfo_notify(int event, const struct net_bridge *br,
454 */ 459 const struct net_bridge_port *port)
455void br_ifinfo_notify(int event, struct net_bridge_port *port)
456{ 460{
457 struct net *net; 461 u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
462 struct net_device *dev;
458 struct sk_buff *skb; 463 struct sk_buff *skb;
459 int err = -ENOBUFS; 464 int err = -ENOBUFS;
460 u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; 465 struct net *net;
466 u16 port_no = 0;
461 467
462 if (!port) 468 if (WARN_ON(!port && !br))
463 return; 469 return;
464 470
465 net = dev_net(port->dev); 471 if (port) {
466 br_debug(port->br, "port %u(%s) event %d\n", 472 dev = port->dev;
467 (unsigned int)port->port_no, port->dev->name, event); 473 br = port->br;
474 port_no = port->port_no;
475 } else {
476 dev = br->dev;
477 }
468 478
469 skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC); 479 net = dev_net(dev);
480 br_debug(br, "port %u(%s) event %d\n", port_no, dev->name, event);
481
482 skb = nlmsg_new(br_nlmsg_size(dev, filter), GFP_ATOMIC);
470 if (skb == NULL) 483 if (skb == NULL)
471 goto errout; 484 goto errout;
472 485
473 err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev); 486 err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev);
474 if (err < 0) { 487 if (err < 0) {
475 /* -EMSGSIZE implies BUG in br_nlmsg_size() */ 488 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
476 WARN_ON(err == -EMSGSIZE); 489 WARN_ON(err == -EMSGSIZE);
@@ -483,7 +496,6 @@ errout:
483 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 496 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
484} 497}
485 498
486
487/* 499/*
488 * Dump information about all ports, in response to GETLINK 500 * Dump information about all ports, in response to GETLINK
489 */ 501 */
@@ -501,8 +513,9 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
501} 513}
502 514
503static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, 515static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
504 int cmd, struct bridge_vlan_info *vinfo) 516 int cmd, struct bridge_vlan_info *vinfo, bool *changed)
505{ 517{
518 bool curr_change;
506 int err = 0; 519 int err = 0;
507 520
508 switch (cmd) { 521 switch (cmd) {
@@ -511,22 +524,27 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
511 /* if the MASTER flag is set this will act on the global 524 /* if the MASTER flag is set this will act on the global
512 * per-VLAN entry as well 525 * per-VLAN entry as well
513 */ 526 */
514 err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); 527 err = nbp_vlan_add(p, vinfo->vid, vinfo->flags,
515 if (err) 528 &curr_change);
516 break;
517 } else { 529 } else {
518 vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; 530 vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
519 err = br_vlan_add(br, vinfo->vid, vinfo->flags); 531 err = br_vlan_add(br, vinfo->vid, vinfo->flags,
532 &curr_change);
520 } 533 }
534 if (curr_change)
535 *changed = true;
521 break; 536 break;
522 537
523 case RTM_DELLINK: 538 case RTM_DELLINK:
524 if (p) { 539 if (p) {
525 nbp_vlan_delete(p, vinfo->vid); 540 if (!nbp_vlan_delete(p, vinfo->vid))
526 if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) 541 *changed = true;
527 br_vlan_delete(p->br, vinfo->vid); 542
528 } else { 543 if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) &&
529 br_vlan_delete(br, vinfo->vid); 544 !br_vlan_delete(p->br, vinfo->vid))
545 *changed = true;
546 } else if (!br_vlan_delete(br, vinfo->vid)) {
547 *changed = true;
530 } 548 }
531 break; 549 break;
532 } 550 }
@@ -537,7 +555,8 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
537static int br_process_vlan_info(struct net_bridge *br, 555static int br_process_vlan_info(struct net_bridge *br,
538 struct net_bridge_port *p, int cmd, 556 struct net_bridge_port *p, int cmd,
539 struct bridge_vlan_info *vinfo_curr, 557 struct bridge_vlan_info *vinfo_curr,
540 struct bridge_vlan_info **vinfo_last) 558 struct bridge_vlan_info **vinfo_last,
559 bool *changed)
541{ 560{
542 if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK) 561 if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
543 return -EINVAL; 562 return -EINVAL;
@@ -567,22 +586,22 @@ static int br_process_vlan_info(struct net_bridge *br,
567 sizeof(struct bridge_vlan_info)); 586 sizeof(struct bridge_vlan_info));
568 for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) { 587 for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
569 tmp_vinfo.vid = v; 588 tmp_vinfo.vid = v;
570 err = br_vlan_info(br, p, cmd, &tmp_vinfo); 589 err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed);
571 if (err) 590 if (err)
572 break; 591 break;
573 } 592 }
574 *vinfo_last = NULL; 593 *vinfo_last = NULL;
575 594
576 return 0; 595 return err;
577 } 596 }
578 597
579 return br_vlan_info(br, p, cmd, vinfo_curr); 598 return br_vlan_info(br, p, cmd, vinfo_curr, changed);
580} 599}
581 600
582static int br_afspec(struct net_bridge *br, 601static int br_afspec(struct net_bridge *br,
583 struct net_bridge_port *p, 602 struct net_bridge_port *p,
584 struct nlattr *af_spec, 603 struct nlattr *af_spec,
585 int cmd) 604 int cmd, bool *changed)
586{ 605{
587 struct bridge_vlan_info *vinfo_curr = NULL; 606 struct bridge_vlan_info *vinfo_curr = NULL;
588 struct bridge_vlan_info *vinfo_last = NULL; 607 struct bridge_vlan_info *vinfo_last = NULL;
@@ -602,7 +621,8 @@ static int br_afspec(struct net_bridge *br,
602 return err; 621 return err;
603 err = br_process_vlan_tunnel_info(br, p, cmd, 622 err = br_process_vlan_tunnel_info(br, p, cmd,
604 &tinfo_curr, 623 &tinfo_curr,
605 &tinfo_last); 624 &tinfo_last,
625 changed);
606 if (err) 626 if (err)
607 return err; 627 return err;
608 break; 628 break;
@@ -611,7 +631,7 @@ static int br_afspec(struct net_bridge *br,
611 return -EINVAL; 631 return -EINVAL;
612 vinfo_curr = nla_data(attr); 632 vinfo_curr = nla_data(attr);
613 err = br_process_vlan_info(br, p, cmd, vinfo_curr, 633 err = br_process_vlan_info(br, p, cmd, vinfo_curr,
614 &vinfo_last); 634 &vinfo_last, changed);
615 if (err) 635 if (err)
616 return err; 636 return err;
617 break; 637 break;
@@ -637,6 +657,9 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
637 [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, 657 [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
638 [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, 658 [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
639 [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, 659 [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
660 [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 },
661 [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
662 [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
640}; 663};
641 664
642/* Change the state of the port and notify spanning tree */ 665/* Change the state of the port and notify spanning tree */
@@ -773,6 +796,20 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
773 return err; 796 return err;
774 } 797 }
775#endif 798#endif
799
800 if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
801 u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]);
802
803 if (fwd_mask & BR_GROUPFWD_MACPAUSE)
804 return -EINVAL;
805 p->group_fwd_mask = fwd_mask;
806 }
807
808 err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS,
809 BR_NEIGH_SUPPRESS);
810 if (err)
811 return err;
812
776 br_port_flags_change(p, old_flags ^ p->flags); 813 br_port_flags_change(p, old_flags ^ p->flags);
777 return 0; 814 return 0;
778} 815}
@@ -780,10 +817,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
780/* Change state and parameters on port. */ 817/* Change state and parameters on port. */
781int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) 818int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
782{ 819{
820 struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
821 struct nlattr *tb[IFLA_BRPORT_MAX + 1];
822 struct net_bridge_port *p;
783 struct nlattr *protinfo; 823 struct nlattr *protinfo;
784 struct nlattr *afspec; 824 struct nlattr *afspec;
785 struct net_bridge_port *p; 825 bool changed = false;
786 struct nlattr *tb[IFLA_BRPORT_MAX + 1];
787 int err = 0; 826 int err = 0;
788 827
789 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); 828 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
@@ -819,15 +858,14 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
819 } 858 }
820 if (err) 859 if (err)
821 goto out; 860 goto out;
861 changed = true;
822 } 862 }
823 863
824 if (afspec) { 864 if (afspec)
825 err = br_afspec((struct net_bridge *)netdev_priv(dev), p, 865 err = br_afspec(br, p, afspec, RTM_SETLINK, &changed);
826 afspec, RTM_SETLINK);
827 }
828 866
829 if (err == 0) 867 if (changed)
830 br_ifinfo_notify(RTM_NEWLINK, p); 868 br_ifinfo_notify(RTM_NEWLINK, br, p);
831out: 869out:
832 return err; 870 return err;
833} 871}
@@ -835,8 +873,10 @@ out:
835/* Delete port information */ 873/* Delete port information */
836int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) 874int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
837{ 875{
838 struct nlattr *afspec; 876 struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
839 struct net_bridge_port *p; 877 struct net_bridge_port *p;
878 struct nlattr *afspec;
879 bool changed = false;
840 int err = 0; 880 int err = 0;
841 881
842 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 882 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
@@ -848,13 +888,12 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
848 if (!p && !(dev->priv_flags & IFF_EBRIDGE)) 888 if (!p && !(dev->priv_flags & IFF_EBRIDGE))
849 return -EINVAL; 889 return -EINVAL;
850 890
851 err = br_afspec((struct net_bridge *)netdev_priv(dev), p, 891 err = br_afspec(br, p, afspec, RTM_DELLINK, &changed);
852 afspec, RTM_DELLINK); 892 if (changed)
853 if (err == 0)
854 /* Send RTM_NEWLINK because userspace 893 /* Send RTM_NEWLINK because userspace
855 * expects RTM_NEWLINK for vlan dels 894 * expects RTM_NEWLINK for vlan dels
856 */ 895 */
857 br_ifinfo_notify(RTM_NEWLINK, p); 896 br_ifinfo_notify(RTM_NEWLINK, br, p);
858 897
859 return err; 898 return err;
860} 899}
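The br_setlink()/br_dellink() hunks above replace the old "notify on every successful request" behaviour with a bool *changed out-parameter that the vlan helpers fill in, so an RTM_NEWLINK notification is sent only when bridge or port state actually changed. A minimal userspace sketch of the out-parameter convention follows; demo_vlan_add() and its one-slot table are illustrative stand-ins, not kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the "changed" out-parameter pattern: each helper
 * reports success through the return value and reports "did
 * anything actually change?" through *changed, so the caller can
 * skip notifications for idempotent requests. */
static int demo_vlan_add(unsigned short *table, unsigned short vid,
                         bool *changed)
{
        *changed = false;
        if (*table == vid)      /* already present: success, no change */
                return 0;
        *table = vid;
        *changed = true;
        return 0;
}

int main(void)
{
        unsigned short table = 0;
        bool changed;

        demo_vlan_add(&table, 10, &changed);
        printf("first add: changed=%d\n", changed);     /* 1 */
        demo_vlan_add(&table, 10, &changed);
        printf("repeat add: changed=%d\n", changed);    /* 0: no notify */
        return 0;
}

The same convention is threaded through br_afspec(), br_process_vlan_info() and the br_netlink_tunnel.c helpers in the next file, so a request that merely re-asserts existing state completes without waking netlink listeners.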
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 3712c7f0e00c..da8cb99fd259 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -198,7 +198,7 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX +
198}; 198};
199 199
200static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, 200static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
201 u16 vid, u32 tun_id) 201 u16 vid, u32 tun_id, bool *changed)
202{ 202{
203 int err = 0; 203 int err = 0;
204 204
@@ -208,9 +208,12 @@ static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
208 switch (cmd) { 208 switch (cmd) {
209 case RTM_SETLINK: 209 case RTM_SETLINK:
210 err = nbp_vlan_tunnel_info_add(p, vid, tun_id); 210 err = nbp_vlan_tunnel_info_add(p, vid, tun_id);
211 if (!err)
212 *changed = true;
211 break; 213 break;
212 case RTM_DELLINK: 214 case RTM_DELLINK:
213 nbp_vlan_tunnel_info_delete(p, vid); 215 if (!nbp_vlan_tunnel_info_delete(p, vid))
216 *changed = true;
214 break; 217 break;
215 } 218 }
216 219
@@ -254,7 +257,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr,
254int br_process_vlan_tunnel_info(struct net_bridge *br, 257int br_process_vlan_tunnel_info(struct net_bridge *br,
255 struct net_bridge_port *p, int cmd, 258 struct net_bridge_port *p, int cmd,
256 struct vtunnel_info *tinfo_curr, 259 struct vtunnel_info *tinfo_curr,
257 struct vtunnel_info *tinfo_last) 260 struct vtunnel_info *tinfo_last,
261 bool *changed)
258{ 262{
259 int err; 263 int err;
260 264
@@ -272,7 +276,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
272 return -EINVAL; 276 return -EINVAL;
273 t = tinfo_last->tunid; 277 t = tinfo_last->tunid;
274 for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { 278 for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) {
275 err = br_vlan_tunnel_info(p, cmd, v, t); 279 err = br_vlan_tunnel_info(p, cmd, v, t, changed);
276 if (err) 280 if (err)
277 return err; 281 return err;
278 t++; 282 t++;
@@ -283,7 +287,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
283 if (tinfo_last->flags) 287 if (tinfo_last->flags)
284 return -EINVAL; 288 return -EINVAL;
285 err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, 289 err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid,
286 tinfo_curr->tunid); 290 tinfo_curr->tunid, changed);
287 if (err) 291 if (err)
288 return err; 292 return err;
289 memset(tinfo_last, 0, sizeof(struct vtunnel_info)); 293 memset(tinfo_last, 0, sizeof(struct vtunnel_info));
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e870cfc85b14..1312b8d20ec3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -36,7 +36,14 @@
36/* Control of forwarding link local multicast */ 36/* Control of forwarding link local multicast */
37#define BR_GROUPFWD_DEFAULT 0 37#define BR_GROUPFWD_DEFAULT 0
38/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ 38/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
39#define BR_GROUPFWD_RESTRICTED 0x0007u 39enum {
40 BR_GROUPFWD_STP = BIT(0),
41 BR_GROUPFWD_MACPAUSE = BIT(1),
42 BR_GROUPFWD_LACP = BIT(2),
43};
44
45#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \
46 BR_GROUPFWD_LACP)
40/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ 47/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
41#define BR_GROUPFWD_8021AD 0xB801u 48#define BR_GROUPFWD_8021AD 0xB801u
42 49
@@ -202,7 +209,7 @@ struct net_bridge_mdb_entry
202 struct rcu_head rcu; 209 struct rcu_head rcu;
203 struct timer_list timer; 210 struct timer_list timer;
204 struct br_ip addr; 211 struct br_ip addr;
205 bool mglist; 212 bool host_joined;
206}; 213};
207 214
208struct net_bridge_mdb_htable 215struct net_bridge_mdb_htable
@@ -268,6 +275,7 @@ struct net_bridge_port {
268#ifdef CONFIG_NET_SWITCHDEV 275#ifdef CONFIG_NET_SWITCHDEV
269 int offload_fwd_mark; 276 int offload_fwd_mark;
270#endif 277#endif
278 u16 group_fwd_mask;
271}; 279};
272 280
273#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) 281#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -396,6 +404,7 @@ struct net_bridge {
396#ifdef CONFIG_NET_SWITCHDEV 404#ifdef CONFIG_NET_SWITCHDEV
397 int offload_fwd_mark; 405 int offload_fwd_mark;
398#endif 406#endif
407 bool neigh_suppress_enabled;
399}; 408};
400 409
401struct br_input_skb_cb { 410struct br_input_skb_cb {
@@ -558,7 +567,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
558void br_port_carrier_check(struct net_bridge_port *p); 567void br_port_carrier_check(struct net_bridge_port *p);
559int br_add_bridge(struct net *net, const char *name); 568int br_add_bridge(struct net *net, const char *name);
560int br_del_bridge(struct net *net, const char *name); 569int br_del_bridge(struct net *net, const char *name);
561int br_add_if(struct net_bridge *br, struct net_device *dev); 570int br_add_if(struct net_bridge *br, struct net_device *dev,
571 struct netlink_ext_ack *extack);
562int br_del_if(struct net_bridge *br, struct net_device *dev); 572int br_del_if(struct net_bridge *br, struct net_device *dev);
563int br_min_mtu(const struct net_bridge *br); 573int br_min_mtu(const struct net_bridge *br);
564netdev_features_t br_features_recompute(struct net_bridge *br, 574netdev_features_t br_features_recompute(struct net_bridge *br,
@@ -793,7 +803,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
793 const struct net_bridge_port *port, 803 const struct net_bridge_port *port,
794 struct net_bridge_vlan_group *vg, 804 struct net_bridge_vlan_group *vg,
795 struct sk_buff *skb); 805 struct sk_buff *skb);
796int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); 806int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
807 bool *changed);
797int br_vlan_delete(struct net_bridge *br, u16 vid); 808int br_vlan_delete(struct net_bridge *br, u16 vid);
798void br_vlan_flush(struct net_bridge *br); 809void br_vlan_flush(struct net_bridge *br);
799struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); 810struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
@@ -806,7 +817,8 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
806int br_vlan_init(struct net_bridge *br); 817int br_vlan_init(struct net_bridge *br);
807int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); 818int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
808int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); 819int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
809int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); 820int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
821 bool *changed);
810int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); 822int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
811void nbp_vlan_flush(struct net_bridge_port *port); 823void nbp_vlan_flush(struct net_bridge_port *port);
812int nbp_vlan_init(struct net_bridge_port *port); 824int nbp_vlan_init(struct net_bridge_port *port);
@@ -893,8 +905,10 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
893 return skb; 905 return skb;
894} 906}
895 907
896static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) 908static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
909 bool *changed)
897{ 910{
911 *changed = false;
898 return -EOPNOTSUPP; 912 return -EOPNOTSUPP;
899} 913}
900 914
@@ -916,8 +930,10 @@ static inline int br_vlan_init(struct net_bridge *br)
916 return 0; 930 return 0;
917} 931}
918 932
919static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) 933static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
934 bool *changed)
920{ 935{
936 *changed = false;
921 return -EOPNOTSUPP; 937 return -EOPNOTSUPP;
922} 938}
923 939
@@ -1055,7 +1071,8 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr)
1055extern struct rtnl_link_ops br_link_ops; 1071extern struct rtnl_link_ops br_link_ops;
1056int br_netlink_init(void); 1072int br_netlink_init(void);
1057void br_netlink_fini(void); 1073void br_netlink_fini(void);
1058void br_ifinfo_notify(int event, struct net_bridge_port *port); 1074void br_ifinfo_notify(int event, const struct net_bridge *br,
1075 const struct net_bridge_port *port);
1059int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); 1076int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
1060int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); 1077int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
1061int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, 1078int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
@@ -1130,4 +1147,11 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
1130} 1147}
1131#endif /* CONFIG_NET_SWITCHDEV */ 1148#endif /* CONFIG_NET_SWITCHDEV */
1132 1149
1150/* br_arp_nd_proxy.c */
1151void br_recalculate_neigh_suppress_enabled(struct net_bridge *br);
1152void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
1153 u16 vid, struct net_bridge_port *p);
1154void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
1155 u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
1156struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
1133#endif 1157#endif
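The br_private.h hunk above replaces the opaque BR_GROUPFWD_RESTRICTED value 0x0007u with named BIT() flags, so code such as the new br_setport() check can reject individual link-local protocols by name rather than by magic constant. The sketch below is an illustrative userspace re-implementation with BIT() expanded by hand; it is not the kernel header.

#include <stdio.h>

#define BIT(n)  (1u << (n))

enum {
        BR_GROUPFWD_STP      = BIT(0),  /* 01-80-C2-00-00-00 */
        BR_GROUPFWD_MACPAUSE = BIT(1),  /* 01-80-C2-00-00-01 */
        BR_GROUPFWD_LACP     = BIT(2),  /* 01-80-C2-00-00-02 */
};

#define BR_GROUPFWD_RESTRICTED \
        (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | BR_GROUPFWD_LACP)

int main(void)
{
        unsigned short fwd_mask = BIT(0) | BIT(2);  /* forward STP + LACP */

        /* MAC PAUSE frames are flow control and must never be
         * forwarded, mirroring the -EINVAL check in br_setport(). */
        if (fwd_mask & BR_GROUPFWD_MACPAUSE)
                return 1;

        printf("restricted=%#x requested=%#x\n",
               BR_GROUPFWD_RESTRICTED, fwd_mask);
        return 0;
}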
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index 4a447a378ab3..a259471bfd78 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -26,7 +26,8 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
26 struct net_bridge_port *p, 26 struct net_bridge_port *p,
27 int cmd, 27 int cmd,
28 struct vtunnel_info *tinfo_curr, 28 struct vtunnel_info *tinfo_curr,
29 struct vtunnel_info *tinfo_last); 29 struct vtunnel_info *tinfo_last,
30 bool *changed);
30int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg); 31int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg);
31int br_fill_vlan_tunnel_info(struct sk_buff *skb, 32int br_fill_vlan_tunnel_info(struct sk_buff *skb,
32 struct net_bridge_vlan_group *vg); 33 struct net_bridge_vlan_group *vg);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 8f56c2d1f1a7..b6941961a876 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -123,7 +123,7 @@ static void br_root_port_block(const struct net_bridge *br,
123 (unsigned int) p->port_no, p->dev->name); 123 (unsigned int) p->port_no, p->dev->name);
124 124
125 br_set_state(p, BR_STATE_LISTENING); 125 br_set_state(p, BR_STATE_LISTENING);
126 br_ifinfo_notify(RTM_NEWLINK, p); 126 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
127 127
128 if (br->forward_delay > 0) 128 if (br->forward_delay > 0)
129 mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); 129 mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
@@ -403,7 +403,7 @@ static void br_make_blocking(struct net_bridge_port *p)
403 br_topology_change_detection(p->br); 403 br_topology_change_detection(p->br);
404 404
405 br_set_state(p, BR_STATE_BLOCKING); 405 br_set_state(p, BR_STATE_BLOCKING);
406 br_ifinfo_notify(RTM_NEWLINK, p); 406 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
407 407
408 del_timer(&p->forward_delay_timer); 408 del_timer(&p->forward_delay_timer);
409 } 409 }
@@ -426,7 +426,7 @@ static void br_make_forwarding(struct net_bridge_port *p)
426 else 426 else
427 br_set_state(p, BR_STATE_LEARNING); 427 br_set_state(p, BR_STATE_LEARNING);
428 428
429 br_ifinfo_notify(RTM_NEWLINK, p); 429 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
430 430
431 if (br->forward_delay != 0) 431 if (br->forward_delay != 0)
432 mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); 432 mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 89110319ef0f..808e2b914015 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -96,7 +96,7 @@ void br_stp_enable_port(struct net_bridge_port *p)
96{ 96{
97 br_init_port(p); 97 br_init_port(p);
98 br_port_state_selection(p->br); 98 br_port_state_selection(p->br);
99 br_ifinfo_notify(RTM_NEWLINK, p); 99 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
100} 100}
101 101
102/* called under bridge lock */ 102/* called under bridge lock */
@@ -111,7 +111,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
111 p->topology_change_ack = 0; 111 p->topology_change_ack = 0;
112 p->config_pending = 0; 112 p->config_pending = 0;
113 113
114 br_ifinfo_notify(RTM_NEWLINK, p); 114 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
115 115
116 del_timer(&p->message_age_timer); 116 del_timer(&p->message_age_timer);
117 del_timer(&p->forward_delay_timer); 117 del_timer(&p->forward_delay_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 60b6fe277a8b..e7739de5f0e1 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -31,9 +31,9 @@ static int br_is_designated_for_some_port(const struct net_bridge *br)
31 return 0; 31 return 0;
32} 32}
33 33
34static void br_hello_timer_expired(unsigned long arg) 34static void br_hello_timer_expired(struct timer_list *t)
35{ 35{
36 struct net_bridge *br = (struct net_bridge *)arg; 36 struct net_bridge *br = from_timer(br, t, hello_timer);
37 37
38 br_debug(br, "hello timer expired\n"); 38 br_debug(br, "hello timer expired\n");
39 spin_lock(&br->lock); 39 spin_lock(&br->lock);
@@ -47,9 +47,9 @@ static void br_hello_timer_expired(unsigned long arg)
47 spin_unlock(&br->lock); 47 spin_unlock(&br->lock);
48} 48}
49 49
50static void br_message_age_timer_expired(unsigned long arg) 50static void br_message_age_timer_expired(struct timer_list *t)
51{ 51{
52 struct net_bridge_port *p = (struct net_bridge_port *) arg; 52 struct net_bridge_port *p = from_timer(p, t, message_age_timer);
53 struct net_bridge *br = p->br; 53 struct net_bridge *br = p->br;
54 const bridge_id *id = &p->designated_bridge; 54 const bridge_id *id = &p->designated_bridge;
55 int was_root; 55 int was_root;
@@ -80,9 +80,9 @@ static void br_message_age_timer_expired(unsigned long arg)
80 spin_unlock(&br->lock); 80 spin_unlock(&br->lock);
81} 81}
82 82
83static void br_forward_delay_timer_expired(unsigned long arg) 83static void br_forward_delay_timer_expired(struct timer_list *t)
84{ 84{
85 struct net_bridge_port *p = (struct net_bridge_port *) arg; 85 struct net_bridge_port *p = from_timer(p, t, forward_delay_timer);
86 struct net_bridge *br = p->br; 86 struct net_bridge *br = p->br;
87 87
88 br_debug(br, "port %u(%s) forward delay timer\n", 88 br_debug(br, "port %u(%s) forward delay timer\n",
@@ -99,14 +99,14 @@ static void br_forward_delay_timer_expired(unsigned long arg)
99 netif_carrier_on(br->dev); 99 netif_carrier_on(br->dev);
100 } 100 }
101 rcu_read_lock(); 101 rcu_read_lock();
102 br_ifinfo_notify(RTM_NEWLINK, p); 102 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
103 rcu_read_unlock(); 103 rcu_read_unlock();
104 spin_unlock(&br->lock); 104 spin_unlock(&br->lock);
105} 105}
106 106
107static void br_tcn_timer_expired(unsigned long arg) 107static void br_tcn_timer_expired(struct timer_list *t)
108{ 108{
109 struct net_bridge *br = (struct net_bridge *) arg; 109 struct net_bridge *br = from_timer(br, t, tcn_timer);
110 110
111 br_debug(br, "tcn timer expired\n"); 111 br_debug(br, "tcn timer expired\n");
112 spin_lock(&br->lock); 112 spin_lock(&br->lock);
@@ -118,9 +118,9 @@ static void br_tcn_timer_expired(unsigned long arg)
118 spin_unlock(&br->lock); 118 spin_unlock(&br->lock);
119} 119}
120 120
121static void br_topology_change_timer_expired(unsigned long arg) 121static void br_topology_change_timer_expired(struct timer_list *t)
122{ 122{
123 struct net_bridge *br = (struct net_bridge *) arg; 123 struct net_bridge *br = from_timer(br, t, topology_change_timer);
124 124
125 br_debug(br, "topo change timer expired\n"); 125 br_debug(br, "topo change timer expired\n");
126 spin_lock(&br->lock); 126 spin_lock(&br->lock);
@@ -129,9 +129,9 @@ static void br_topology_change_timer_expired(unsigned long arg)
129 spin_unlock(&br->lock); 129 spin_unlock(&br->lock);
130} 130}
131 131
132static void br_hold_timer_expired(unsigned long arg) 132static void br_hold_timer_expired(struct timer_list *t)
133{ 133{
134 struct net_bridge_port *p = (struct net_bridge_port *) arg; 134 struct net_bridge_port *p = from_timer(p, t, hold_timer);
135 135
136 br_debug(p->br, "port %u(%s) hold timer expired\n", 136 br_debug(p->br, "port %u(%s) hold timer expired\n",
137 (unsigned int) p->port_no, p->dev->name); 137 (unsigned int) p->port_no, p->dev->name);
@@ -144,27 +144,17 @@ static void br_hold_timer_expired(unsigned long arg)
144 144
145void br_stp_timer_init(struct net_bridge *br) 145void br_stp_timer_init(struct net_bridge *br)
146{ 146{
147 setup_timer(&br->hello_timer, br_hello_timer_expired, 147 timer_setup(&br->hello_timer, br_hello_timer_expired, 0);
148 (unsigned long) br); 148 timer_setup(&br->tcn_timer, br_tcn_timer_expired, 0);
149 149 timer_setup(&br->topology_change_timer,
150 setup_timer(&br->tcn_timer, br_tcn_timer_expired, 150 br_topology_change_timer_expired, 0);
151 (unsigned long) br);
152
153 setup_timer(&br->topology_change_timer,
154 br_topology_change_timer_expired,
155 (unsigned long) br);
156} 151}
157 152
158void br_stp_port_timer_init(struct net_bridge_port *p) 153void br_stp_port_timer_init(struct net_bridge_port *p)
159{ 154{
160 setup_timer(&p->message_age_timer, br_message_age_timer_expired, 155 timer_setup(&p->message_age_timer, br_message_age_timer_expired, 0);
161 (unsigned long) p); 156 timer_setup(&p->forward_delay_timer, br_forward_delay_timer_expired, 0);
162 157 timer_setup(&p->hold_timer, br_hold_timer_expired, 0);
163 setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired,
164 (unsigned long) p);
165
166 setup_timer(&p->hold_timer, br_hold_timer_expired,
167 (unsigned long) p);
168} 158}
169 159
170/* Report ticks left (in USER_HZ) used for API */ 160/* Report ticks left (in USER_HZ) used for API */
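The br_stp_timer.c rewrite above is part of the tree-wide timer API conversion: setup_timer(timer, fn, (unsigned long)data) becomes timer_setup(timer, fn, flags), and the callback now receives the struct timer_list pointer itself, recovering its container with from_timer(), a container_of() wrapper. The self-contained sketch below re-creates just enough of the pattern to compile in userspace (GCC typeof extension assumed); the struct and macro bodies are simplified stand-ins.

#include <stddef.h>
#include <stdio.h>

struct timer_list {
        void (*function)(struct timer_list *);
};

/* from_timer() is container_of() keyed on the timer member. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
#define from_timer(var, timer, field) \
        container_of(timer, typeof(*(var)), field)

struct demo_bridge {
        int port_count;
        struct timer_list hello_timer;
};

static void hello_expired(struct timer_list *t)
{
        /* No (unsigned long) cast: the container is derived from
         * the timer pointer itself. */
        struct demo_bridge *br = from_timer(br, t, hello_timer);

        printf("hello timer: bridge has %d ports\n", br->port_count);
}

int main(void)
{
        struct demo_bridge br = { .port_count = 3 };

        br.hello_timer.function = hello_expired;    /* timer_setup() analogue */
        br.hello_timer.function(&br.hello_timer);   /* simulate expiry */
        return 0;
}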
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index f6b1c7de059d..9700e0f3307b 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/list.h> 3#include <linux/list.h>
3#include <linux/netdevice.h> 4#include <linux/netdevice.h>
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 5d5d413a6cf8..0254c35b2bf0 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v)
165} 165}
166static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); 166static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
167 167
168static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
169{
170 return sprintf(buf, "%#x\n", p->group_fwd_mask);
171}
172
173static int store_group_fwd_mask(struct net_bridge_port *p,
174 unsigned long v)
175{
176 if (v & BR_GROUPFWD_MACPAUSE)
177 return -EINVAL;
178 p->group_fwd_mask = v;
179
180 return 0;
181}
182static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
183 store_group_fwd_mask);
184
168BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); 185BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
169BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); 186BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
170BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); 187BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
@@ -174,6 +191,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
174BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); 191BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
175BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); 192BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
176BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); 193BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
194BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS);
177 195
178#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 196#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
179static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) 197static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -223,6 +241,8 @@ static const struct brport_attribute *brport_attrs[] = {
223 &brport_attr_proxyarp_wifi, 241 &brport_attr_proxyarp_wifi,
224 &brport_attr_multicast_flood, 242 &brport_attr_multicast_flood,
225 &brport_attr_broadcast_flood, 243 &brport_attr_broadcast_flood,
244 &brport_attr_group_fwd_mask,
245 &brport_attr_neigh_suppress,
226 NULL 246 NULL
227}; 247};
228 248
@@ -260,7 +280,7 @@ static ssize_t brport_store(struct kobject *kobj,
260 ret = brport_attr->store(p, val); 280 ret = brport_attr->store(p, val);
261 spin_unlock_bh(&p->br->lock); 281 spin_unlock_bh(&p->br->lock);
262 if (!ret) { 282 if (!ret) {
263 br_ifinfo_notify(RTM_NEWLINK, p); 283 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
264 ret = count; 284 ret = count;
265 } 285 }
266 } 286 }
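The new group_fwd_mask brport attribute gives sysfs the same control that IFLA_BRPORT_GROUP_FWD_MASK adds to netlink in br_netlink.c above: both paths store the raw bitmap after rejecting any mask containing BR_GROUPFWD_MACPAUSE. On a kernel carrying this patch the knob should appear as /sys/class/net/<port>/brport/group_fwd_mask, readable as a hex value and writable with a new mask; the exact port name is of course deployment-specific.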
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 233a30040c91..51935270c651 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -32,27 +32,34 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
32 return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params); 32 return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
33} 33}
34 34
35static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) 35static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
36{ 36{
37 if (vg->pvid == vid) 37 if (vg->pvid == vid)
38 return; 38 return false;
39 39
40 smp_wmb(); 40 smp_wmb();
41 vg->pvid = vid; 41 vg->pvid = vid;
42
43 return true;
42} 44}
43 45
44static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) 46static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
45{ 47{
46 if (vg->pvid != vid) 48 if (vg->pvid != vid)
47 return; 49 return false;
48 50
49 smp_wmb(); 51 smp_wmb();
50 vg->pvid = 0; 52 vg->pvid = 0;
53
54 return true;
51} 55}
52 56
53static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) 57/* return true if anything changed, false otherwise */
58static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
54{ 59{
55 struct net_bridge_vlan_group *vg; 60 struct net_bridge_vlan_group *vg;
61 u16 old_flags = v->flags;
62 bool ret;
56 63
57 if (br_vlan_is_master(v)) 64 if (br_vlan_is_master(v))
58 vg = br_vlan_group(v->br); 65 vg = br_vlan_group(v->br);
@@ -60,14 +67,16 @@ static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
60 vg = nbp_vlan_group(v->port); 67 vg = nbp_vlan_group(v->port);
61 68
62 if (flags & BRIDGE_VLAN_INFO_PVID) 69 if (flags & BRIDGE_VLAN_INFO_PVID)
63 __vlan_add_pvid(vg, v->vid); 70 ret = __vlan_add_pvid(vg, v->vid);
64 else 71 else
65 __vlan_delete_pvid(vg, v->vid); 72 ret = __vlan_delete_pvid(vg, v->vid);
66 73
67 if (flags & BRIDGE_VLAN_INFO_UNTAGGED) 74 if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
68 v->flags |= BRIDGE_VLAN_INFO_UNTAGGED; 75 v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
69 else 76 else
70 v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; 77 v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
78
79 return ret || !!(old_flags ^ v->flags);
71} 80}
72 81
73static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, 82static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
@@ -151,8 +160,10 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
151 vg = br_vlan_group(br); 160 vg = br_vlan_group(br);
152 masterv = br_vlan_find(vg, vid); 161 masterv = br_vlan_find(vg, vid);
153 if (!masterv) { 162 if (!masterv) {
163 bool changed;
164
154 /* missing global ctx, create it now */ 165 /* missing global ctx, create it now */
155 if (br_vlan_add(br, vid, 0)) 166 if (br_vlan_add(br, vid, 0, &changed))
156 return NULL; 167 return NULL;
157 masterv = br_vlan_find(vg, vid); 168 masterv = br_vlan_find(vg, vid);
158 if (WARN_ON(!masterv)) 169 if (WARN_ON(!masterv))
@@ -232,8 +243,11 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
232 243
233 /* need to work on the master vlan too */ 244 /* need to work on the master vlan too */
234 if (flags & BRIDGE_VLAN_INFO_MASTER) { 245 if (flags & BRIDGE_VLAN_INFO_MASTER) {
235 err = br_vlan_add(br, v->vid, flags | 246 bool changed;
236 BRIDGE_VLAN_INFO_BRENTRY); 247
248 err = br_vlan_add(br, v->vid,
249 flags | BRIDGE_VLAN_INFO_BRENTRY,
250 &changed);
237 if (err) 251 if (err)
238 goto out_filt; 252 goto out_filt;
239 } 253 }
@@ -550,8 +564,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
550 564
551/* Must be protected by RTNL. 565/* Must be protected by RTNL.
552 * Must be called with vid in range from 1 to 4094 inclusive. 566 * Must be called with vid in range from 1 to 4094 inclusive.
567 * changed must be true only if the vlan was created or updated
553 */ 568 */
554int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) 569int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
555{ 570{
556 struct net_bridge_vlan_group *vg; 571 struct net_bridge_vlan_group *vg;
557 struct net_bridge_vlan *vlan; 572 struct net_bridge_vlan *vlan;
@@ -559,6 +574,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
559 574
560 ASSERT_RTNL(); 575 ASSERT_RTNL();
561 576
577 *changed = false;
562 vg = br_vlan_group(br); 578 vg = br_vlan_group(br);
563 vlan = br_vlan_find(vg, vid); 579 vlan = br_vlan_find(vg, vid);
564 if (vlan) { 580 if (vlan) {
@@ -576,8 +592,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
576 refcount_inc(&vlan->refcnt); 592 refcount_inc(&vlan->refcnt);
577 vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; 593 vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
578 vg->num_vlans++; 594 vg->num_vlans++;
595 *changed = true;
579 } 596 }
580 __vlan_add_flags(vlan, flags); 597 if (__vlan_add_flags(vlan, flags))
598 *changed = true;
599
581 return 0; 600 return 0;
582 } 601 }
583 602
@@ -600,6 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
600 if (ret) { 619 if (ret) {
601 free_percpu(vlan->stats); 620 free_percpu(vlan->stats);
602 kfree(vlan); 621 kfree(vlan);
622 } else {
623 *changed = true;
603 } 624 }
604 625
605 return ret; 626 return ret;
@@ -824,9 +845,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
824 const struct net_bridge_vlan *pvent; 845 const struct net_bridge_vlan *pvent;
825 struct net_bridge_vlan_group *vg; 846 struct net_bridge_vlan_group *vg;
826 struct net_bridge_port *p; 847 struct net_bridge_port *p;
848 unsigned long *changed;
849 bool vlchange;
827 u16 old_pvid; 850 u16 old_pvid;
828 int err = 0; 851 int err = 0;
829 unsigned long *changed;
830 852
831 if (!pvid) { 853 if (!pvid) {
832 br_vlan_disable_default_pvid(br); 854 br_vlan_disable_default_pvid(br);
@@ -850,7 +872,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
850 err = br_vlan_add(br, pvid, 872 err = br_vlan_add(br, pvid,
851 BRIDGE_VLAN_INFO_PVID | 873 BRIDGE_VLAN_INFO_PVID |
852 BRIDGE_VLAN_INFO_UNTAGGED | 874 BRIDGE_VLAN_INFO_UNTAGGED |
853 BRIDGE_VLAN_INFO_BRENTRY); 875 BRIDGE_VLAN_INFO_BRENTRY,
876 &vlchange);
854 if (err) 877 if (err)
855 goto out; 878 goto out;
856 br_vlan_delete(br, old_pvid); 879 br_vlan_delete(br, old_pvid);
@@ -869,7 +892,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
869 892
870 err = nbp_vlan_add(p, pvid, 893 err = nbp_vlan_add(p, pvid,
871 BRIDGE_VLAN_INFO_PVID | 894 BRIDGE_VLAN_INFO_PVID |
872 BRIDGE_VLAN_INFO_UNTAGGED); 895 BRIDGE_VLAN_INFO_UNTAGGED,
896 &vlchange);
873 if (err) 897 if (err)
874 goto err_port; 898 goto err_port;
875 nbp_vlan_delete(p, old_pvid); 899 nbp_vlan_delete(p, old_pvid);
@@ -890,7 +914,8 @@ err_port:
890 if (old_pvid) 914 if (old_pvid)
891 nbp_vlan_add(p, old_pvid, 915 nbp_vlan_add(p, old_pvid,
892 BRIDGE_VLAN_INFO_PVID | 916 BRIDGE_VLAN_INFO_PVID |
893 BRIDGE_VLAN_INFO_UNTAGGED); 917 BRIDGE_VLAN_INFO_UNTAGGED,
918 &vlchange);
894 nbp_vlan_delete(p, pvid); 919 nbp_vlan_delete(p, pvid);
895 } 920 }
896 921
@@ -899,7 +924,8 @@ err_port:
899 br_vlan_add(br, old_pvid, 924 br_vlan_add(br, old_pvid,
900 BRIDGE_VLAN_INFO_PVID | 925 BRIDGE_VLAN_INFO_PVID |
901 BRIDGE_VLAN_INFO_UNTAGGED | 926 BRIDGE_VLAN_INFO_UNTAGGED |
902 BRIDGE_VLAN_INFO_BRENTRY); 927 BRIDGE_VLAN_INFO_BRENTRY,
928 &vlchange);
903 br_vlan_delete(br, pvid); 929 br_vlan_delete(br, pvid);
904 } 930 }
905 goto out; 931 goto out;
@@ -931,6 +957,7 @@ int br_vlan_init(struct net_bridge *br)
931{ 957{
932 struct net_bridge_vlan_group *vg; 958 struct net_bridge_vlan_group *vg;
933 int ret = -ENOMEM; 959 int ret = -ENOMEM;
960 bool changed;
934 961
935 vg = kzalloc(sizeof(*vg), GFP_KERNEL); 962 vg = kzalloc(sizeof(*vg), GFP_KERNEL);
936 if (!vg) 963 if (!vg)
@@ -947,7 +974,7 @@ int br_vlan_init(struct net_bridge *br)
947 rcu_assign_pointer(br->vlgrp, vg); 974 rcu_assign_pointer(br->vlgrp, vg);
948 ret = br_vlan_add(br, 1, 975 ret = br_vlan_add(br, 1,
949 BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | 976 BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
950 BRIDGE_VLAN_INFO_BRENTRY); 977 BRIDGE_VLAN_INFO_BRENTRY, &changed);
951 if (ret) 978 if (ret)
952 goto err_vlan_add; 979 goto err_vlan_add;
953 980
@@ -992,9 +1019,12 @@ int nbp_vlan_init(struct net_bridge_port *p)
992 INIT_LIST_HEAD(&vg->vlan_list); 1019 INIT_LIST_HEAD(&vg->vlan_list);
993 rcu_assign_pointer(p->vlgrp, vg); 1020 rcu_assign_pointer(p->vlgrp, vg);
994 if (p->br->default_pvid) { 1021 if (p->br->default_pvid) {
1022 bool changed;
1023
995 ret = nbp_vlan_add(p, p->br->default_pvid, 1024 ret = nbp_vlan_add(p, p->br->default_pvid,
996 BRIDGE_VLAN_INFO_PVID | 1025 BRIDGE_VLAN_INFO_PVID |
997 BRIDGE_VLAN_INFO_UNTAGGED); 1026 BRIDGE_VLAN_INFO_UNTAGGED,
1027 &changed);
998 if (ret) 1028 if (ret)
999 goto err_vlan_add; 1029 goto err_vlan_add;
1000 } 1030 }
@@ -1016,8 +1046,10 @@ err_vlan_enabled:
1016 1046
1017/* Must be protected by RTNL. 1047/* Must be protected by RTNL.
1018 * Must be called with vid in range from 1 to 4094 inclusive. 1048 * Must be called with vid in range from 1 to 4094 inclusive.
1049 * changed must be true only if the vlan was created or updated
1019 */ 1050 */
1020int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) 1051int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
1052 bool *changed)
1021{ 1053{
1022 struct switchdev_obj_port_vlan v = { 1054 struct switchdev_obj_port_vlan v = {
1023 .obj.orig_dev = port->dev, 1055 .obj.orig_dev = port->dev,
@@ -1031,13 +1063,15 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
1031 1063
1032 ASSERT_RTNL(); 1064 ASSERT_RTNL();
1033 1065
1066 *changed = false;
1034 vlan = br_vlan_find(nbp_vlan_group(port), vid); 1067 vlan = br_vlan_find(nbp_vlan_group(port), vid);
1035 if (vlan) { 1068 if (vlan) {
1036 /* Pass the flags to the hardware bridge */ 1069 /* Pass the flags to the hardware bridge */
1037 ret = switchdev_port_obj_add(port->dev, &v.obj); 1070 ret = switchdev_port_obj_add(port->dev, &v.obj);
1038 if (ret && ret != -EOPNOTSUPP) 1071 if (ret && ret != -EOPNOTSUPP)
1039 return ret; 1072 return ret;
1040 __vlan_add_flags(vlan, flags); 1073 *changed = __vlan_add_flags(vlan, flags);
1074
1041 return 0; 1075 return 0;
1042 } 1076 }
1043 1077
@@ -1050,6 +1084,8 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
1050 ret = __vlan_add(vlan, flags); 1084 ret = __vlan_add(vlan, flags);
1051 if (ret) 1085 if (ret)
1052 kfree(vlan); 1086 kfree(vlan);
1087 else
1088 *changed = true;
1053 1089
1054 return ret; 1090 return ret;
1055} 1091}
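In br_vlan.c the previously-void flag helpers now report whether they modified anything: __vlan_add_pvid() and __vlan_delete_pvid() return true only on a real pvid transition, and __vlan_add_flags() combines that result with an XOR of the old and new flag words. A compact sketch of the change-detection idiom, with simplified flag values and illustrative names:

#include <stdbool.h>
#include <stdio.h>

#define DEMO_VLAN_UNTAGGED 0x4

struct demo_vlan {
        unsigned short flags;
};

/* Returns true iff the stored flags actually changed. */
static bool demo_add_flags(struct demo_vlan *v, unsigned short flags)
{
        unsigned short old_flags = v->flags;

        if (flags & DEMO_VLAN_UNTAGGED)
                v->flags |= DEMO_VLAN_UNTAGGED;
        else
                v->flags &= ~DEMO_VLAN_UNTAGGED;

        /* The XOR is non-zero exactly when at least one bit flipped. */
        return !!(old_flags ^ v->flags);
}

int main(void)
{
        struct demo_vlan v = { .flags = 0 };

        printf("%d\n", demo_add_flags(&v, DEMO_VLAN_UNTAGGED)); /* 1 */
        printf("%d\n", demo_add_flags(&v, DEMO_VLAN_UNTAGGED)); /* 0 */
        return 0;
}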
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index be4d0cea78ce..2f28e16de6c7 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the netfilter modules for Link Layer filtering on a bridge. 3# Makefile for the netfilter modules for Link Layer filtering on a bridge.
3# 4#
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 2585b100ebbb..276b60262981 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -65,8 +65,8 @@ static int ebt_broute(struct sk_buff *skb)
65 65
66static int __net_init broute_net_init(struct net *net) 66static int __net_init broute_net_init(struct net *net)
67{ 67{
68 net->xt.broute_table = ebt_register_table(net, &broute_table, NULL); 68 return ebt_register_table(net, &broute_table, NULL,
69 return PTR_ERR_OR_ZERO(net->xt.broute_table); 69 &net->xt.broute_table);
70} 70}
71 71
72static void __net_exit broute_net_exit(struct net *net) 72static void __net_exit broute_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 45a00dbdbcad..c41da5fac84f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_filter[] = {
93 93
94static int __net_init frame_filter_net_init(struct net *net) 94static int __net_init frame_filter_net_init(struct net *net)
95{ 95{
96 net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter); 96 return ebt_register_table(net, &frame_filter, ebt_ops_filter,
97 return PTR_ERR_OR_ZERO(net->xt.frame_filter); 97 &net->xt.frame_filter);
98} 98}
99 99
100static void __net_exit frame_filter_net_exit(struct net *net) 100static void __net_exit frame_filter_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 57cd5bb154e7..08df7406ecb3 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_nat[] = {
93 93
94static int __net_init frame_nat_net_init(struct net *net) 94static int __net_init frame_nat_net_init(struct net *net)
95{ 95{
96 net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat); 96 return ebt_register_table(net, &frame_nat, ebt_ops_nat,
97 return PTR_ERR_OR_ZERO(net->xt.frame_nat); 97 &net->xt.frame_nat);
98} 98}
99 99
100static void __net_exit frame_nat_net_exit(struct net *net) 100static void __net_exit frame_nat_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 83951f978445..37817d25b63d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1169,9 +1169,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
1169 kfree(table); 1169 kfree(table);
1170} 1170}
1171 1171
1172struct ebt_table * 1172int ebt_register_table(struct net *net, const struct ebt_table *input_table,
1173ebt_register_table(struct net *net, const struct ebt_table *input_table, 1173 const struct nf_hook_ops *ops, struct ebt_table **res)
1174 const struct nf_hook_ops *ops)
1175{ 1174{
1176 struct ebt_table_info *newinfo; 1175 struct ebt_table_info *newinfo;
1177 struct ebt_table *t, *table; 1176 struct ebt_table *t, *table;
@@ -1183,7 +1182,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table,
1183 repl->entries == NULL || repl->entries_size == 0 || 1182 repl->entries == NULL || repl->entries_size == 0 ||
1184 repl->counters != NULL || input_table->private != NULL) { 1183 repl->counters != NULL || input_table->private != NULL) {
1185 BUGPRINT("Bad table data for ebt_register_table!!!\n"); 1184 BUGPRINT("Bad table data for ebt_register_table!!!\n");
1186 return ERR_PTR(-EINVAL); 1185 return -EINVAL;
1187 } 1186 }
1188 1187
1189 /* Don't add one table to multiple lists. */ 1188 /* Don't add one table to multiple lists. */
@@ -1252,16 +1251,18 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table,
1252 list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); 1251 list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]);
1253 mutex_unlock(&ebt_mutex); 1252 mutex_unlock(&ebt_mutex);
1254 1253
1254 WRITE_ONCE(*res, table);
1255
1255 if (!ops) 1256 if (!ops)
1256 return table; 1257 return 0;
1257 1258
1258 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); 1259 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
1259 if (ret) { 1260 if (ret) {
1260 __ebt_unregister_table(net, table); 1261 __ebt_unregister_table(net, table);
1261 return ERR_PTR(ret); 1262 *res = NULL;
1262 } 1263 }
1263 1264
1264 return table; 1265 return ret;
1265free_unlock: 1266free_unlock:
1266 mutex_unlock(&ebt_mutex); 1267 mutex_unlock(&ebt_mutex);
1267free_chainstack: 1268free_chainstack:
@@ -1276,7 +1277,7 @@ free_newinfo:
1276free_table: 1277free_table:
1277 kfree(table); 1278 kfree(table);
1278out: 1279out:
1279 return ERR_PTR(ret); 1280 return ret;
1280} 1281}
1281 1282
1282void ebt_unregister_table(struct net *net, struct ebt_table *table, 1283void ebt_unregister_table(struct net *net, struct ebt_table *table,
@@ -2111,9 +2112,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
2111 for (i = 0, j = 1 ; j < 4 ; j++, i++) { 2112 for (i = 0, j = 1 ; j < 4 ; j++, i++) {
2112 struct compat_ebt_entry_mwt *match32; 2113 struct compat_ebt_entry_mwt *match32;
2113 unsigned int size; 2114 unsigned int size;
2114 char *buf = buf_start; 2115 char *buf = buf_start + offsets[i];
2115 2116
2116 buf = buf_start + offsets[i];
2117 if (offsets[i] > offsets[j]) 2117 if (offsets[i] > offsets[j])
2118 return -EINVAL; 2118 return -EINVAL;
2119 2119
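ebt_register_table() used to return a struct ebt_table * that the three callers above collapsed with PTR_ERR_OR_ZERO(); it now returns a plain int and publishes the table through a struct ebt_table **res, using WRITE_ONCE() so readers of net->xt.* never see a torn pointer, and clearing *res again if hook registration fails. A minimal sketch of the error-code-plus-out-pointer convention; the demo types and malloc() stand in for the real ebtables machinery.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_table {
        int valid_hooks;
};

/* Returns 0 or -errno; on success *res points at the new table, on
 * failure *res stays NULL so callers never hold a stale pointer. */
static int demo_register_table(int valid_hooks, struct demo_table **res)
{
        struct demo_table *t;

        *res = NULL;
        if (!valid_hooks)
                return -EINVAL;

        t = malloc(sizeof(*t));
        if (!t)
                return -ENOMEM;
        t->valid_hooks = valid_hooks;

        *res = t;       /* the kernel uses WRITE_ONCE(*res, table) here */
        return 0;
}

int main(void)
{
        struct demo_table *tbl;
        int err = demo_register_table(0x7, &tbl);

        printf("err=%d hooks=%#x\n", err, err ? 0u : (unsigned)tbl->valid_hooks);
        free(tbl);
        return 0;
}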
diff --git a/net/caif/Makefile b/net/caif/Makefile
index cc2b51154d03..4f6c0517cdfb 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG 2ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG
2 3
3caif-y := caif_dev.o \ 4caif-y := caif_dev.o \
diff --git a/net/can/Makefile b/net/can/Makefile
index 10936754e3f2..1242bbbfe57f 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux Controller Area Network core. 3# Makefile for the Linux Controller Area Network core.
3# 4#
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 88edac0f3e36..003b2d6d655f 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -78,7 +78,7 @@ MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
78static struct kmem_cache *rcv_cache __read_mostly; 78static struct kmem_cache *rcv_cache __read_mostly;
79 79
80/* table of registered CAN protocols */ 80/* table of registered CAN protocols */
81static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; 81static const struct can_proto __rcu *proto_tab[CAN_NPROTO] __read_mostly;
82static DEFINE_MUTEX(proto_tab_lock); 82static DEFINE_MUTEX(proto_tab_lock);
83 83
84static atomic_t skbcounter = ATOMIC_INIT(0); 84static atomic_t skbcounter = ATOMIC_INIT(0);
@@ -788,7 +788,7 @@ int can_proto_register(const struct can_proto *cp)
788 788
789 mutex_lock(&proto_tab_lock); 789 mutex_lock(&proto_tab_lock);
790 790
791 if (proto_tab[proto]) { 791 if (rcu_access_pointer(proto_tab[proto])) {
792 pr_err("can: protocol %d already registered\n", proto); 792 pr_err("can: protocol %d already registered\n", proto);
793 err = -EBUSY; 793 err = -EBUSY;
794 } else 794 } else
@@ -812,7 +812,7 @@ void can_proto_unregister(const struct can_proto *cp)
812 int proto = cp->protocol; 812 int proto = cp->protocol;
813 813
814 mutex_lock(&proto_tab_lock); 814 mutex_lock(&proto_tab_lock);
815 BUG_ON(proto_tab[proto] != cp); 815 BUG_ON(rcu_access_pointer(proto_tab[proto]) != cp);
816 RCU_INIT_POINTER(proto_tab[proto], NULL); 816 RCU_INIT_POINTER(proto_tab[proto], NULL);
817 mutex_unlock(&proto_tab_lock); 817 mutex_unlock(&proto_tab_lock);
818 818
@@ -875,15 +875,20 @@ static int can_pernet_init(struct net *net)
875 spin_lock_init(&net->can.can_rcvlists_lock); 875 spin_lock_init(&net->can.can_rcvlists_lock);
876 net->can.can_rx_alldev_list = 876 net->can.can_rx_alldev_list =
877 kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL); 877 kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
878 878 if (!net->can.can_rx_alldev_list)
879 goto out;
879 net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL); 880 net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
881 if (!net->can.can_stats)
882 goto out_free_alldev_list;
880 net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL); 883 net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
884 if (!net->can.can_pstats)
885 goto out_free_can_stats;
881 886
882 if (IS_ENABLED(CONFIG_PROC_FS)) { 887 if (IS_ENABLED(CONFIG_PROC_FS)) {
883 /* the statistics are updated every second (timer triggered) */ 888 /* the statistics are updated every second (timer triggered) */
884 if (stats_timer) { 889 if (stats_timer) {
885 setup_timer(&net->can.can_stattimer, can_stat_update, 890 timer_setup(&net->can.can_stattimer, can_stat_update,
886 (unsigned long)net); 891 0);
887 mod_timer(&net->can.can_stattimer, 892 mod_timer(&net->can.can_stattimer,
888 round_jiffies(jiffies + HZ)); 893 round_jiffies(jiffies + HZ));
889 } 894 }
@@ -892,6 +897,13 @@ static int can_pernet_init(struct net *net)
892 } 897 }
893 898
894 return 0; 899 return 0;
900
901 out_free_can_stats:
902 kfree(net->can.can_stats);
903 out_free_alldev_list:
904 kfree(net->can.can_rx_alldev_list);
905 out:
906 return -ENOMEM;
895} 907}
896 908
897static void can_pernet_exit(struct net *net) 909static void can_pernet_exit(struct net *net)
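can_pernet_init() previously issued three kzalloc() calls back to back and checked none of them; the hunk above adds a check after each allocation with the usual kernel goto-unwind ladder, freeing in reverse order of acquisition. A standalone sketch of the idiom with demo resources and userspace calloc():

#include <stdio.h>
#include <stdlib.h>

struct demo_ctx {
        void *a, *b, *c;
};

static int demo_init(struct demo_ctx *ctx)
{
        ctx->a = calloc(1, 64);
        if (!ctx->a)
                goto out;
        ctx->b = calloc(1, 64);
        if (!ctx->b)
                goto out_free_a;
        ctx->c = calloc(1, 64);
        if (!ctx->c)
                goto out_free_b;
        return 0;

        /* Unwind strictly in reverse order of acquisition. */
out_free_b:
        free(ctx->b);
out_free_a:
        free(ctx->a);
out:
        return -1;      /* stands in for -ENOMEM */
}

int main(void)
{
        struct demo_ctx ctx;

        printf("init: %d\n", demo_init(&ctx));
        free(ctx.c);
        free(ctx.b);
        free(ctx.a);
        return 0;
}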
diff --git a/net/can/af_can.h b/net/can/af_can.h
index d0ef45bb2a72..eca6463c6213 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -113,6 +113,6 @@ struct s_pstats {
113/* function prototypes for the CAN networklayer procfs (proc.c) */ 113/* function prototypes for the CAN networklayer procfs (proc.c) */
114void can_init_proc(struct net *net); 114void can_init_proc(struct net *net);
115void can_remove_proc(struct net *net); 115void can_remove_proc(struct net *net);
116void can_stat_update(unsigned long data); 116void can_stat_update(struct timer_list *t);
117 117
118#endif /* AF_CAN_H */ 118#endif /* AF_CAN_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 47a8748d953a..13690334efa3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1493,13 +1493,14 @@ static int bcm_init(struct sock *sk)
1493static int bcm_release(struct socket *sock) 1493static int bcm_release(struct socket *sock)
1494{ 1494{
1495 struct sock *sk = sock->sk; 1495 struct sock *sk = sock->sk;
1496 struct net *net = sock_net(sk); 1496 struct net *net;
1497 struct bcm_sock *bo; 1497 struct bcm_sock *bo;
1498 struct bcm_op *op, *next; 1498 struct bcm_op *op, *next;
1499 1499
1500 if (sk == NULL) 1500 if (!sk)
1501 return 0; 1501 return 0;
1502 1502
1503 net = sock_net(sk);
1503 bo = bcm_sk(sk); 1504 bo = bcm_sk(sk);
1504 1505
1505 /* remove bcm_ops, timer, rx_unregister(), etc. */ 1506 /* remove bcm_ops, timer, rx_unregister(), etc. */
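The bcm_release() change is a small ordering fix rather than a cleanup: sock_net(sk) dereferences sk, so initializing net in the declaration ran before the sk NULL guard and could crash on a NULL socket. Moving the assignment below the check removes the window. A miniature, runnable version of the same bug class, with a demo struct in place of struct sock:

#include <stdio.h>

struct demo_sock {
        int netns_id;
};

static int demo_release(struct demo_sock *sk)
{
        int netns_id;           /* was: int netns_id = sk->netns_id; */

        if (!sk)                /* the guard must run before any deref */
                return 0;

        netns_id = sk->netns_id;        /* safe: sk is non-NULL here */
        printf("releasing socket in netns %d\n", netns_id);
        return 0;
}

int main(void)
{
        struct demo_sock s = { .netns_id = 1 };

        demo_release(&s);
        demo_release(NULL);     /* no longer crashes */
        return 0;
}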
diff --git a/net/can/proc.c b/net/can/proc.c
index 83045f00c63c..0c59f876fe6f 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -115,9 +115,9 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
115 return rate; 115 return rate;
116} 116}
117 117
118void can_stat_update(unsigned long data) 118void can_stat_update(struct timer_list *t)
119{ 119{
120 struct net *net = (struct net *)data; 120 struct net *net = from_timer(net, t, can.can_stattimer);
121 struct s_stats *can_stats = net->can.can_stats; 121 struct s_stats *can_stats = net->can.can_stats;
122 unsigned long j = jiffies; /* snapshot */ 122 unsigned long j = jiffies; /* snapshot */
123 123
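can_stat_update() shows the timer conversion applied to a timer nested one level down: from_timer(net, t, can.can_stattimer) works because from_timer() expands to container_of(), and offsetof() accepts nested members, so the compiler subtracts the combined offset of can.can_stattimer from the timer pointer to recover the enclosing struct net. The signature change also has to be mirrored in the can_stat_update() prototype in af_can.h above, since the timer core now always passes a struct timer_list *.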
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 6a5180903e7b..b4bded4b5396 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for CEPH filesystem. 3# Makefile for CEPH filesystem.
3# 4#
diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index 1fc1ee11dfa2..0db8065928df 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1 2
2#include <linux/errno.h> 3#include <linux/errno.h>
3 4
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 48bb8d95195b..dbde2b3c3c15 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/module.h> 4#include <linux/module.h>
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index df45e467c81f..41d2a0c72236 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1 2
2#include <linux/ceph/ceph_debug.h> 3#include <linux/ceph/ceph_debug.h>
3 4
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 62021535ae4a..860ed9875791 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _FS_CEPH_AUTH_NONE_H 2#ifndef _FS_CEPH_AUTH_NONE_H
2#define _FS_CEPH_AUTH_NONE_H 3#define _FS_CEPH_AUTH_NONE_H
3 4
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 8757fb87dab8..2f4a1baf5f52 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1 2
2#include <linux/ceph/ceph_debug.h> 3#include <linux/ceph/ceph_debug.h>
3 4
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 48e9ad41bd2a..454cb54568af 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _FS_CEPH_AUTH_X_H 2#ifndef _FS_CEPH_AUTH_X_H
2#define _FS_CEPH_AUTH_X_H 3#define _FS_CEPH_AUTH_X_H
3 4
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 671d30576c4f..32c13d763b9a 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __FS_CEPH_AUTH_X_PROTOCOL 2#ifndef __FS_CEPH_AUTH_X_PROTOCOL
2#define __FS_CEPH_AUTH_X_PROTOCOL 3#define __FS_CEPH_AUTH_X_PROTOCOL
3 4
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index add5f921a0ff..5622763ad402 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1 2
2#include <linux/ceph/ceph_debug.h> 3#include <linux/ceph/ceph_debug.h>
3 4
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
index dcbe67ff3e2b..756a2dc10d27 100644
--- a/net/ceph/ceph_fs.c
+++ b/net/ceph/ceph_fs.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Some non-inline ceph helpers 3 * Some non-inline ceph helpers
3 */ 4 */
diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c
index 67bb1f11e613..9a5850f264ed 100644
--- a/net/ceph/ceph_hash.c
+++ b/net/ceph/ceph_hash.c
@@ -47,28 +47,38 @@ unsigned int ceph_str_hash_rjenkins(const char *str, unsigned int length)
47 47
48 /* handle the last 11 bytes */ 48 /* handle the last 11 bytes */
49 c = c + length; 49 c = c + length;
50 switch (len) { /* all the case statements fall through */ 50 switch (len) {
51 case 11: 51 case 11:
52 c = c + ((__u32)k[10] << 24); 52 c = c + ((__u32)k[10] << 24);
53 /* fall through */
53 case 10: 54 case 10:
54 c = c + ((__u32)k[9] << 16); 55 c = c + ((__u32)k[9] << 16);
56 /* fall through */
55 case 9: 57 case 9:
56 c = c + ((__u32)k[8] << 8); 58 c = c + ((__u32)k[8] << 8);
57 /* the first byte of c is reserved for the length */ 59 /* the first byte of c is reserved for the length */
60 /* fall through */
58 case 8: 61 case 8:
59 b = b + ((__u32)k[7] << 24); 62 b = b + ((__u32)k[7] << 24);
63 /* fall through */
60 case 7: 64 case 7:
61 b = b + ((__u32)k[6] << 16); 65 b = b + ((__u32)k[6] << 16);
66 /* fall through */
62 case 6: 67 case 6:
63 b = b + ((__u32)k[5] << 8); 68 b = b + ((__u32)k[5] << 8);
69 /* fall through */
64 case 5: 70 case 5:
65 b = b + k[4]; 71 b = b + k[4];
72 /* fall through */
66 case 4: 73 case 4:
67 a = a + ((__u32)k[3] << 24); 74 a = a + ((__u32)k[3] << 24);
75 /* fall through */
68 case 3: 76 case 3:
69 a = a + ((__u32)k[2] << 16); 77 a = a + ((__u32)k[2] << 16);
78 /* fall through */
70 case 2: 79 case 2:
71 a = a + ((__u32)k[1] << 8); 80 a = a + ((__u32)k[1] << 8);
81 /* fall through */
72 case 1: 82 case 1:
73 a = a + k[0]; 83 a = a + k[0];
74 /* case 0: nothing left to add */ 84 /* case 0: nothing left to add */
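
The hunk above replaces the blanket "all the case statements fall through" note with a per-case /* fall through */ annotation, which is the marker GCC's -Wimplicit-fallthrough recognizes. A minimal userspace sketch of the same pattern, assuming a 3-byte tail (the helper name mix_tail is ours, not the kernel's):

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

static uint32_t mix_tail(const uint8_t *k, size_t tail_len, uint32_t acc)
{
        switch (tail_len) {
        case 3:
                acc += (uint32_t)k[2] << 16;
                /* fall through */
        case 2:
                acc += (uint32_t)k[1] << 8;
                /* fall through */
        case 1:
                acc += k[0];
                /* fall through */
        case 0:
                break;          /* nothing left to add */
        }
        return acc;
}

int main(void)
{
        const uint8_t buf[] = { 0x61, 0x62, 0x63 };

        printf("0x%08x\n", mix_tail(buf, sizeof(buf), 0)); /* 0x00636261 */
        return 0;
}

With -Wimplicit-fallthrough enabled, removing any one of the comments makes the compiler warn on that case label; the annotation documents that the drop-through is deliberate rather than a forgotten break.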
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 19b7d8aa915c..10e01494993c 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Ceph string constants 3 * Ceph string constants
3 */ 4 */
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 08ada893f01e..8d2032b2f225 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/types.h> 4#include <linux/types.h>
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 4b428f46a8ca..3d70244bc1b6 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#ifdef __KERNEL__ 2#ifdef __KERNEL__
2# include <linux/slab.h> 3# include <linux/slab.h>
3# include <linux/crush/crush.h> 4# include <linux/crush/crush.h>
diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c
index ed123af49eba..e5cc603cdb17 100644
--- a/net/ceph/crush/hash.c
+++ b/net/ceph/crush/hash.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#ifdef __KERNEL__ 2#ifdef __KERNEL__
2# include <linux/crush/hash.h> 3# include <linux/crush/hash.h>
3#else 4#else
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 46008d5ac504..bf9d079cbafd 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1 2
2#include <linux/ceph/ceph_debug.h> 3#include <linux/ceph/ceph_debug.h>
3 4
@@ -36,7 +37,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf)
36 return -ENOTSUPP; 37 return -ENOTSUPP;
37 } 38 }
38 39
39 WARN_ON(!key->len); 40 if (!key->len)
41 return -EINVAL;
42
40 key->key = kmemdup(buf, key->len, GFP_NOIO); 43 key->key = kmemdup(buf, key->len, GFP_NOIO);
41 if (!key->key) { 44 if (!key->key) {
42 ret = -ENOMEM; 45 ret = -ENOMEM;
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 58d83aa7740f..bb45c7d43739 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _FS_CEPH_CRYPTO_H 2#ifndef _FS_CEPH_CRYPTO_H
2#define _FS_CEPH_CRYPTO_H 3#define _FS_CEPH_CRYPTO_H
3 4
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index fa5233e0d01c..1eef6806aa1a 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/device.h> 4#include <linux/device.h>
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a67298c7e0cd..8a4d3758030b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/crc32c.h> 4#include <linux/crc32c.h>
@@ -429,6 +430,7 @@ static void ceph_sock_state_change(struct sock *sk)
429 switch (sk->sk_state) { 430 switch (sk->sk_state) {
430 case TCP_CLOSE: 431 case TCP_CLOSE:
431 dout("%s TCP_CLOSE\n", __func__); 432 dout("%s TCP_CLOSE\n", __func__);
433 /* fall through */
432 case TCP_CLOSE_WAIT: 434 case TCP_CLOSE_WAIT:
433 dout("%s TCP_CLOSE_WAIT\n", __func__); 435 dout("%s TCP_CLOSE_WAIT\n", __func__);
434 con_sock_state_closing(con); 436 con_sock_state_closing(con);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 63edc6e5f026..1547107f4854 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/module.h> 4#include <linux/module.h>
@@ -1278,9 +1279,10 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
1278 1279
1279 /* 1280 /*
1280 * Older OSDs don't set reply tid even if the original 1281 * Older OSDs don't set reply tid even if the original
1281 * request had a non-zero tid. Workaround this weirdness 1282 * request had a non-zero tid. Work around this weirdness
1282 * by falling through to the allocate case. 1283 * by allocating a new message.
1283 */ 1284 */
1285 /* fall through */
1284 case CEPH_MSG_MON_MAP: 1286 case CEPH_MSG_MON_MAP:
1285 case CEPH_MSG_MDS_MAP: 1287 case CEPH_MSG_MDS_MAP:
1286 case CEPH_MSG_OSD_MAP: 1288 case CEPH_MSG_OSD_MAP:
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index aaed59a47b1d..72571535883f 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/err.h> 4#include <linux/err.h>
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e02f01f534e2..2814dba5902d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1 2
2#include <linux/ceph/ceph_debug.h> 3#include <linux/ceph/ceph_debug.h>
3 4
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f358d0bfa76b..0da27c66349a 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1 2
2#include <linux/ceph/ceph_debug.h> 3#include <linux/ceph/ceph_debug.h>
3 4
@@ -2445,19 +2446,34 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
2445 2446
2446 pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid); 2447 pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
2447 if (pg) { 2448 if (pg) {
2448 for (i = 0; i < raw->size; i++) { 2449 /*
2449 for (j = 0; j < pg->pg_upmap_items.len; j++) { 2450 * Note: this approach does not allow a bidirectional swap,
2450 int from = pg->pg_upmap_items.from_to[j][0]; 2451 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
2451 int to = pg->pg_upmap_items.from_to[j][1]; 2452 */
2452 2453 for (i = 0; i < pg->pg_upmap_items.len; i++) {
2453 if (from == raw->osds[i]) { 2454 int from = pg->pg_upmap_items.from_to[i][0];
2454 if (!(to != CRUSH_ITEM_NONE && 2455 int to = pg->pg_upmap_items.from_to[i][1];
2455 to < osdmap->max_osd && 2456 int pos = -1;
2456 osdmap->osd_weight[to] == 0)) 2457 bool exists = false;
2457 raw->osds[i] = to; 2458
2459 /* make sure replacement doesn't already appear */
2460 for (j = 0; j < raw->size; j++) {
2461 int osd = raw->osds[j];
2462
2463 if (osd == to) {
2464 exists = true;
2458 break; 2465 break;
2459 } 2466 }
2467 /* ignore mapping if target is marked out */
2468 if (osd == from && pos < 0 &&
2469 !(to != CRUSH_ITEM_NONE &&
2470 to < osdmap->max_osd &&
2471 osdmap->osd_weight[to] == 0)) {
2472 pos = j;
2473 }
2460 } 2474 }
2475 if (!exists && pos >= 0)
2476 raw->osds[pos] = to;
2461 } 2477 }
2462 } 2478 }
2463} 2479}
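
The rewritten loop inverts the old nesting: it walks the (from, to) pairs once and, for each pair, scans the acting set, remembering the first position of "from" while bailing out if "to" already appears. A standalone model of just that logic, with the CRUSH_ITEM_NONE and osd_weight checks dropped for brevity (apply_upmap_items is our name):

#include <stdbool.h>
#include <stdio.h>

static void apply_upmap_items(int *osds, int size,
                              const int (*from_to)[2], int len)
{
        for (int i = 0; i < len; i++) {
                int from = from_to[i][0];
                int to = from_to[i][1];
                int pos = -1;
                bool exists = false;

                /* make sure the replacement doesn't already appear */
                for (int j = 0; j < size; j++) {
                        if (osds[j] == to) {
                                exists = true;
                                break;
                        }
                        if (osds[j] == from && pos < 0)
                                pos = j;
                }
                if (!exists && pos >= 0)
                        osds[pos] = to;
        }
}

int main(void)
{
        int osds[] = { 0, 1, 2 };
        const int items[][2] = { { 1, 2 }, { 2, 1 } };

        apply_upmap_items(osds, 3, items, 2);
        printf("%d %d %d\n", osds[0], osds[1], osds[2]); /* 0 1 2 */
        return 0;
}

Running it demonstrates the limitation called out in the new comment: the bidirectional swap [[1,2],[2,1]] leaves [0,1,2] untouched, because each half of the swap sees the other half's target already present.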
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index ce09f73be759..2ea0564771d2 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/module.h> 2#include <linux/module.h>
2#include <linux/gfp.h> 3#include <linux/gfp.h>
3#include <linux/slab.h> 4#include <linux/slab.h>
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a7c9a79a53c..a3d0adc828e6 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/module.h> 4#include <linux/module.h>
@@ -24,9 +25,9 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
24 return ERR_PTR(-ENOMEM); 25 return ERR_PTR(-ENOMEM);
25 26
26 while (got < num_pages) { 27 while (got < num_pages) {
27 rc = get_user_pages_unlocked( 28 rc = get_user_pages_fast(
28 (unsigned long)data + ((unsigned long)got * PAGE_SIZE), 29 (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
29 num_pages - got, pages + got, write_page ? FOLL_WRITE : 0); 30 num_pages - got, write_page, pages + got);
30 if (rc < 0) 31 if (rc < 0)
31 break; 32 break;
32 BUG_ON(rc == 0); 33 BUG_ON(rc == 0);
diff --git a/net/ceph/string_table.c b/net/ceph/string_table.c
index 22fb96efcf34..3191d9d160a2 100644
--- a/net/ceph/string_table.c
+++ b/net/ceph/string_table.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/slab.h> 2#include <linux/slab.h>
2#include <linux/gfp.h> 3#include <linux/gfp.h>
3#include <linux/string.h> 4#include <linux/string.h>
diff --git a/net/compat.c b/net/compat.c
index 6ded6c821d7a..22381719718c 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -185,6 +185,13 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
185 ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); 185 ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
186 } 186 }
187 187
188 /*
189 * check that the length of the messages copied in matches
190 * what we computed in the first loop
191 */
192 if ((char *)kcmsg - (char *)kcmsg_base != kcmlen)
193 goto Einval;
194
188 /* Ok, looks like we made it. Hook it up and return success. */ 195 /* Ok, looks like we made it. Hook it up and return success. */
189 kmsg->msg_control = kcmsg_base; 196 kmsg->msg_control = kcmsg_base;
190 kmsg->msg_controllen = kcmlen; 197 kmsg->msg_controllen = kcmlen;
diff --git a/net/core/Makefile b/net/core/Makefile
index 56d771a887b6..1fd0a9c88b1b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux networking core. 3# Makefile for the Linux networking core.
3# 4#
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f7fb7e3f2acf..522873ed120b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * SUCS NET3: 3 * SUCS NET3:
3 * 4 *
@@ -188,7 +189,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
188 } 189 }
189 if (!skb->len) { 190 if (!skb->len) {
190 skb = skb_set_peeked(skb); 191 skb = skb_set_peeked(skb);
191 if (unlikely(IS_ERR(skb))) { 192 if (IS_ERR(skb)) {
192 *err = PTR_ERR(skb); 193 *err = PTR_ERR(skb);
193 return NULL; 194 return NULL;
194 } 195 }
diff --git a/net/core/dev.c b/net/core/dev.c
index fb766d906148..07ed21d64f92 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
145#include <linux/crash_dump.h> 145#include <linux/crash_dump.h>
146#include <linux/sctp.h> 146#include <linux/sctp.h>
147#include <net/udp_tunnel.h> 147#include <net/udp_tunnel.h>
148#include <linux/net_namespace.h>
148 149
149#include "net-sysfs.h" 150#include "net-sysfs.h"
150 151
@@ -162,7 +163,6 @@ static struct list_head offload_base __read_mostly;
162 163
163static int netif_rx_internal(struct sk_buff *skb); 164static int netif_rx_internal(struct sk_buff *skb);
164static int call_netdevice_notifiers_info(unsigned long val, 165static int call_netdevice_notifiers_info(unsigned long val,
165 struct net_device *dev,
166 struct netdev_notifier_info *info); 166 struct netdev_notifier_info *info);
167static struct napi_struct *napi_by_id(unsigned int napi_id); 167static struct napi_struct *napi_by_id(unsigned int napi_id);
168 168
@@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id);
188DEFINE_RWLOCK(dev_base_lock); 188DEFINE_RWLOCK(dev_base_lock);
189EXPORT_SYMBOL(dev_base_lock); 189EXPORT_SYMBOL(dev_base_lock);
190 190
191static DEFINE_MUTEX(ifalias_mutex);
192
191/* protects napi_hash addition/deletion and napi_gen_id */ 193/* protects napi_hash addition/deletion and napi_gen_id */
192static DEFINE_SPINLOCK(napi_hash_lock); 194static DEFINE_SPINLOCK(napi_hash_lock);
193 195
@@ -1062,7 +1064,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1062 unsigned long *inuse; 1064 unsigned long *inuse;
1063 struct net_device *d; 1065 struct net_device *d;
1064 1066
1065 p = strnchr(name, IFNAMSIZ-1, '%'); 1067 if (!dev_valid_name(name))
1068 return -EINVAL;
1069
1070 p = strchr(name, '%');
1066 if (p) { 1071 if (p) {
1067 /* 1072 /*
1068 * Verify the string as this thing may have come from 1073 * Verify the string as this thing may have come from
@@ -1093,8 +1098,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1093 free_page((unsigned long) inuse); 1098 free_page((unsigned long) inuse);
1094 } 1099 }
1095 1100
1096 if (buf != name) 1101 snprintf(buf, IFNAMSIZ, name, i);
1097 snprintf(buf, IFNAMSIZ, name, i);
1098 if (!__dev_get_by_name(net, buf)) 1102 if (!__dev_get_by_name(net, buf))
1099 return i; 1103 return i;
1100 1104
@@ -1102,7 +1106,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1102 * when the name is long and there isn't enough space left 1106 * when the name is long and there isn't enough space left
1103 * for the digits, or if all bits are used. 1107 * for the digits, or if all bits are used.
1104 */ 1108 */
1105 return -ENFILE; 1109 return p ? -ENFILE : -EEXIST;
1110}
1111
1112static int dev_alloc_name_ns(struct net *net,
1113 struct net_device *dev,
1114 const char *name)
1115{
1116 char buf[IFNAMSIZ];
1117 int ret;
1118
1119 BUG_ON(!net);
1120 ret = __dev_alloc_name(net, name, buf);
1121 if (ret >= 0)
1122 strlcpy(dev->name, buf, IFNAMSIZ);
1123 return ret;
1106} 1124}
1107 1125
1108/** 1126/**
@@ -1121,50 +1139,16 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1121 1139
1122int dev_alloc_name(struct net_device *dev, const char *name) 1140int dev_alloc_name(struct net_device *dev, const char *name)
1123{ 1141{
1124 char buf[IFNAMSIZ]; 1142 return dev_alloc_name_ns(dev_net(dev), dev, name);
1125 struct net *net;
1126 int ret;
1127
1128 BUG_ON(!dev_net(dev));
1129 net = dev_net(dev);
1130 ret = __dev_alloc_name(net, name, buf);
1131 if (ret >= 0)
1132 strlcpy(dev->name, buf, IFNAMSIZ);
1133 return ret;
1134} 1143}
1135EXPORT_SYMBOL(dev_alloc_name); 1144EXPORT_SYMBOL(dev_alloc_name);
1136 1145
1137static int dev_alloc_name_ns(struct net *net, 1146int dev_get_valid_name(struct net *net, struct net_device *dev,
1138 struct net_device *dev, 1147 const char *name)
1139 const char *name)
1140{ 1148{
1141 char buf[IFNAMSIZ]; 1149 return dev_alloc_name_ns(net, dev, name);
1142 int ret;
1143
1144 ret = __dev_alloc_name(net, name, buf);
1145 if (ret >= 0)
1146 strlcpy(dev->name, buf, IFNAMSIZ);
1147 return ret;
1148}
1149
1150static int dev_get_valid_name(struct net *net,
1151 struct net_device *dev,
1152 const char *name)
1153{
1154 BUG_ON(!net);
1155
1156 if (!dev_valid_name(name))
1157 return -EINVAL;
1158
1159 if (strchr(name, '%'))
1160 return dev_alloc_name_ns(net, dev, name);
1161 else if (__dev_get_by_name(net, name))
1162 return -EEXIST;
1163 else if (dev->name != name)
1164 strlcpy(dev->name, name, IFNAMSIZ);
1165
1166 return 0;
1167} 1150}
1151EXPORT_SYMBOL(dev_get_valid_name);
1168 1152
1169/** 1153/**
1170 * dev_change_name - change name of a device 1154 * dev_change_name - change name of a device
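
Both dev_alloc_name() and dev_get_valid_name() now funnel into one helper, so the "%d" template logic lives in a single place. The expansion itself works by scanning existing names against the printf-style pattern, marking taken indices in a bitmap, and taking the first free bit. A standalone approximation, assuming a fixed-size bitmap instead of the kernel's page-sized one (alloc_name and MAX_IFS are ours):

#include <stdio.h>
#include <string.h>

#define MAX_IFS 32

static int alloc_name(const char *tmpl, const char *existing[], int n,
                      char *buf, size_t buflen)
{
        unsigned long inuse = 0;        /* bit i set => template index i taken */
        char tmp[64];
        int i, idx;

        for (i = 0; i < n; i++) {
                if (sscanf(existing[i], tmpl, &idx) != 1 ||
                    idx < 0 || idx >= MAX_IFS)
                        continue;
                /* reject partial matches such as "eth2x" for "eth%d" */
                snprintf(tmp, sizeof(tmp), tmpl, idx);
                if (strcmp(tmp, existing[i]) == 0)
                        inuse |= 1UL << idx;
        }
        for (i = 0; i < MAX_IFS; i++) {
                if (!(inuse & (1UL << i))) {
                        snprintf(buf, buflen, tmpl, i);
                        return i;
                }
        }
        return -1;      /* the kernel returns -ENFILE here */
}

int main(void)
{
        const char *have[] = { "eth0", "eth1", "eth3" };
        char name[16];

        alloc_name("eth%d", have, 3, name, sizeof(name));
        printf("%s\n", name);   /* eth2 */
        return 0;
}

The reworked return at the end of __dev_alloc_name() preserves the old distinction: a template that ran out of indices yields -ENFILE, while a literal name that already exists yields -EEXIST.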
@@ -1265,29 +1249,53 @@ rollback:
1265 */ 1249 */
1266int dev_set_alias(struct net_device *dev, const char *alias, size_t len) 1250int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1267{ 1251{
1268 char *new_ifalias; 1252 struct dev_ifalias *new_alias = NULL;
1269
1270 ASSERT_RTNL();
1271 1253
1272 if (len >= IFALIASZ) 1254 if (len >= IFALIASZ)
1273 return -EINVAL; 1255 return -EINVAL;
1274 1256
1275 if (!len) { 1257 if (len) {
1276 kfree(dev->ifalias); 1258 new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
1277 dev->ifalias = NULL; 1259 if (!new_alias)
1278 return 0; 1260 return -ENOMEM;
1261
1262 memcpy(new_alias->ifalias, alias, len);
1263 new_alias->ifalias[len] = 0;
1279 } 1264 }
1280 1265
1281 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); 1266 mutex_lock(&ifalias_mutex);
1282 if (!new_ifalias) 1267 rcu_swap_protected(dev->ifalias, new_alias,
1283 return -ENOMEM; 1268 mutex_is_locked(&ifalias_mutex));
1284 dev->ifalias = new_ifalias; 1269 mutex_unlock(&ifalias_mutex);
1285 memcpy(dev->ifalias, alias, len); 1270
1286 dev->ifalias[len] = 0; 1271 if (new_alias)
1272 kfree_rcu(new_alias, rcuhead);
1287 1273
1288 return len; 1274 return len;
1289} 1275}
1290 1276
1277/**
1278 * dev_get_alias - get ifalias of a device
1279 * @dev: device
1280 * @name: buffer to store name of ifalias
1281 * @len: size of buffer
1282 *
1283 * get ifalias for a device. Caller must make sure dev cannot go
1284 * away, e.g. by holding the RCU read lock or a reference to the device.
1285 */
1286int dev_get_alias(const struct net_device *dev, char *name, size_t len)
1287{
1288 const struct dev_ifalias *alias;
1289 int ret = 0;
1290
1291 rcu_read_lock();
1292 alias = rcu_dereference(dev->ifalias);
1293 if (alias)
1294 ret = snprintf(name, len, "%s", alias->ifalias);
1295 rcu_read_unlock();
1296
1297 return ret;
1298}
1291 1299
1292/** 1300/**
1293 * netdev_features_change - device changes features 1301 * netdev_features_change - device changes features
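
The ifalias rework above is the standard RCU publish sequence: build the replacement object completely, swap the pointer while holding ifalias_mutex (that is what the rcu_swap_protected() condition asserts), and hand the old object to kfree_rcu() so dev_get_alias() readers inside rcu_read_lock() never observe freed memory. The shape of the update path as a single-threaded userspace sketch, with an atomic exchange standing in for the RCU swap and the grace-period free reduced to a comment:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ifalias { char name[64]; };

static _Atomic(struct ifalias *) cur_alias;

static int set_alias(const char *s)
{
        struct ifalias *new_alias = NULL, *old;

        if (*s) {                       /* empty string clears, like len == 0 */
                new_alias = malloc(sizeof(*new_alias));
                if (!new_alias)
                        return -1;
                snprintf(new_alias->name, sizeof(new_alias->name), "%s", s);
        }
        old = atomic_exchange(&cur_alias, new_alias);   /* publish */
        free(old);      /* kernel: kfree_rcu(), after a grace period */
        return 0;
}

int main(void)
{
        struct ifalias *a;

        set_alias("uplink0");
        a = atomic_load(&cur_alias);
        printf("%s\n", a ? a->name : "(none)");
        set_alias("");
        a = atomic_load(&cur_alias);
        printf("%s\n", a ? a->name : "(none)");
        return 0;
}

Note the ordering: the new object is fully initialized before it becomes reachable, and the old one is only reclaimed after it has been unpublished, which is exactly what lets the read side get away with no lock at all.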
@@ -1312,10 +1320,11 @@ EXPORT_SYMBOL(netdev_features_change);
1312void netdev_state_change(struct net_device *dev) 1320void netdev_state_change(struct net_device *dev)
1313{ 1321{
1314 if (dev->flags & IFF_UP) { 1322 if (dev->flags & IFF_UP) {
1315 struct netdev_notifier_change_info change_info; 1323 struct netdev_notifier_change_info change_info = {
1324 .info.dev = dev,
1325 };
1316 1326
1317 change_info.flags_changed = 0; 1327 call_netdevice_notifiers_info(NETDEV_CHANGE,
1318 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
1319 &change_info.info); 1328 &change_info.info);
1320 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); 1329 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1321 } 1330 }
@@ -1536,9 +1545,10 @@ EXPORT_SYMBOL(dev_disable_lro);
1536static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, 1545static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1537 struct net_device *dev) 1546 struct net_device *dev)
1538{ 1547{
1539 struct netdev_notifier_info info; 1548 struct netdev_notifier_info info = {
1549 .dev = dev,
1550 };
1540 1551
1541 netdev_notifier_info_init(&info, dev);
1542 return nb->notifier_call(nb, val, &info); 1552 return nb->notifier_call(nb, val, &info);
1543} 1553}
1544 1554
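
This hunk and its neighbors above and below are facets of a single refactor: struct netdev_notifier_info gains a dev member (and, further down, an extack), each event-specific info structure fills the embedded base through designated initializers, and call_netdevice_notifiers_info() loses its separate dev argument. A freestanding model of the pattern, with names shortened:

#include <stdio.h>

struct notifier_info { const char *dev; };

struct change_info {
        struct notifier_info info;      /* embedded common base */
        unsigned int flags_changed;
};

static void notify(unsigned long event, struct notifier_info *info)
{
        /* subscribers recover the outer struct via container_of() */
        printf("event %lu on %s\n", event, info->dev);
}

int main(void)
{
        struct change_info ci = {
                .info.dev = "eth0",
                .flags_changed = 0x1,
        };

        notify(1, &ci.info);
        return 0;
}

The designated initializer also zeroes every member that is not mentioned, which is why the open-coded netdev_notifier_info_init() call can simply disappear.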
@@ -1663,11 +1673,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1663 */ 1673 */
1664 1674
1665static int call_netdevice_notifiers_info(unsigned long val, 1675static int call_netdevice_notifiers_info(unsigned long val,
1666 struct net_device *dev,
1667 struct netdev_notifier_info *info) 1676 struct netdev_notifier_info *info)
1668{ 1677{
1669 ASSERT_RTNL(); 1678 ASSERT_RTNL();
1670 netdev_notifier_info_init(info, dev);
1671 return raw_notifier_call_chain(&netdev_chain, val, info); 1679 return raw_notifier_call_chain(&netdev_chain, val, info);
1672} 1680}
1673 1681
@@ -1682,9 +1690,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
1682 1690
1683int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1691int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1684{ 1692{
1685 struct netdev_notifier_info info; 1693 struct netdev_notifier_info info = {
1694 .dev = dev,
1695 };
1686 1696
1687 return call_netdevice_notifiers_info(val, dev, &info); 1697 return call_netdevice_notifiers_info(val, &info);
1688} 1698}
1689EXPORT_SYMBOL(call_netdevice_notifiers); 1699EXPORT_SYMBOL(call_netdevice_notifiers);
1690 1700
@@ -1948,8 +1958,12 @@ again:
1948 goto again; 1958 goto again;
1949 } 1959 }
1950out_unlock: 1960out_unlock:
1951 if (pt_prev) 1961 if (pt_prev) {
1952 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); 1962 if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
1963 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1964 else
1965 kfree_skb(skb2);
1966 }
1953 rcu_read_unlock(); 1967 rcu_read_unlock();
1954} 1968}
1955EXPORT_SYMBOL_GPL(dev_queue_xmit_nit); 1969EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
@@ -2008,6 +2022,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
2008 2022
2009 return 0; 2023 return 0;
2010} 2024}
2025EXPORT_SYMBOL(netdev_txq_to_tc);
2011 2026
2012#ifdef CONFIG_XPS 2027#ifdef CONFIG_XPS
2013static DEFINE_MUTEX(xps_map_mutex); 2028static DEFINE_MUTEX(xps_map_mutex);
@@ -2731,7 +2746,8 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
2731static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) 2746static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2732{ 2747{
2733 if (tx_path) 2748 if (tx_path)
2734 return skb->ip_summed != CHECKSUM_PARTIAL; 2749 return skb->ip_summed != CHECKSUM_PARTIAL &&
2750 skb->ip_summed != CHECKSUM_UNNECESSARY;
2735 2751
2736 return skb->ip_summed == CHECKSUM_NONE; 2752 return skb->ip_summed == CHECKSUM_NONE;
2737} 2753}
@@ -3241,22 +3257,22 @@ EXPORT_SYMBOL(dev_loopback_xmit);
3241static struct sk_buff * 3257static struct sk_buff *
3242sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) 3258sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
3243{ 3259{
3244 struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); 3260 struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
3245 struct tcf_result cl_res; 3261 struct tcf_result cl_res;
3246 3262
3247 if (!cl) 3263 if (!miniq)
3248 return skb; 3264 return skb;
3249 3265
3250 /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ 3266 /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
3251 qdisc_bstats_cpu_update(cl->q, skb); 3267 mini_qdisc_bstats_cpu_update(miniq, skb);
3252 3268
3253 switch (tcf_classify(skb, cl, &cl_res, false)) { 3269 switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
3254 case TC_ACT_OK: 3270 case TC_ACT_OK:
3255 case TC_ACT_RECLASSIFY: 3271 case TC_ACT_RECLASSIFY:
3256 skb->tc_index = TC_H_MIN(cl_res.classid); 3272 skb->tc_index = TC_H_MIN(cl_res.classid);
3257 break; 3273 break;
3258 case TC_ACT_SHOT: 3274 case TC_ACT_SHOT:
3259 qdisc_qstats_cpu_drop(cl->q); 3275 mini_qdisc_qstats_cpu_drop(miniq);
3260 *ret = NET_XMIT_DROP; 3276 *ret = NET_XMIT_DROP;
3261 kfree_skb(skb); 3277 kfree_skb(skb);
3262 return NULL; 3278 return NULL;
@@ -3721,7 +3737,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3721 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3737 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3722 if (flow_table && flow_id <= flow_table->mask) { 3738 if (flow_table && flow_id <= flow_table->mask) {
3723 rflow = &flow_table->flows[flow_id]; 3739 rflow = &flow_table->flows[flow_id];
3724 cpu = ACCESS_ONCE(rflow->cpu); 3740 cpu = READ_ONCE(rflow->cpu);
3725 if (rflow->filter == filter_id && cpu < nr_cpu_ids && 3741 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
3726 ((int)(per_cpu(softnet_data, cpu).input_queue_head - 3742 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3727 rflow->last_qtail) < 3743 rflow->last_qtail) <
@@ -3860,8 +3876,8 @@ drop:
3860static u32 netif_receive_generic_xdp(struct sk_buff *skb, 3876static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3861 struct bpf_prog *xdp_prog) 3877 struct bpf_prog *xdp_prog)
3862{ 3878{
3879 u32 metalen, act = XDP_DROP;
3863 struct xdp_buff xdp; 3880 struct xdp_buff xdp;
3864 u32 act = XDP_DROP;
3865 void *orig_data; 3881 void *orig_data;
3866 int hlen, off; 3882 int hlen, off;
3867 u32 mac_len; 3883 u32 mac_len;
@@ -3872,8 +3888,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3872 if (skb_cloned(skb)) 3888 if (skb_cloned(skb))
3873 return XDP_PASS; 3889 return XDP_PASS;
3874 3890
3875 if (skb_linearize(skb)) 3891 /* XDP packets must be linear and must have sufficient headroom
3876 goto do_drop; 3892 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
3893 * native XDP provides, thus we need to do it here as well.
3894 */
3895 if (skb_is_nonlinear(skb) ||
3896 skb_headroom(skb) < XDP_PACKET_HEADROOM) {
3897 int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
3898 int troom = skb->tail + skb->data_len - skb->end;
3899
3900 /* In case we have to go down the path and also linearize,
3901 * then let's do the pskb_expand_head() work just once here.
3902 */
3903 if (pskb_expand_head(skb,
3904 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
3905 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
3906 goto do_drop;
3907 if (troom > 0 && __skb_linearize(skb))
3908 goto do_drop;
3909 }
3877 3910
3878 /* The XDP program wants to see the packet starting at the MAC 3911 /* The XDP program wants to see the packet starting at the MAC
3879 * header. 3912 * header.
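
The replaced skb_linearize() call was not enough on its own: native XDP guarantees XDP_PACKET_HEADROOM (256 bytes in this tree) in front of the frame, so generic XDP must now expand undersized skbs too. The arithmetic in isolation, with NET_SKB_PAD fixed at 32 for illustration (it is architecture-dependent):

#include <stdio.h>

#define XDP_PACKET_HEADROOM 256
#define NET_SKB_PAD 32
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        int headroom = 64;      /* what this skb happens to have */
        int hroom = XDP_PACKET_HEADROOM - headroom;
        int grow = hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0;

        printf("grow head by %d bytes\n", grow);        /* 192 */
        return 0;
}

The tail side is analogous: a nonlinear skb that is short on tailroom gets troom + 128 bytes before __skb_linearize() runs, so the expand and the linearize happen in a single pskb_expand_head() call.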
@@ -3881,6 +3914,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3881 mac_len = skb->data - skb_mac_header(skb); 3914 mac_len = skb->data - skb_mac_header(skb);
3882 hlen = skb_headlen(skb) + mac_len; 3915 hlen = skb_headlen(skb) + mac_len;
3883 xdp.data = skb->data - mac_len; 3916 xdp.data = skb->data - mac_len;
3917 xdp.data_meta = xdp.data;
3884 xdp.data_end = xdp.data + hlen; 3918 xdp.data_end = xdp.data + hlen;
3885 xdp.data_hard_start = skb->data - skb_headroom(skb); 3919 xdp.data_hard_start = skb->data - skb_headroom(skb);
3886 orig_data = xdp.data; 3920 orig_data = xdp.data;
@@ -3892,15 +3926,18 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3892 __skb_pull(skb, off); 3926 __skb_pull(skb, off);
3893 else if (off < 0) 3927 else if (off < 0)
3894 __skb_push(skb, -off); 3928 __skb_push(skb, -off);
3929 skb->mac_header += off;
3895 3930
3896 switch (act) { 3931 switch (act) {
3897 case XDP_REDIRECT: 3932 case XDP_REDIRECT:
3898 case XDP_TX: 3933 case XDP_TX:
3899 __skb_push(skb, mac_len); 3934 __skb_push(skb, mac_len);
3900 /* fall through */ 3935 break;
3901 case XDP_PASS: 3936 case XDP_PASS:
3937 metalen = xdp.data - xdp.data_meta;
3938 if (metalen)
3939 skb_metadata_set(skb, metalen);
3902 break; 3940 break;
3903
3904 default: 3941 default:
3905 bpf_warn_invalid_xdp_action(act); 3942 bpf_warn_invalid_xdp_action(act);
3906 /* fall through */ 3943 /* fall through */
@@ -4135,7 +4172,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
4135 struct net_device *orig_dev) 4172 struct net_device *orig_dev)
4136{ 4173{
4137#ifdef CONFIG_NET_CLS_ACT 4174#ifdef CONFIG_NET_CLS_ACT
4138 struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); 4175 struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
4139 struct tcf_result cl_res; 4176 struct tcf_result cl_res;
4140 4177
4141 /* If there's at least one ingress present somewhere (so 4178 /* If there's at least one ingress present somewhere (so
@@ -4143,8 +4180,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
4143 * that are not configured with an ingress qdisc will bail 4180 * that are not configured with an ingress qdisc will bail
4144 * out here. 4181 * out here.
4145 */ 4182 */
4146 if (!cl) 4183 if (!miniq)
4147 return skb; 4184 return skb;
4185
4148 if (*pt_prev) { 4186 if (*pt_prev) {
4149 *ret = deliver_skb(skb, *pt_prev, orig_dev); 4187 *ret = deliver_skb(skb, *pt_prev, orig_dev);
4150 *pt_prev = NULL; 4188 *pt_prev = NULL;
@@ -4152,15 +4190,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
4152 4190
4153 qdisc_skb_cb(skb)->pkt_len = skb->len; 4191 qdisc_skb_cb(skb)->pkt_len = skb->len;
4154 skb->tc_at_ingress = 1; 4192 skb->tc_at_ingress = 1;
4155 qdisc_bstats_cpu_update(cl->q, skb); 4193 mini_qdisc_bstats_cpu_update(miniq, skb);
4156 4194
4157 switch (tcf_classify(skb, cl, &cl_res, false)) { 4195 switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
4158 case TC_ACT_OK: 4196 case TC_ACT_OK:
4159 case TC_ACT_RECLASSIFY: 4197 case TC_ACT_RECLASSIFY:
4160 skb->tc_index = TC_H_MIN(cl_res.classid); 4198 skb->tc_index = TC_H_MIN(cl_res.classid);
4161 break; 4199 break;
4162 case TC_ACT_SHOT: 4200 case TC_ACT_SHOT:
4163 qdisc_qstats_cpu_drop(cl->q); 4201 mini_qdisc_qstats_cpu_drop(miniq);
4164 kfree_skb(skb); 4202 kfree_skb(skb);
4165 return NULL; 4203 return NULL;
4166 case TC_ACT_STOLEN: 4204 case TC_ACT_STOLEN:
@@ -4438,6 +4476,33 @@ out:
4438 return ret; 4476 return ret;
4439} 4477}
4440 4478
4479/**
4480 * netif_receive_skb_core - special purpose version of netif_receive_skb
4481 * @skb: buffer to process
4482 *
4483 * More direct receive version of netif_receive_skb(). It should
4484 * only be used by callers that have a need to skip RPS and Generic XDP.
4485 * Caller must also take care of handling if (page_is_)pfmemalloc.
4486 *
4487 * This function may only be called from softirq context and interrupts
4488 * should be enabled.
4489 *
4490 * Return values (usually ignored):
4491 * NET_RX_SUCCESS: no congestion
4492 * NET_RX_DROP: packet was dropped
4493 */
4494int netif_receive_skb_core(struct sk_buff *skb)
4495{
4496 int ret;
4497
4498 rcu_read_lock();
4499 ret = __netif_receive_skb_core(skb, false);
4500 rcu_read_unlock();
4501
4502 return ret;
4503}
4504EXPORT_SYMBOL(netif_receive_skb_core);
4505
4441static int __netif_receive_skb(struct sk_buff *skb) 4506static int __netif_receive_skb(struct sk_buff *skb)
4442{ 4507{
4443 int ret; 4508 int ret;
@@ -4463,7 +4528,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
4463 return ret; 4528 return ret;
4464} 4529}
4465 4530
4466static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) 4531static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
4467{ 4532{
4468 struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); 4533 struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
4469 struct bpf_prog *new = xdp->prog; 4534 struct bpf_prog *new = xdp->prog;
@@ -4690,6 +4755,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
4690 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 4755 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
4691 diffs |= p->vlan_tci ^ skb->vlan_tci; 4756 diffs |= p->vlan_tci ^ skb->vlan_tci;
4692 diffs |= skb_metadata_dst_cmp(p, skb); 4757 diffs |= skb_metadata_dst_cmp(p, skb);
4758 diffs |= skb_metadata_differs(p, skb);
4693 if (maclen == ETH_HLEN) 4759 if (maclen == ETH_HLEN)
4694 diffs |= compare_ether_header(skb_mac_header(p), 4760 diffs |= compare_ether_header(skb_mac_header(p),
4695 skb_mac_header(skb)); 4761 skb_mac_header(skb));
@@ -6223,9 +6289,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
6223 6289
6224static int __netdev_upper_dev_link(struct net_device *dev, 6290static int __netdev_upper_dev_link(struct net_device *dev,
6225 struct net_device *upper_dev, bool master, 6291 struct net_device *upper_dev, bool master,
6226 void *upper_priv, void *upper_info) 6292 void *upper_priv, void *upper_info,
6227{ 6293 struct netlink_ext_ack *extack)
6228 struct netdev_notifier_changeupper_info changeupper_info; 6294{
6295 struct netdev_notifier_changeupper_info changeupper_info = {
6296 .info = {
6297 .dev = dev,
6298 .extack = extack,
6299 },
6300 .upper_dev = upper_dev,
6301 .master = master,
6302 .linking = true,
6303 .upper_info = upper_info,
6304 };
6229 int ret = 0; 6305 int ret = 0;
6230 6306
6231 ASSERT_RTNL(); 6307 ASSERT_RTNL();
@@ -6243,12 +6319,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
6243 if (master && netdev_master_upper_dev_get(dev)) 6319 if (master && netdev_master_upper_dev_get(dev))
6244 return -EBUSY; 6320 return -EBUSY;
6245 6321
6246 changeupper_info.upper_dev = upper_dev; 6322 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
6247 changeupper_info.master = master;
6248 changeupper_info.linking = true;
6249 changeupper_info.upper_info = upper_info;
6250
6251 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
6252 &changeupper_info.info); 6323 &changeupper_info.info);
6253 ret = notifier_to_errno(ret); 6324 ret = notifier_to_errno(ret);
6254 if (ret) 6325 if (ret)
@@ -6259,7 +6330,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
6259 if (ret) 6330 if (ret)
6260 return ret; 6331 return ret;
6261 6332
6262 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, 6333 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
6263 &changeupper_info.info); 6334 &changeupper_info.info);
6264 ret = notifier_to_errno(ret); 6335 ret = notifier_to_errno(ret);
6265 if (ret) 6336 if (ret)
@@ -6284,9 +6355,11 @@ rollback:
6284 * returns zero. 6355 * returns zero.
6285 */ 6356 */
6286int netdev_upper_dev_link(struct net_device *dev, 6357int netdev_upper_dev_link(struct net_device *dev,
6287 struct net_device *upper_dev) 6358 struct net_device *upper_dev,
6359 struct netlink_ext_ack *extack)
6288{ 6360{
6289 return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); 6361 return __netdev_upper_dev_link(dev, upper_dev, false,
6362 NULL, NULL, extack);
6290} 6363}
6291EXPORT_SYMBOL(netdev_upper_dev_link); 6364EXPORT_SYMBOL(netdev_upper_dev_link);
6292 6365
@@ -6305,10 +6378,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
6305 */ 6378 */
6306int netdev_master_upper_dev_link(struct net_device *dev, 6379int netdev_master_upper_dev_link(struct net_device *dev,
6307 struct net_device *upper_dev, 6380 struct net_device *upper_dev,
6308 void *upper_priv, void *upper_info) 6381 void *upper_priv, void *upper_info,
6382 struct netlink_ext_ack *extack)
6309{ 6383{
6310 return __netdev_upper_dev_link(dev, upper_dev, true, 6384 return __netdev_upper_dev_link(dev, upper_dev, true,
6311 upper_priv, upper_info); 6385 upper_priv, upper_info, extack);
6312} 6386}
6313EXPORT_SYMBOL(netdev_master_upper_dev_link); 6387EXPORT_SYMBOL(netdev_master_upper_dev_link);
6314 6388
@@ -6323,20 +6397,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
6323void netdev_upper_dev_unlink(struct net_device *dev, 6397void netdev_upper_dev_unlink(struct net_device *dev,
6324 struct net_device *upper_dev) 6398 struct net_device *upper_dev)
6325{ 6399{
6326 struct netdev_notifier_changeupper_info changeupper_info; 6400 struct netdev_notifier_changeupper_info changeupper_info = {
6401 .info = {
6402 .dev = dev,
6403 },
6404 .upper_dev = upper_dev,
6405 .linking = false,
6406 };
6327 6407
6328 ASSERT_RTNL(); 6408 ASSERT_RTNL();
6329 6409
6330 changeupper_info.upper_dev = upper_dev;
6331 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; 6410 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
6332 changeupper_info.linking = false;
6333 6411
6334 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, 6412 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
6335 &changeupper_info.info); 6413 &changeupper_info.info);
6336 6414
6337 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 6415 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
6338 6416
6339 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, 6417 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
6340 &changeupper_info.info); 6418 &changeupper_info.info);
6341} 6419}
6342EXPORT_SYMBOL(netdev_upper_dev_unlink); 6420EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6352,11 +6430,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
6352void netdev_bonding_info_change(struct net_device *dev, 6430void netdev_bonding_info_change(struct net_device *dev,
6353 struct netdev_bonding_info *bonding_info) 6431 struct netdev_bonding_info *bonding_info)
6354{ 6432{
6355 struct netdev_notifier_bonding_info info; 6433 struct netdev_notifier_bonding_info info = {
6434 .info.dev = dev,
6435 };
6356 6436
6357 memcpy(&info.bonding_info, bonding_info, 6437 memcpy(&info.bonding_info, bonding_info,
6358 sizeof(struct netdev_bonding_info)); 6438 sizeof(struct netdev_bonding_info));
6359 call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, 6439 call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
6360 &info.info); 6440 &info.info);
6361} 6441}
6362EXPORT_SYMBOL(netdev_bonding_info_change); 6442EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6482,11 +6562,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
6482void netdev_lower_state_changed(struct net_device *lower_dev, 6562void netdev_lower_state_changed(struct net_device *lower_dev,
6483 void *lower_state_info) 6563 void *lower_state_info)
6484{ 6564{
6485 struct netdev_notifier_changelowerstate_info changelowerstate_info; 6565 struct netdev_notifier_changelowerstate_info changelowerstate_info = {
6566 .info.dev = lower_dev,
6567 };
6486 6568
6487 ASSERT_RTNL(); 6569 ASSERT_RTNL();
6488 changelowerstate_info.lower_state_info = lower_state_info; 6570 changelowerstate_info.lower_state_info = lower_state_info;
6489 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, 6571 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
6490 &changelowerstate_info.info); 6572 &changelowerstate_info.info);
6491} 6573}
6492EXPORT_SYMBOL(netdev_lower_state_changed); 6574EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6777,11 +6859,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
6777 6859
6778 if (dev->flags & IFF_UP && 6860 if (dev->flags & IFF_UP &&
6779 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { 6861 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
6780 struct netdev_notifier_change_info change_info; 6862 struct netdev_notifier_change_info change_info = {
6863 .info = {
6864 .dev = dev,
6865 },
6866 .flags_changed = changes,
6867 };
6781 6868
6782 change_info.flags_changed = changes; 6869 call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
6783 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
6784 &change_info.info);
6785 } 6870 }
6786} 6871}
6787 6872
@@ -6988,26 +7073,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
6988} 7073}
6989EXPORT_SYMBOL(dev_change_proto_down); 7074EXPORT_SYMBOL(dev_change_proto_down);
6990 7075
6991u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id) 7076u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
6992{ 7077{
6993 struct netdev_xdp xdp; 7078 struct netdev_bpf xdp;
6994 7079
6995 memset(&xdp, 0, sizeof(xdp)); 7080 memset(&xdp, 0, sizeof(xdp));
6996 xdp.command = XDP_QUERY_PROG; 7081 xdp.command = XDP_QUERY_PROG;
6997 7082
6998 /* Query must always succeed. */ 7083 /* Query must always succeed. */
6999 WARN_ON(xdp_op(dev, &xdp) < 0); 7084 WARN_ON(bpf_op(dev, &xdp) < 0);
7000 if (prog_id) 7085 if (prog_id)
7001 *prog_id = xdp.prog_id; 7086 *prog_id = xdp.prog_id;
7002 7087
7003 return xdp.prog_attached; 7088 return xdp.prog_attached;
7004} 7089}
7005 7090
7006static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, 7091static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
7007 struct netlink_ext_ack *extack, u32 flags, 7092 struct netlink_ext_ack *extack, u32 flags,
7008 struct bpf_prog *prog) 7093 struct bpf_prog *prog)
7009{ 7094{
7010 struct netdev_xdp xdp; 7095 struct netdev_bpf xdp;
7011 7096
7012 memset(&xdp, 0, sizeof(xdp)); 7097 memset(&xdp, 0, sizeof(xdp));
7013 if (flags & XDP_FLAGS_HW_MODE) 7098 if (flags & XDP_FLAGS_HW_MODE)
@@ -7018,7 +7103,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
7018 xdp.flags = flags; 7103 xdp.flags = flags;
7019 xdp.prog = prog; 7104 xdp.prog = prog;
7020 7105
7021 return xdp_op(dev, &xdp); 7106 return bpf_op(dev, &xdp);
7022} 7107}
7023 7108
7024/** 7109/**
@@ -7035,32 +7120,40 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
7035{ 7120{
7036 const struct net_device_ops *ops = dev->netdev_ops; 7121 const struct net_device_ops *ops = dev->netdev_ops;
7037 struct bpf_prog *prog = NULL; 7122 struct bpf_prog *prog = NULL;
7038 xdp_op_t xdp_op, xdp_chk; 7123 bpf_op_t bpf_op, bpf_chk;
7039 int err; 7124 int err;
7040 7125
7041 ASSERT_RTNL(); 7126 ASSERT_RTNL();
7042 7127
7043 xdp_op = xdp_chk = ops->ndo_xdp; 7128 bpf_op = bpf_chk = ops->ndo_bpf;
7044 if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) 7129 if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
7045 return -EOPNOTSUPP; 7130 return -EOPNOTSUPP;
7046 if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) 7131 if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
7047 xdp_op = generic_xdp_install; 7132 bpf_op = generic_xdp_install;
7048 if (xdp_op == xdp_chk) 7133 if (bpf_op == bpf_chk)
7049 xdp_chk = generic_xdp_install; 7134 bpf_chk = generic_xdp_install;
7050 7135
7051 if (fd >= 0) { 7136 if (fd >= 0) {
7052 if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL)) 7137 if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
7053 return -EEXIST; 7138 return -EEXIST;
7054 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && 7139 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
7055 __dev_xdp_attached(dev, xdp_op, NULL)) 7140 __dev_xdp_attached(dev, bpf_op, NULL))
7056 return -EBUSY; 7141 return -EBUSY;
7057 7142
7058 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); 7143 prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
7144 bpf_op == ops->ndo_bpf);
7059 if (IS_ERR(prog)) 7145 if (IS_ERR(prog))
7060 return PTR_ERR(prog); 7146 return PTR_ERR(prog);
7147
7148 if (!(flags & XDP_FLAGS_HW_MODE) &&
7149 bpf_prog_is_dev_bound(prog->aux)) {
7150 NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
7151 bpf_prog_put(prog);
7152 return -EINVAL;
7153 }
7061 } 7154 }
7062 7155
7063 err = dev_xdp_install(dev, xdp_op, extack, flags, prog); 7156 err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
7064 if (err < 0 && prog) 7157 if (err < 0 && prog)
7065 bpf_prog_put(prog); 7158 bpf_prog_put(prog);
7066 7159
@@ -7152,7 +7245,7 @@ static void rollback_registered_many(struct list_head *head)
7152 if (!dev->rtnl_link_ops || 7245 if (!dev->rtnl_link_ops ||
7153 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 7246 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
7154 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, 7247 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
7155 GFP_KERNEL); 7248 GFP_KERNEL, NULL);
7156 7249
7157 /* 7250 /*
7158 * Flush the unicast and multicast chains 7251 * Flush the unicast and multicast chains
@@ -7989,7 +8082,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7989 unsigned int txqs, unsigned int rxqs) 8082 unsigned int txqs, unsigned int rxqs)
7990{ 8083{
7991 struct net_device *dev; 8084 struct net_device *dev;
7992 size_t alloc_size; 8085 unsigned int alloc_size;
7993 struct net_device *p; 8086 struct net_device *p;
7994 8087
7995 BUG_ON(strlen(name) >= sizeof(dev->name)); 8088 BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -8239,7 +8332,7 @@ EXPORT_SYMBOL(unregister_netdev);
8239 8332
8240int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 8333int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
8241{ 8334{
8242 int err; 8335 int err, new_nsid;
8243 8336
8244 ASSERT_RTNL(); 8337 ASSERT_RTNL();
8245 8338
@@ -8295,7 +8388,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
8295 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 8388 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
8296 rcu_barrier(); 8389 rcu_barrier();
8297 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 8390 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
8298 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); 8391 if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
8392 new_nsid = peernet2id_alloc(dev_net(dev), net);
8393 else
8394 new_nsid = peernet2id(dev_net(dev), net);
8395 rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
8299 8396
8300 /* 8397 /*
8301 * Flush the unicast and multicast chains 8398 * Flush the unicast and multicast chains
@@ -8557,6 +8654,8 @@ static void __net_exit netdev_exit(struct net *net)
8557{ 8654{
8558 kfree(net->dev_name_head); 8655 kfree(net->dev_name_head);
8559 kfree(net->dev_index_head); 8656 kfree(net->dev_index_head);
8657 if (net != &init_net)
8658 WARN_ON_ONCE(!list_empty(&net->dev_base_head));
8560} 8659}
8561 8660
8562static struct pernet_operations __net_initdata netdev_net_ops = { 8661static struct pernet_operations __net_initdata netdev_net_ops = {
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 709a4e6fb447..7e690d0ccd05 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/kmod.h> 2#include <linux/kmod.h>
2#include <linux/netdevice.h> 3#include <linux/netdevice.h>
3#include <linux/etherdevice.h> 4#include <linux/etherdevice.h>
@@ -303,7 +304,18 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
303 case SIOCSIFTXQLEN: 304 case SIOCSIFTXQLEN:
304 if (ifr->ifr_qlen < 0) 305 if (ifr->ifr_qlen < 0)
305 return -EINVAL; 306 return -EINVAL;
306 dev->tx_queue_len = ifr->ifr_qlen; 307 if (dev->tx_queue_len ^ ifr->ifr_qlen) {
308 unsigned int orig_len = dev->tx_queue_len;
309
310 dev->tx_queue_len = ifr->ifr_qlen;
311 err = call_netdevice_notifiers(
312 NETDEV_CHANGE_TX_QUEUE_LEN, dev);
313 err = notifier_to_errno(err);
314 if (err) {
315 dev->tx_queue_len = orig_len;
316 return err;
317 }
318 }
307 return 0; 319 return 0;
308 320
309 case SIOCSIFNAME: 321 case SIOCSIFNAME:
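
The SIOCSIFTXQLEN path is now apply/notify/roll back: write the new length, fire NETDEV_CHANGE_TX_QUEUE_LEN, and restore the previous value if any notifier objects. The control flow in isolation, with notify_change() standing in for call_netdevice_notifiers() plus notifier_to_errno():

#include <stdio.h>

static unsigned int tx_queue_len = 1000;

static int notify_change(unsigned int new_len)
{
        return new_len == 0 ? -1 : 0;   /* stand-in veto rule */
}

static int set_tx_queue_len(unsigned int qlen)
{
        if (tx_queue_len != qlen) {
                unsigned int orig_len = tx_queue_len;
                int err;

                tx_queue_len = qlen;    /* apply first... */
                err = notify_change(qlen);
                if (err) {
                        tx_queue_len = orig_len;        /* ...roll back on veto */
                        return err;
                }
        }
        return 0;
}

int main(void)
{
        int err = set_tx_queue_len(0);

        printf("err=%d len=%u\n", err, tx_queue_len);   /* err=-1 len=1000 */
        err = set_tx_queue_len(64);
        printf("err=%d len=%u\n", err, tx_queue_len);   /* err=0 len=64 */
        return 0;
}

Applying before notifying lets subscribers read the candidate value straight from the device; the rollback keeps the externally visible state unchanged when one of them rejects it.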
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 70ccda233bd1..c7785efeea57 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -144,9 +144,9 @@ static void send_dm_alert(struct work_struct *work)
144 * in the event that more drops will arrive during the 144 * in the event that more drops will arrive during the
145 * hysteresis period. 145 * hysteresis period.
146 */ 146 */
147static void sched_send_work(unsigned long _data) 147static void sched_send_work(struct timer_list *t)
148{ 148{
149 struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data; 149 struct per_cpu_dm_data *data = from_timer(data, t, send_timer);
150 150
151 schedule_work(&data->dm_alert_work); 151 schedule_work(&data->dm_alert_work);
152} 152}
@@ -412,8 +412,7 @@ static int __init init_net_drop_monitor(void)
412 for_each_possible_cpu(cpu) { 412 for_each_possible_cpu(cpu) {
413 data = &per_cpu(dm_cpu_data, cpu); 413 data = &per_cpu(dm_cpu_data, cpu);
414 INIT_WORK(&data->dm_alert_work, send_dm_alert); 414 INIT_WORK(&data->dm_alert_work, send_dm_alert);
415 setup_timer(&data->send_timer, sched_send_work, 415 timer_setup(&data->send_timer, sched_send_work, 0);
416 (unsigned long)data);
417 spin_lock_init(&data->lock); 416 spin_lock_init(&data->lock);
418 reset_per_cpu_data(data); 417 reset_per_cpu_data(data);
419 } 418 }
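
This is the tree-wide timer API conversion: timer_setup() registers a callback that receives the struct timer_list pointer itself, and from_timer() recovers the containing structure from it, replacing the old unsigned long cast. from_timer(data, t, send_timer) is container_of() underneath, which plain C can demonstrate:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct timer_list { int dummy; };       /* stand-in for the kernel type */

struct per_cpu_dm_data {
        int pending;
        struct timer_list send_timer;
};

static void sched_send_work(struct timer_list *t)
{
        struct per_cpu_dm_data *data =
                container_of(t, struct per_cpu_dm_data, send_timer);

        printf("pending=%d\n", data->pending);
}

int main(void)
{
        struct per_cpu_dm_data data = { .pending = 3 };

        sched_send_work(&data.send_timer);      /* the timer core does this */
        return 0;
}

Typed callbacks are the point of the conversion: the compiler now checks that the handler matches the timer, where the (unsigned long) casts let any mismatch slip through silently.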
diff --git a/net/core/dst.c b/net/core/dst.c
index a6c47da7d0f8..662a2d4a3d19 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -322,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
322 return md_dst; 322 return md_dst;
323} 323}
324EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); 324EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
325
326void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
327{
328#ifdef CONFIG_DST_CACHE
329 int cpu;
330
331 for_each_possible_cpu(cpu) {
332 struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
333
334 if (one_md_dst->type == METADATA_IP_TUNNEL)
335 dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
336 }
337#endif
338 free_percpu(md_dst);
339}
340EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6a582ae4c5d9..f8fcf450a36e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -403,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
403 return 0; 403 return 0;
404} 404}
405 405
406/* Given two link masks, AND them together and save the result in dst. */
407void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
408 struct ethtool_link_ksettings *src)
409{
410 unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
411 unsigned int idx = 0;
412
413 for (; idx < size; idx++) {
414 dst->link_modes.supported[idx] &=
415 src->link_modes.supported[idx];
416 dst->link_modes.advertising[idx] &=
417 src->link_modes.advertising[idx];
418 }
419}
420EXPORT_SYMBOL(ethtool_intersect_link_masks);
421
406void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, 422void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
407 u32 legacy_u32) 423 u32 legacy_u32)
408{ 424{
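
The new helper is a word-wise AND over the fixed-size link-mode bitmaps; BITS_TO_LONGS() just rounds the bit count up to whole unsigned longs. An equivalent standalone version, with NBITS standing in for __ETHTOOL_LINK_MODE_MASK_NBITS:

#include <stdio.h>

#define NBITS 92
#define BITS_TO_LONGS(n) \
        (((n) + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long)))

static void intersect(unsigned long *dst, const unsigned long *src,
                      unsigned int nwords)
{
        unsigned int i;

        for (i = 0; i < nwords; i++)
                dst[i] &= src[i];
}

int main(void)
{
        unsigned long a[BITS_TO_LONGS(NBITS)] = { 0xff, 0x3 };
        unsigned long b[BITS_TO_LONGS(NBITS)] = { 0x0f, 0x1 };

        intersect(a, b, BITS_TO_LONGS(NBITS));
        printf("%lx %lx\n", a[0], a[1]);        /* f 1 */
        return 0;
}

A driver aggregating several underlying links can use this to report only the modes that every link supports and advertises.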
@@ -436,7 +452,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
436EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); 452EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
437 453
438/* return false if legacy contained non-0 deprecated fields 454/* return false if legacy contained non-0 deprecated fields
439 * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated 455 * maxtxpkt/maxrxpkt. rest of ksettings always updated
440 */ 456 */
441static bool 457static bool
442convert_legacy_settings_to_link_ksettings( 458convert_legacy_settings_to_link_ksettings(
@@ -451,8 +467,7 @@ convert_legacy_settings_to_link_ksettings(
451 * deprecated legacy fields, and they should not use 467 * deprecated legacy fields, and they should not use
452 * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS 468 * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
453 */ 469 */
454 if (legacy_settings->transceiver || 470 if (legacy_settings->maxtxpkt ||
455 legacy_settings->maxtxpkt ||
456 legacy_settings->maxrxpkt) 471 legacy_settings->maxrxpkt)
457 retval = false; 472 retval = false;
458 473
@@ -525,6 +540,8 @@ convert_link_ksettings_to_legacy_settings(
525 = link_ksettings->base.eth_tp_mdix; 540 = link_ksettings->base.eth_tp_mdix;
526 legacy_settings->eth_tp_mdix_ctrl 541 legacy_settings->eth_tp_mdix_ctrl
527 = link_ksettings->base.eth_tp_mdix_ctrl; 542 = link_ksettings->base.eth_tp_mdix_ctrl;
543 legacy_settings->transceiver
544 = link_ksettings->base.transceiver;
528 return retval; 545 return retval;
529} 546}
530 547
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 4fc202dbdfb6..0c048bdeb016 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -34,12 +34,14 @@ static unsigned int fib_seq_sum(void)
34 34
35 rtnl_lock(); 35 rtnl_lock();
36 for_each_net(net) { 36 for_each_net(net) {
37 list_for_each_entry(ops, &net->fib_notifier_ops, list) { 37 rcu_read_lock();
38 list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
38 if (!try_module_get(ops->owner)) 39 if (!try_module_get(ops->owner))
39 continue; 40 continue;
40 fib_seq += ops->fib_seq_read(net); 41 fib_seq += ops->fib_seq_read(net);
41 module_put(ops->owner); 42 module_put(ops->owner);
42 } 43 }
44 rcu_read_unlock();
43 } 45 }
44 rtnl_unlock(); 46 rtnl_unlock();
45 47
@@ -161,8 +163,14 @@ static int __net_init fib_notifier_net_init(struct net *net)
161 return 0; 163 return 0;
162} 164}
163 165
166static void __net_exit fib_notifier_net_exit(struct net *net)
167{
168 WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
169}
170
164static struct pernet_operations fib_notifier_net_ops = { 171static struct pernet_operations fib_notifier_net_ops = {
165 .init = fib_notifier_net_init, 172 .init = fib_notifier_net_init,
173 .exit = fib_notifier_net_exit,
166}; 174};
167 175
168static int __init fib_notifier_init(void) 176static int __init fib_notifier_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9a6d97c1d810..98e1066c3d55 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -314,10 +314,12 @@ static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
314static int call_fib_rule_notifiers(struct net *net, 314static int call_fib_rule_notifiers(struct net *net,
315 enum fib_event_type event_type, 315 enum fib_event_type event_type,
316 struct fib_rule *rule, 316 struct fib_rule *rule,
317 struct fib_rules_ops *ops) 317 struct fib_rules_ops *ops,
318 struct netlink_ext_ack *extack)
318{ 319{
319 struct fib_rule_notifier_info info = { 320 struct fib_rule_notifier_info info = {
320 .info.family = ops->family, 321 .info.family = ops->family,
322 .info.extack = extack,
321 .rule = rule, 323 .rule = rule,
322 }; 324 };
323 325
@@ -609,7 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
609 if (rule->tun_id) 611 if (rule->tun_id)
610 ip_tunnel_need_metadata(); 612 ip_tunnel_need_metadata();
611 613
612 call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops); 614 call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
613 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); 615 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
614 flush_route_cache(ops); 616 flush_route_cache(ops);
615 rules_ops_put(ops); 617 rules_ops_put(ops);
@@ -749,7 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
749 } 751 }
750 } 752 }
751 753
752 call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops); 754 call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
755 NULL);
753 notify_rule_change(RTM_DELRULE, rule, ops, nlh, 756 notify_rule_change(RTM_DELRULE, rule, ops, nlh,
754 NETLINK_CB(skb).portid); 757 NETLINK_CB(skb).portid);
755 fib_rule_put(rule); 758 fib_rule_put(rule);
@@ -1019,8 +1022,14 @@ static int __net_init fib_rules_net_init(struct net *net)
1019 return 0; 1022 return 0;
1020} 1023}
1021 1024
1025static void __net_exit fib_rules_net_exit(struct net *net)
1026{
1027 WARN_ON_ONCE(!list_empty(&net->rules_ops));
1028}
1029
1022static struct pernet_operations fib_rules_net_ops = { 1030static struct pernet_operations fib_rules_net_ops = {
1023 .init = fib_rules_net_init, 1031 .init = fib_rules_net_init,
1032 .exit = fib_rules_net_exit,
1024}; 1033};
1025 1034
1026static int __init fib_rules_init(void) 1035static int __init fib_rules_init(void)
diff --git a/net/core/filter.c b/net/core/filter.c
index 24dd33dd9f04..6a85e67fafce 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -43,6 +43,7 @@
43#include <linux/timer.h> 43#include <linux/timer.h>
44#include <linux/uaccess.h> 44#include <linux/uaccess.h>
45#include <asm/unaligned.h> 45#include <asm/unaligned.h>
46#include <asm/cmpxchg.h>
46#include <linux/filter.h> 47#include <linux/filter.h>
47#include <linux/ratelimit.h> 48#include <linux/ratelimit.h>
48#include <linux/seccomp.h> 49#include <linux/seccomp.h>
@@ -989,10 +990,14 @@ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
989 990
990bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) 991bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
991{ 992{
992 bool ret = __sk_filter_charge(sk, fp); 993 if (!refcount_inc_not_zero(&fp->refcnt))
993 if (ret) 994 return false;
994 refcount_inc(&fp->refcnt); 995
995 return ret; 996 if (!__sk_filter_charge(sk, fp)) {
997 sk_filter_release(fp);
998 return false;
999 }
1000 return true;
996} 1001}
997 1002
998static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) 1003static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
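
The old sequence charged the socket first and bumped the reference unconditionally afterwards, so a filter whose count had already dropped to zero could be resurrected. The fix hinges on refcount_inc_not_zero(): acquire a reference only while the object is provably live, then release it again if the later memory charge fails. The primitive itself, modeled with C11 atomics (the kernel's refcount_t additionally saturates instead of wrapping on overflow):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool refcount_inc_not_zero(atomic_uint *r)
{
        unsigned int old = atomic_load(r);

        do {
                if (old == 0)
                        return false;   /* already released: don't revive */
        } while (!atomic_compare_exchange_weak(r, &old, old + 1));

        return true;
}

int main(void)
{
        atomic_uint live = 2;
        atomic_uint dead = 0;

        printf("live: %d\n", refcount_inc_not_zero(&live));     /* 1 */
        printf("dead: %d\n", refcount_inc_not_zero(&dead));     /* 0 */
        return 0;
}

The compare-and-swap loop is what makes the "not zero" check and the increment one atomic step; a plain load-test-increment would reopen the race the patch closes.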
@@ -1402,7 +1407,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
1402{ 1407{
1403 int err = __bpf_try_make_writable(skb, write_len); 1408 int err = __bpf_try_make_writable(skb, write_len);
1404 1409
1405 bpf_compute_data_end(skb); 1410 bpf_compute_data_pointers(skb);
1406 return err; 1411 return err;
1407} 1412}
1408 1413
@@ -1641,9 +1646,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
1641 .gpl_only = false, 1646 .gpl_only = false,
1642 .pkt_access = true, 1647 .pkt_access = true,
1643 .ret_type = RET_INTEGER, 1648 .ret_type = RET_INTEGER,
1644 .arg1_type = ARG_PTR_TO_MEM, 1649 .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
1645 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 1650 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
1646 .arg3_type = ARG_PTR_TO_MEM, 1651 .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
1647 .arg4_type = ARG_CONST_SIZE_OR_ZERO, 1652 .arg4_type = ARG_CONST_SIZE_OR_ZERO,
1648 .arg5_type = ARG_ANYTHING, 1653 .arg5_type = ARG_ANYTHING,
1649}; 1654};
@@ -1794,7 +1799,7 @@ struct redirect_info {
1794 u32 flags; 1799 u32 flags;
1795 struct bpf_map *map; 1800 struct bpf_map *map;
1796 struct bpf_map *map_to_flush; 1801 struct bpf_map *map_to_flush;
1797 const struct bpf_prog *map_owner; 1802 unsigned long map_owner;
1798}; 1803};
1799 1804
1800static DEFINE_PER_CPU(struct redirect_info, redirect_info); 1805static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -1835,31 +1840,32 @@ static const struct bpf_func_proto bpf_redirect_proto = {
1835 .arg2_type = ARG_ANYTHING, 1840 .arg2_type = ARG_ANYTHING,
1836}; 1841};
1837 1842
1838BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags) 1843BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
1844 struct bpf_map *, map, u32, key, u64, flags)
1839{ 1845{
1840 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 1846 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
1841 1847
1848 /* If user passes invalid input drop the packet. */
1842 if (unlikely(flags)) 1849 if (unlikely(flags))
1843 return SK_ABORTED; 1850 return SK_DROP;
1844 1851
1845 ri->ifindex = key; 1852 tcb->bpf.key = key;
1846 ri->flags = flags; 1853 tcb->bpf.flags = flags;
1847 ri->map = map; 1854 tcb->bpf.map = map;
1848 1855
1849 return SK_REDIRECT; 1856 return SK_PASS;
1850} 1857}
1851 1858
1852struct sock *do_sk_redirect_map(void) 1859struct sock *do_sk_redirect_map(struct sk_buff *skb)
1853{ 1860{
1854 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 1861 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
1855 struct sock *sk = NULL; 1862 struct sock *sk = NULL;
1856 1863
1857 if (ri->map) { 1864 if (tcb->bpf.map) {
1858 sk = __sock_map_lookup_elem(ri->map, ri->ifindex); 1865 sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
1859 1866
1860 ri->ifindex = 0; 1867 tcb->bpf.key = 0;
1861 ri->map = NULL; 1868 tcb->bpf.map = NULL;
1862 /* we do not clear flags for future lookup */
1863 } 1869 }
1864 1870
1865 return sk; 1871 return sk;
@@ -1869,9 +1875,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
1869 .func = bpf_sk_redirect_map, 1875 .func = bpf_sk_redirect_map,
1870 .gpl_only = false, 1876 .gpl_only = false,
1871 .ret_type = RET_INTEGER, 1877 .ret_type = RET_INTEGER,
1872 .arg1_type = ARG_CONST_MAP_PTR, 1878 .arg1_type = ARG_PTR_TO_CTX,
1873 .arg2_type = ARG_ANYTHING, 1879 .arg2_type = ARG_CONST_MAP_PTR,
1874 .arg3_type = ARG_ANYTHING, 1880 .arg3_type = ARG_ANYTHING,
1881 .arg4_type = ARG_ANYTHING,
1875}; 1882};
1876 1883
1877BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) 1884BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
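
[Note on bpf_sk_redirect_map() above: the helper used to stash the chosen map and key in the per-CPU redirect_info scratch area; it now records them in the skb's own control block (TCP_SKB_CB(skb)->bpf), which is why it gains the skb as an explicit first argument and why do_sk_redirect_map() now needs the skb to recover the target socket. The verdict codes change with it: the program answers SK_PASS/SK_DROP and the socket lookup happens later from the cb. Per-packet state carried in the packet is safer than per-CPU state once several packets can be in flight between the helper call and the lookup. A toy C model of the difference; the structures are illustrative:

    #include <stdio.h>

    struct map { const char *name; };

    /* Old style: one per-CPU scratch slot, clobbered by the next packet. */
    static struct { struct map *map; unsigned key; } percpu_scratch;

    /* New style: the verdict travels inside the packet itself. */
    struct pkt {
        struct { struct map *map; unsigned key; } cb;   /* like tcb->bpf */
    };

    static void redirect(struct pkt *p, struct map *m, unsigned key)
    {
        p->cb.map = m;  /* was: percpu_scratch.map = m */
        p->cb.key = key;
    }

    static void deliver(struct pkt *p)
    {
        if (p->cb.map)
            printf("lookup key %u in %s\n", p->cb.key, p->cb.map->name);
        p->cb.map = NULL;
        p->cb.key = 0;
    }

    int main(void)
    {
        struct map m = { "sockmap" };
        struct pkt a = { { 0, 0 } };
        struct pkt b = { { 0, 0 } };

        redirect(&a, &m, 7);
        redirect(&b, &m, 9);    /* would have clobbered a's scratch slot */
        deliver(&a);
        deliver(&b);
        (void)percpu_scratch;
        return 0;
    }
]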
@@ -1962,7 +1969,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
1962 ret = skb_vlan_push(skb, vlan_proto, vlan_tci); 1969 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
1963 bpf_pull_mac_rcsum(skb); 1970 bpf_pull_mac_rcsum(skb);
1964 1971
1965 bpf_compute_data_end(skb); 1972 bpf_compute_data_pointers(skb);
1966 return ret; 1973 return ret;
1967} 1974}
1968 1975
@@ -1984,7 +1991,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
1984 ret = skb_vlan_pop(skb); 1991 ret = skb_vlan_pop(skb);
1985 bpf_pull_mac_rcsum(skb); 1992 bpf_pull_mac_rcsum(skb);
1986 1993
1987 bpf_compute_data_end(skb); 1994 bpf_compute_data_pointers(skb);
1988 return ret; 1995 return ret;
1989} 1996}
1990 1997
@@ -2178,7 +2185,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
2178 * need to be verified first. 2185 * need to be verified first.
2179 */ 2186 */
2180 ret = bpf_skb_proto_xlat(skb, proto); 2187 ret = bpf_skb_proto_xlat(skb, proto);
2181 bpf_compute_data_end(skb); 2188 bpf_compute_data_pointers(skb);
2182 return ret; 2189 return ret;
2183} 2190}
2184 2191
@@ -2303,7 +2310,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
2303 ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : 2310 ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
2304 bpf_skb_net_grow(skb, len_diff_abs); 2311 bpf_skb_net_grow(skb, len_diff_abs);
2305 2312
2306 bpf_compute_data_end(skb); 2313 bpf_compute_data_pointers(skb);
2307 return ret; 2314 return ret;
2308} 2315}
2309 2316
@@ -2394,7 +2401,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
2394 skb_gso_reset(skb); 2401 skb_gso_reset(skb);
2395 } 2402 }
2396 2403
2397 bpf_compute_data_end(skb); 2404 bpf_compute_data_pointers(skb);
2398 return ret; 2405 return ret;
2399} 2406}
2400 2407
@@ -2434,7 +2441,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
2434 skb_reset_mac_header(skb); 2441 skb_reset_mac_header(skb);
2435 } 2442 }
2436 2443
2437 bpf_compute_data_end(skb); 2444 bpf_compute_data_pointers(skb);
2438 return 0; 2445 return 0;
2439} 2446}
2440 2447
@@ -2447,14 +2454,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
2447 .arg3_type = ARG_ANYTHING, 2454 .arg3_type = ARG_ANYTHING,
2448}; 2455};
2449 2456
2457static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
2458{
2459 return xdp_data_meta_unsupported(xdp) ? 0 :
2460 xdp->data - xdp->data_meta;
2461}
2462
2450BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) 2463BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
2451{ 2464{
2465 unsigned long metalen = xdp_get_metalen(xdp);
2466 void *data_start = xdp->data_hard_start + metalen;
2452 void *data = xdp->data + offset; 2467 void *data = xdp->data + offset;
2453 2468
2454 if (unlikely(data < xdp->data_hard_start || 2469 if (unlikely(data < data_start ||
2455 data > xdp->data_end - ETH_HLEN)) 2470 data > xdp->data_end - ETH_HLEN))
2456 return -EINVAL; 2471 return -EINVAL;
2457 2472
2473 if (metalen)
2474 memmove(xdp->data_meta + offset,
2475 xdp->data_meta, metalen);
2476 xdp->data_meta += offset;
2458 xdp->data = data; 2477 xdp->data = data;
2459 2478
2460 return 0; 2479 return 0;
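
[Note on bpf_xdp_adjust_head() above: with the new metadata area, xdp->data_meta sits immediately before xdp->data, so the helper must move both pointers together and memmove() any metadata bytes so they stay glued to the packet start; the lower bound also tightens from data_hard_start to data_hard_start + metalen. This is also why bpf_compute_data_end() becomes bpf_compute_data_pointers() throughout this file: there are now two derived pointers to refresh. A runnable model of the pointer bookkeeping, ignoring the unsupported-driver case where metalen is treated as zero; buffer sizes are arbitrary:

    #include <assert.h>
    #include <string.h>
    #include <stdio.h>

    struct xdp {
        char *hard_start, *data_meta, *data, *data_end;
    };

    static int adjust_head(struct xdp *x, int off)
    {
        size_t metalen = (size_t)(x->data - x->data_meta);
        char *start = x->hard_start + metalen;
        char *data = x->data + off;

        if (data < start || data > x->data_end - 14)    /* ETH_HLEN */
            return -1;
        if (metalen)    /* keep metadata adjacent to the new data start */
            memmove(x->data_meta + off, x->data_meta, metalen);
        x->data_meta += off;
        x->data = data;
        return 0;
    }

    int main(void)
    {
        char buf[256] = { 0 };
        struct xdp x = {
            .hard_start = buf,
            .data_meta  = buf + 60,     /* 4 bytes of metadata */
            .data       = buf + 64,
            .data_end   = buf + 200,
        };

        memcpy(x.data_meta, "meta", 4);
        assert(adjust_head(&x, -8) == 0);   /* pull 8 bytes of headroom in */
        assert(memcmp(x.data_meta, "meta", 4) == 0);
        printf("meta still at data-%zd\n", x.data - x.data_meta);
        return 0;
    }
]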
@@ -2468,6 +2487,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
2468 .arg2_type = ARG_ANYTHING, 2487 .arg2_type = ARG_ANYTHING,
2469}; 2488};
2470 2489
2490BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
2491{
2492 void *meta = xdp->data_meta + offset;
2493 unsigned long metalen = xdp->data - meta;
2494
2495 if (xdp_data_meta_unsupported(xdp))
2496 return -ENOTSUPP;
2497 if (unlikely(meta < xdp->data_hard_start ||
2498 meta > xdp->data))
2499 return -EINVAL;
2500 if (unlikely((metalen & (sizeof(__u32) - 1)) ||
2501 (metalen > 32)))
2502 return -EACCES;
2503
2504 xdp->data_meta = meta;
2505
2506 return 0;
2507}
2508
2509static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
2510 .func = bpf_xdp_adjust_meta,
2511 .gpl_only = false,
2512 .ret_type = RET_INTEGER,
2513 .arg1_type = ARG_PTR_TO_CTX,
2514 .arg2_type = ARG_ANYTHING,
2515};
2516
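
[Note on bpf_xdp_adjust_meta() above: it lets an XDP program carve out up to 32 bytes, in 4-byte multiples, between data_hard_start and data, for example to hand a classification result to a TC program; drivers that have not reserved the headroom report -ENOTSUPP. A sketch of how an XDP program might use it. The helper and the data_meta context field are the ones this patch introduces; the metadata layout is made up and the include paths assume current libbpf:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct meta {           /* illustrative 4-byte metadata layout */
        __u32 mark;
    };

    SEC("xdp")
    int xdp_mark(struct xdp_md *ctx)
    {
        struct meta *m;
        void *data;

        /* Reserve sizeof(*m) bytes in front of the packet. */
        if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*m)))
            return XDP_PASS;    /* e.g. driver lacks support */

        m = (void *)(long)ctx->data_meta;
        data = (void *)(long)ctx->data;
        if ((void *)(m + 1) > data)     /* verifier-mandated bounds check */
            return XDP_PASS;

        m->mark = 42;   /* consumed later, e.g. by a TC classifier */
        return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";
]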
2471static int __bpf_tx_xdp(struct net_device *dev, 2517static int __bpf_tx_xdp(struct net_device *dev,
2472 struct bpf_map *map, 2518 struct bpf_map *map,
2473 struct xdp_buff *xdp, 2519 struct xdp_buff *xdp,
@@ -2482,10 +2528,36 @@ static int __bpf_tx_xdp(struct net_device *dev,
2482 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); 2528 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
2483 if (err) 2529 if (err)
2484 return err; 2530 return err;
2485 if (map) 2531 dev->netdev_ops->ndo_xdp_flush(dev);
2532 return 0;
2533}
2534
2535static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
2536 struct bpf_map *map,
2537 struct xdp_buff *xdp,
2538 u32 index)
2539{
2540 int err;
2541
2542 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
2543 struct net_device *dev = fwd;
2544
2545 if (!dev->netdev_ops->ndo_xdp_xmit)
2546 return -EOPNOTSUPP;
2547
2548 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
2549 if (err)
2550 return err;
2486 __dev_map_insert_ctx(map, index); 2551 __dev_map_insert_ctx(map, index);
2487 else 2552
2488 dev->netdev_ops->ndo_xdp_flush(dev); 2553 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
2554 struct bpf_cpu_map_entry *rcpu = fwd;
2555
2556 err = cpu_map_enqueue(rcpu, xdp, dev_rx);
2557 if (err)
2558 return err;
2559 __cpu_map_insert_ctx(map, index);
2560 }
2489 return 0; 2561 return 0;
2490} 2562}
2491 2563
@@ -2495,32 +2567,60 @@ void xdp_do_flush_map(void)
2495 struct bpf_map *map = ri->map_to_flush; 2567 struct bpf_map *map = ri->map_to_flush;
2496 2568
2497 ri->map_to_flush = NULL; 2569 ri->map_to_flush = NULL;
2498 if (map) 2570 if (map) {
2499 __dev_map_flush(map); 2571 switch (map->map_type) {
2572 case BPF_MAP_TYPE_DEVMAP:
2573 __dev_map_flush(map);
2574 break;
2575 case BPF_MAP_TYPE_CPUMAP:
2576 __cpu_map_flush(map);
2577 break;
2578 default:
2579 break;
2580 }
2581 }
2500} 2582}
2501EXPORT_SYMBOL_GPL(xdp_do_flush_map); 2583EXPORT_SYMBOL_GPL(xdp_do_flush_map);
2502 2584
2585static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
2586{
2587 switch (map->map_type) {
2588 case BPF_MAP_TYPE_DEVMAP:
2589 return __dev_map_lookup_elem(map, index);
2590 case BPF_MAP_TYPE_CPUMAP:
2591 return __cpu_map_lookup_elem(map, index);
2592 default:
2593 return NULL;
2594 }
2595}
2596
2597static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
2598 unsigned long aux)
2599{
2600 return (unsigned long)xdp_prog->aux != aux;
2601}
2602
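
[Note on the helpers above: the redirect path now dispatches on map->map_type, so a DEVMAP entry resolves to a net_device while a CPUMAP entry resolves to a bpf_cpu_map_entry; __xdp_map_lookup_elem() returns void * for exactly that reason. The ownership test also changes representation: rather than keeping a bpf_prog pointer alive in per-CPU state, redirect_info holds an opaque unsigned long token that xdp_map_invalid() compares against the current program's aux pointer. A compact C model of the tagged lookup; the map kinds and entry types are stand-ins:

    #include <stdio.h>

    enum map_type { DEVMAP, CPUMAP };

    struct dev  { const char *name; };
    struct rcpu { int cpu; };

    struct map {
        enum map_type type;
        void *entries[4];   /* devmap: struct dev *, cpumap: struct rcpu * */
    };

    /* Like __xdp_map_lookup_elem(): caller interprets the void * by type. */
    static void *map_lookup(struct map *m, unsigned idx)
    {
        return idx < 4 ? m->entries[idx] : NULL;
    }

    static void forward(struct map *m, unsigned idx)
    {
        void *fwd = map_lookup(m, idx);

        if (!fwd)
            return;
        switch (m->type) {
        case DEVMAP:
            printf("xmit via %s\n", ((struct dev *)fwd)->name);
            break;
        case CPUMAP:
            printf("enqueue to cpu %d\n", ((struct rcpu *)fwd)->cpu);
            break;
        }
    }

    int main(void)
    {
        struct dev  d = { "eth1" };
        struct rcpu r = { 3 };
        struct map dm = { DEVMAP, { &d } };
        struct map cm = { CPUMAP, { &r } };

        forward(&dm, 0);
        forward(&cm, 0);
        return 0;
    }
]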
2503static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, 2603static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
2504 struct bpf_prog *xdp_prog) 2604 struct bpf_prog *xdp_prog)
2505{ 2605{
2506 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2606 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2507 const struct bpf_prog *map_owner = ri->map_owner; 2607 unsigned long map_owner = ri->map_owner;
2508 struct bpf_map *map = ri->map; 2608 struct bpf_map *map = ri->map;
2509 struct net_device *fwd = NULL;
2510 u32 index = ri->ifindex; 2609 u32 index = ri->ifindex;
2610 void *fwd = NULL;
2511 int err; 2611 int err;
2512 2612
2513 ri->ifindex = 0; 2613 ri->ifindex = 0;
2514 ri->map = NULL; 2614 ri->map = NULL;
2515 ri->map_owner = NULL; 2615 ri->map_owner = 0;
2516 2616
2517 if (unlikely(map_owner != xdp_prog)) { 2617 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
2518 err = -EFAULT; 2618 err = -EFAULT;
2519 map = NULL; 2619 map = NULL;
2520 goto err; 2620 goto err;
2521 } 2621 }
2522 2622
2523 fwd = __dev_map_lookup_elem(map, index); 2623 fwd = __xdp_map_lookup_elem(map, index);
2524 if (!fwd) { 2624 if (!fwd) {
2525 err = -EINVAL; 2625 err = -EINVAL;
2526 goto err; 2626 goto err;
@@ -2528,7 +2628,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
2528 if (ri->map_to_flush && ri->map_to_flush != map) 2628 if (ri->map_to_flush && ri->map_to_flush != map)
2529 xdp_do_flush_map(); 2629 xdp_do_flush_map();
2530 2630
2531 err = __bpf_tx_xdp(fwd, map, xdp, index); 2631 err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
2532 if (unlikely(err)) 2632 if (unlikely(err))
2533 goto err; 2633 goto err;
2534 2634
@@ -2570,54 +2670,88 @@ err:
2570} 2670}
2571EXPORT_SYMBOL_GPL(xdp_do_redirect); 2671EXPORT_SYMBOL_GPL(xdp_do_redirect);
2572 2672
2573int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, 2673static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
2574 struct bpf_prog *xdp_prog) 2674{
2675 unsigned int len;
2676
2677 if (unlikely(!(fwd->flags & IFF_UP)))
2678 return -ENETDOWN;
2679
2680 len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
2681 if (skb->len > len)
2682 return -EMSGSIZE;
2683
2684 return 0;
2685}
2686
2687int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
2688 struct bpf_prog *xdp_prog)
2575{ 2689{
2576 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2690 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2577 const struct bpf_prog *map_owner = ri->map_owner; 2691 unsigned long map_owner = ri->map_owner;
2578 struct bpf_map *map = ri->map; 2692 struct bpf_map *map = ri->map;
2579 struct net_device *fwd = NULL; 2693 struct net_device *fwd = NULL;
2580 u32 index = ri->ifindex; 2694 u32 index = ri->ifindex;
2581 unsigned int len;
2582 int err = 0; 2695 int err = 0;
2583 2696
2584 ri->ifindex = 0; 2697 ri->ifindex = 0;
2585 ri->map = NULL; 2698 ri->map = NULL;
2586 ri->map_owner = NULL; 2699 ri->map_owner = 0;
2587 2700
2588 if (map) { 2701 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
2589 if (unlikely(map_owner != xdp_prog)) { 2702 err = -EFAULT;
2590 err = -EFAULT; 2703 map = NULL;
2591 map = NULL; 2704 goto err;
2592 goto err;
2593 }
2594 fwd = __dev_map_lookup_elem(map, index);
2595 } else {
2596 fwd = dev_get_by_index_rcu(dev_net(dev), index);
2597 } 2705 }
2706 fwd = __xdp_map_lookup_elem(map, index);
2598 if (unlikely(!fwd)) { 2707 if (unlikely(!fwd)) {
2599 err = -EINVAL; 2708 err = -EINVAL;
2600 goto err; 2709 goto err;
2601 } 2710 }
2602 2711
2603 if (unlikely(!(fwd->flags & IFF_UP))) { 2712 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
2604 err = -ENETDOWN; 2713 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
2714 goto err;
2715 skb->dev = fwd;
2716 } else {
2717 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
2718 err = -EBADRQC;
2605 goto err; 2719 goto err;
2606 } 2720 }
2607 2721
2608 len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; 2722 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
2609 if (skb->len > len) { 2723 return 0;
2610 err = -EMSGSIZE; 2724err:
2725 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
2726 return err;
2727}
2728
2729int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
2730 struct bpf_prog *xdp_prog)
2731{
2732 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2733 u32 index = ri->ifindex;
2734 struct net_device *fwd;
2735 int err = 0;
2736
2737 if (ri->map)
2738 return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
2739
2740 ri->ifindex = 0;
2741 fwd = dev_get_by_index_rcu(dev_net(dev), index);
2742 if (unlikely(!fwd)) {
2743 err = -EINVAL;
2611 goto err; 2744 goto err;
2612 } 2745 }
2613 2746
2747 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
2748 goto err;
2749
2614 skb->dev = fwd; 2750 skb->dev = fwd;
2615 map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index) 2751 _trace_xdp_redirect(dev, xdp_prog, index);
2616 : _trace_xdp_redirect(dev, xdp_prog, index);
2617 return 0; 2752 return 0;
2618err: 2753err:
2619 map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err) 2754 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2620 : _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2621 return err; 2755 return err;
2622} 2756}
2623EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); 2757EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
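
[Note on the generic redirect rework above: the non-driver path is split in two. xdp_do_generic_redirect_map() handles map-based targets, rejecting CPUMAP with -EBADRQC until generic support exists (per the TODO), while the plain-ifindex case stays in xdp_do_generic_redirect(); both share __xdp_generic_ok_fwd_dev(), which refuses a downed device and any skb larger than MTU + hard header + one VLAN tag. A direct model of that check; the constants mirror the patch and the error values are Linux's:

    #include <stdbool.h>
    #include <stdio.h>

    #define VLAN_HLEN 4

    struct net_device {
        bool up;
        unsigned mtu, hard_header_len;
    };

    static int ok_fwd_dev(const struct net_device *fwd, unsigned skb_len)
    {
        if (!fwd->up)
            return -100;    /* -ENETDOWN on Linux */
        if (skb_len > fwd->mtu + fwd->hard_header_len + VLAN_HLEN)
            return -90;     /* -EMSGSIZE */
        return 0;
    }

    int main(void)
    {
        struct net_device d = { .up = true, .mtu = 1500, .hard_header_len = 14 };

        printf("%d\n", ok_fwd_dev(&d, 1514));   /* fits: 0 */
        printf("%d\n", ok_fwd_dev(&d, 1600));   /* too big: -90 */
        d.up = false;
        printf("%d\n", ok_fwd_dev(&d, 100));    /* down: -100 */
        return 0;
    }
]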
@@ -2632,7 +2766,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
2632 ri->ifindex = ifindex; 2766 ri->ifindex = ifindex;
2633 ri->flags = flags; 2767 ri->flags = flags;
2634 ri->map = NULL; 2768 ri->map = NULL;
2635 ri->map_owner = NULL; 2769 ri->map_owner = 0;
2636 2770
2637 return XDP_REDIRECT; 2771 return XDP_REDIRECT;
2638} 2772}
@@ -2646,7 +2780,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
2646}; 2780};
2647 2781
2648BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags, 2782BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
2649 const struct bpf_prog *, map_owner) 2783 unsigned long, map_owner)
2650{ 2784{
2651 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2785 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2652 2786
@@ -2686,7 +2820,8 @@ bool bpf_helper_changes_pkt_data(void *func)
2686 func == bpf_clone_redirect || 2820 func == bpf_clone_redirect ||
2687 func == bpf_l3_csum_replace || 2821 func == bpf_l3_csum_replace ||
2688 func == bpf_l4_csum_replace || 2822 func == bpf_l4_csum_replace ||
2689 func == bpf_xdp_adjust_head) 2823 func == bpf_xdp_adjust_head ||
2824 func == bpf_xdp_adjust_meta)
2690 return true; 2825 return true;
2691 2826
2692 return false; 2827 return false;
@@ -2937,14 +3072,15 @@ static const struct bpf_func_proto *
2937bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) 3072bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
2938{ 3073{
2939 if (!md_dst) { 3074 if (!md_dst) {
2940 /* Race is not possible, since it's called from verifier 3075 struct metadata_dst __percpu *tmp;
2941 * that is holding verifier mutex. 3076
2942 */ 3077 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
2943 md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, 3078 METADATA_IP_TUNNEL,
2944 METADATA_IP_TUNNEL, 3079 GFP_KERNEL);
2945 GFP_KERNEL); 3080 if (!tmp)
2946 if (!md_dst)
2947 return NULL; 3081 return NULL;
3082 if (cmpxchg(&md_dst, NULL, tmp))
3083 metadata_dst_free_percpu(tmp);
2948 } 3084 }
2949 3085
2950 switch (which) { 3086 switch (which) {
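
[Note on the md_dst allocation above: the old comment's "verifier mutex makes a race impossible" assumption no longer holds, so the lazy allocation becomes race-safe the classic lock-free way: allocate into a local, publish with cmpxchg(&md_dst, NULL, tmp), and free the copy if another caller won. A runnable C11 version of the idiom; the payload is a stand-in for the percpu metadata_dst:

    #include <stdatomic.h>
    #include <stdlib.h>
    #include <stdio.h>

    static _Atomic(int *) md_dst;   /* the lazily-created singleton */

    static int *get_md_dst(void)
    {
        int *cur = atomic_load(&md_dst);

        if (!cur) {
            int *tmp = malloc(sizeof(*tmp));
            int *expected = NULL;

            if (!tmp)
                return NULL;
            *tmp = 42;
            /* publish iff still NULL; on failure someone else won */
            if (!atomic_compare_exchange_strong(&md_dst, &expected, tmp))
                free(tmp);  /* lost the race: drop our copy */
        }
        return atomic_load(&md_dst);
    }

    int main(void)
    {
        printf("%d\n", *get_md_dst());  /* 42, however many callers race */
        return 0;
    }
]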
@@ -3139,7 +3275,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3139 3275
3140static const struct bpf_func_proto bpf_setsockopt_proto = { 3276static const struct bpf_func_proto bpf_setsockopt_proto = {
3141 .func = bpf_setsockopt, 3277 .func = bpf_setsockopt,
3142 .gpl_only = true, 3278 .gpl_only = false,
3143 .ret_type = RET_INTEGER, 3279 .ret_type = RET_INTEGER,
3144 .arg1_type = ARG_PTR_TO_CTX, 3280 .arg1_type = ARG_PTR_TO_CTX,
3145 .arg2_type = ARG_ANYTHING, 3281 .arg2_type = ARG_ANYTHING,
@@ -3148,6 +3284,47 @@ static const struct bpf_func_proto bpf_setsockopt_proto = {
3148 .arg5_type = ARG_CONST_SIZE, 3284 .arg5_type = ARG_CONST_SIZE,
3149}; 3285};
3150 3286
3287BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3288 int, level, int, optname, char *, optval, int, optlen)
3289{
3290 struct sock *sk = bpf_sock->sk;
3291
3292 if (!sk_fullsock(sk))
3293 goto err_clear;
3294
3295#ifdef CONFIG_INET
3296 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
3297 if (optname == TCP_CONGESTION) {
3298 struct inet_connection_sock *icsk = inet_csk(sk);
3299
3300 if (!icsk->icsk_ca_ops || optlen <= 1)
3301 goto err_clear;
3302 strncpy(optval, icsk->icsk_ca_ops->name, optlen);
3303 optval[optlen - 1] = 0;
3304 } else {
3305 goto err_clear;
3306 }
3307 } else {
3308 goto err_clear;
3309 }
3310 return 0;
3311#endif
3312err_clear:
3313 memset(optval, 0, optlen);
3314 return -EINVAL;
3315}
3316
3317static const struct bpf_func_proto bpf_getsockopt_proto = {
3318 .func = bpf_getsockopt,
3319 .gpl_only = false,
3320 .ret_type = RET_INTEGER,
3321 .arg1_type = ARG_PTR_TO_CTX,
3322 .arg2_type = ARG_ANYTHING,
3323 .arg3_type = ARG_ANYTHING,
3324 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
3325 .arg5_type = ARG_CONST_SIZE,
3326};
3327
3151static const struct bpf_func_proto * 3328static const struct bpf_func_proto *
3152bpf_base_func_proto(enum bpf_func_id func_id) 3329bpf_base_func_proto(enum bpf_func_id func_id)
3153{ 3330{
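
[Note on bpf_getsockopt() above: it is the read-side twin of bpf_setsockopt(), which also drops its gpl_only restriction here. At this point it only answers SOL_TCP/TCP_CONGESTION, copying the congestion-control name NUL-terminated, and zeroes the buffer before returning -EINVAL for everything else, which is why arg4 is ARG_PTR_TO_UNINIT_MEM. A sketch of a sock_ops program reading the current congestion control; the section name, callback constant, and include paths follow the usual sock_ops conventions rather than anything in this diff:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    #ifndef SOL_TCP
    #define SOL_TCP 6               /* IPPROTO_TCP */
    #endif
    #ifndef TCP_CONGESTION
    #define TCP_CONGESTION 13
    #endif

    SEC("sockops")
    int log_cc(struct bpf_sock_ops *skops)
    {
        char cc[16] = {};

        if (skops->op != BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
            return 1;

        /* Fills cc with e.g. "cubic"; clears it and fails otherwise. */
        if (!bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, sizeof(cc)))
            bpf_printk("cc=%s\n", cc);
        return 1;
    }

    char _license[] SEC("license") = "GPL";
]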
@@ -3282,6 +3459,8 @@ xdp_func_proto(enum bpf_func_id func_id)
3282 return &bpf_get_smp_processor_id_proto; 3459 return &bpf_get_smp_processor_id_proto;
3283 case BPF_FUNC_xdp_adjust_head: 3460 case BPF_FUNC_xdp_adjust_head:
3284 return &bpf_xdp_adjust_head_proto; 3461 return &bpf_xdp_adjust_head_proto;
3462 case BPF_FUNC_xdp_adjust_meta:
3463 return &bpf_xdp_adjust_meta_proto;
3285 case BPF_FUNC_redirect: 3464 case BPF_FUNC_redirect:
3286 return &bpf_xdp_redirect_proto; 3465 return &bpf_xdp_redirect_proto;
3287 case BPF_FUNC_redirect_map: 3466 case BPF_FUNC_redirect_map:
@@ -3324,6 +3503,8 @@ static const struct bpf_func_proto *
3324 switch (func_id) { 3503 switch (func_id) {
3325 case BPF_FUNC_setsockopt: 3504 case BPF_FUNC_setsockopt:
3326 return &bpf_setsockopt_proto; 3505 return &bpf_setsockopt_proto;
3506 case BPF_FUNC_getsockopt:
3507 return &bpf_getsockopt_proto;
3327 case BPF_FUNC_sock_map_update: 3508 case BPF_FUNC_sock_map_update:
3328 return &bpf_sock_map_update_proto; 3509 return &bpf_sock_map_update_proto;
3329 default: 3510 default:
@@ -3412,6 +3593,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
3412 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): 3593 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
3413 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): 3594 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
3414 case bpf_ctx_range(struct __sk_buff, data): 3595 case bpf_ctx_range(struct __sk_buff, data):
3596 case bpf_ctx_range(struct __sk_buff, data_meta):
3415 case bpf_ctx_range(struct __sk_buff, data_end): 3597 case bpf_ctx_range(struct __sk_buff, data_end):
3416 if (size != size_default) 3598 if (size != size_default)
3417 return false; 3599 return false;
@@ -3438,6 +3620,7 @@ static bool sk_filter_is_valid_access(int off, int size,
3438 switch (off) { 3620 switch (off) {
3439 case bpf_ctx_range(struct __sk_buff, tc_classid): 3621 case bpf_ctx_range(struct __sk_buff, tc_classid):
3440 case bpf_ctx_range(struct __sk_buff, data): 3622 case bpf_ctx_range(struct __sk_buff, data):
3623 case bpf_ctx_range(struct __sk_buff, data_meta):
3441 case bpf_ctx_range(struct __sk_buff, data_end): 3624 case bpf_ctx_range(struct __sk_buff, data_end):
3442 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 3625 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
3443 return false; 3626 return false;
@@ -3462,6 +3645,7 @@ static bool lwt_is_valid_access(int off, int size,
3462 switch (off) { 3645 switch (off) {
3463 case bpf_ctx_range(struct __sk_buff, tc_classid): 3646 case bpf_ctx_range(struct __sk_buff, tc_classid):
3464 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 3647 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
3648 case bpf_ctx_range(struct __sk_buff, data_meta):
3465 return false; 3649 return false;
3466 } 3650 }
3467 3651
@@ -3580,6 +3764,9 @@ static bool tc_cls_act_is_valid_access(int off, int size,
3580 case bpf_ctx_range(struct __sk_buff, data): 3764 case bpf_ctx_range(struct __sk_buff, data):
3581 info->reg_type = PTR_TO_PACKET; 3765 info->reg_type = PTR_TO_PACKET;
3582 break; 3766 break;
3767 case bpf_ctx_range(struct __sk_buff, data_meta):
3768 info->reg_type = PTR_TO_PACKET_META;
3769 break;
3583 case bpf_ctx_range(struct __sk_buff, data_end): 3770 case bpf_ctx_range(struct __sk_buff, data_end):
3584 info->reg_type = PTR_TO_PACKET_END; 3771 info->reg_type = PTR_TO_PACKET_END;
3585 break; 3772 break;
@@ -3613,6 +3800,9 @@ static bool xdp_is_valid_access(int off, int size,
3613 case offsetof(struct xdp_md, data): 3800 case offsetof(struct xdp_md, data):
3614 info->reg_type = PTR_TO_PACKET; 3801 info->reg_type = PTR_TO_PACKET;
3615 break; 3802 break;
3803 case offsetof(struct xdp_md, data_meta):
3804 info->reg_type = PTR_TO_PACKET_META;
3805 break;
3616 case offsetof(struct xdp_md, data_end): 3806 case offsetof(struct xdp_md, data_end):
3617 info->reg_type = PTR_TO_PACKET_END; 3807 info->reg_type = PTR_TO_PACKET_END;
3618 break; 3808 break;
@@ -3671,9 +3861,14 @@ static bool sk_skb_is_valid_access(int off, int size,
3671 enum bpf_access_type type, 3861 enum bpf_access_type type,
3672 struct bpf_insn_access_aux *info) 3862 struct bpf_insn_access_aux *info)
3673{ 3863{
3864 switch (off) {
3865 case bpf_ctx_range(struct __sk_buff, tc_classid):
3866 case bpf_ctx_range(struct __sk_buff, data_meta):
3867 return false;
3868 }
3869
3674 if (type == BPF_WRITE) { 3870 if (type == BPF_WRITE) {
3675 switch (off) { 3871 switch (off) {
3676 case bpf_ctx_range(struct __sk_buff, mark):
3677 case bpf_ctx_range(struct __sk_buff, tc_index): 3872 case bpf_ctx_range(struct __sk_buff, tc_index):
3678 case bpf_ctx_range(struct __sk_buff, priority): 3873 case bpf_ctx_range(struct __sk_buff, priority):
3679 break; 3874 break;
@@ -3683,7 +3878,7 @@ static bool sk_skb_is_valid_access(int off, int size,
3683 } 3878 }
3684 3879
3685 switch (off) { 3880 switch (off) {
3686 case bpf_ctx_range(struct __sk_buff, tc_classid): 3881 case bpf_ctx_range(struct __sk_buff, mark):
3687 return false; 3882 return false;
3688 case bpf_ctx_range(struct __sk_buff, data): 3883 case bpf_ctx_range(struct __sk_buff, data):
3689 info->reg_type = PTR_TO_PACKET; 3884 info->reg_type = PTR_TO_PACKET;
@@ -3841,6 +4036,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
3841 offsetof(struct sk_buff, data)); 4036 offsetof(struct sk_buff, data));
3842 break; 4037 break;
3843 4038
4039 case offsetof(struct __sk_buff, data_meta):
4040 off = si->off;
4041 off -= offsetof(struct __sk_buff, data_meta);
4042 off += offsetof(struct sk_buff, cb);
4043 off += offsetof(struct bpf_skb_data_end, data_meta);
4044 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
4045 si->src_reg, off);
4046 break;
4047
3844 case offsetof(struct __sk_buff, data_end): 4048 case offsetof(struct __sk_buff, data_end):
3845 off = si->off; 4049 off = si->off;
3846 off -= offsetof(struct __sk_buff, data_end); 4050 off -= offsetof(struct __sk_buff, data_end);
@@ -4089,6 +4293,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4089 si->dst_reg, si->src_reg, 4293 si->dst_reg, si->src_reg,
4090 offsetof(struct xdp_buff, data)); 4294 offsetof(struct xdp_buff, data));
4091 break; 4295 break;
4296 case offsetof(struct xdp_md, data_meta):
4297 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
4298 si->dst_reg, si->src_reg,
4299 offsetof(struct xdp_buff, data_meta));
4300 break;
4092 case offsetof(struct xdp_md, data_end): 4301 case offsetof(struct xdp_md, data_end):
4093 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), 4302 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
4094 si->dst_reg, si->src_reg, 4303 si->dst_reg, si->src_reg,
@@ -4232,68 +4441,120 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
4232 return insn - insn_buf; 4441 return insn - insn_buf;
4233} 4442}
4234 4443
4235const struct bpf_verifier_ops sk_filter_prog_ops = { 4444static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
4445 const struct bpf_insn *si,
4446 struct bpf_insn *insn_buf,
4447 struct bpf_prog *prog, u32 *target_size)
4448{
4449 struct bpf_insn *insn = insn_buf;
4450 int off;
4451
4452 switch (si->off) {
4453 case offsetof(struct __sk_buff, data_end):
4454 off = si->off;
4455 off -= offsetof(struct __sk_buff, data_end);
4456 off += offsetof(struct sk_buff, cb);
4457 off += offsetof(struct tcp_skb_cb, bpf.data_end);
4458 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
4459 si->src_reg, off);
4460 break;
4461 default:
4462 return bpf_convert_ctx_access(type, si, insn_buf, prog,
4463 target_size);
4464 }
4465
4466 return insn - insn_buf;
4467}
4468
4469const struct bpf_verifier_ops sk_filter_verifier_ops = {
4236 .get_func_proto = sk_filter_func_proto, 4470 .get_func_proto = sk_filter_func_proto,
4237 .is_valid_access = sk_filter_is_valid_access, 4471 .is_valid_access = sk_filter_is_valid_access,
4238 .convert_ctx_access = bpf_convert_ctx_access, 4472 .convert_ctx_access = bpf_convert_ctx_access,
4239}; 4473};
4240 4474
4241const struct bpf_verifier_ops tc_cls_act_prog_ops = { 4475const struct bpf_prog_ops sk_filter_prog_ops = {
4476};
4477
4478const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
4242 .get_func_proto = tc_cls_act_func_proto, 4479 .get_func_proto = tc_cls_act_func_proto,
4243 .is_valid_access = tc_cls_act_is_valid_access, 4480 .is_valid_access = tc_cls_act_is_valid_access,
4244 .convert_ctx_access = tc_cls_act_convert_ctx_access, 4481 .convert_ctx_access = tc_cls_act_convert_ctx_access,
4245 .gen_prologue = tc_cls_act_prologue, 4482 .gen_prologue = tc_cls_act_prologue,
4483};
4484
4485const struct bpf_prog_ops tc_cls_act_prog_ops = {
4246 .test_run = bpf_prog_test_run_skb, 4486 .test_run = bpf_prog_test_run_skb,
4247}; 4487};
4248 4488
4249const struct bpf_verifier_ops xdp_prog_ops = { 4489const struct bpf_verifier_ops xdp_verifier_ops = {
4250 .get_func_proto = xdp_func_proto, 4490 .get_func_proto = xdp_func_proto,
4251 .is_valid_access = xdp_is_valid_access, 4491 .is_valid_access = xdp_is_valid_access,
4252 .convert_ctx_access = xdp_convert_ctx_access, 4492 .convert_ctx_access = xdp_convert_ctx_access,
4493};
4494
4495const struct bpf_prog_ops xdp_prog_ops = {
4253 .test_run = bpf_prog_test_run_xdp, 4496 .test_run = bpf_prog_test_run_xdp,
4254}; 4497};
4255 4498
4256const struct bpf_verifier_ops cg_skb_prog_ops = { 4499const struct bpf_verifier_ops cg_skb_verifier_ops = {
4257 .get_func_proto = sk_filter_func_proto, 4500 .get_func_proto = sk_filter_func_proto,
4258 .is_valid_access = sk_filter_is_valid_access, 4501 .is_valid_access = sk_filter_is_valid_access,
4259 .convert_ctx_access = bpf_convert_ctx_access, 4502 .convert_ctx_access = bpf_convert_ctx_access,
4503};
4504
4505const struct bpf_prog_ops cg_skb_prog_ops = {
4260 .test_run = bpf_prog_test_run_skb, 4506 .test_run = bpf_prog_test_run_skb,
4261}; 4507};
4262 4508
4263const struct bpf_verifier_ops lwt_inout_prog_ops = { 4509const struct bpf_verifier_ops lwt_inout_verifier_ops = {
4264 .get_func_proto = lwt_inout_func_proto, 4510 .get_func_proto = lwt_inout_func_proto,
4265 .is_valid_access = lwt_is_valid_access, 4511 .is_valid_access = lwt_is_valid_access,
4266 .convert_ctx_access = bpf_convert_ctx_access, 4512 .convert_ctx_access = bpf_convert_ctx_access,
4513};
4514
4515const struct bpf_prog_ops lwt_inout_prog_ops = {
4267 .test_run = bpf_prog_test_run_skb, 4516 .test_run = bpf_prog_test_run_skb,
4268}; 4517};
4269 4518
4270const struct bpf_verifier_ops lwt_xmit_prog_ops = { 4519const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
4271 .get_func_proto = lwt_xmit_func_proto, 4520 .get_func_proto = lwt_xmit_func_proto,
4272 .is_valid_access = lwt_is_valid_access, 4521 .is_valid_access = lwt_is_valid_access,
4273 .convert_ctx_access = bpf_convert_ctx_access, 4522 .convert_ctx_access = bpf_convert_ctx_access,
4274 .gen_prologue = tc_cls_act_prologue, 4523 .gen_prologue = tc_cls_act_prologue,
4524};
4525
4526const struct bpf_prog_ops lwt_xmit_prog_ops = {
4275 .test_run = bpf_prog_test_run_skb, 4527 .test_run = bpf_prog_test_run_skb,
4276}; 4528};
4277 4529
4278const struct bpf_verifier_ops cg_sock_prog_ops = { 4530const struct bpf_verifier_ops cg_sock_verifier_ops = {
4279 .get_func_proto = sock_filter_func_proto, 4531 .get_func_proto = sock_filter_func_proto,
4280 .is_valid_access = sock_filter_is_valid_access, 4532 .is_valid_access = sock_filter_is_valid_access,
4281 .convert_ctx_access = sock_filter_convert_ctx_access, 4533 .convert_ctx_access = sock_filter_convert_ctx_access,
4282}; 4534};
4283 4535
4284const struct bpf_verifier_ops sock_ops_prog_ops = { 4536const struct bpf_prog_ops cg_sock_prog_ops = {
4537};
4538
4539const struct bpf_verifier_ops sock_ops_verifier_ops = {
4285 .get_func_proto = sock_ops_func_proto, 4540 .get_func_proto = sock_ops_func_proto,
4286 .is_valid_access = sock_ops_is_valid_access, 4541 .is_valid_access = sock_ops_is_valid_access,
4287 .convert_ctx_access = sock_ops_convert_ctx_access, 4542 .convert_ctx_access = sock_ops_convert_ctx_access,
4288}; 4543};
4289 4544
4290const struct bpf_verifier_ops sk_skb_prog_ops = { 4545const struct bpf_prog_ops sock_ops_prog_ops = {
4546};
4547
4548const struct bpf_verifier_ops sk_skb_verifier_ops = {
4291 .get_func_proto = sk_skb_func_proto, 4549 .get_func_proto = sk_skb_func_proto,
4292 .is_valid_access = sk_skb_is_valid_access, 4550 .is_valid_access = sk_skb_is_valid_access,
4293 .convert_ctx_access = bpf_convert_ctx_access, 4551 .convert_ctx_access = sk_skb_convert_ctx_access,
4294 .gen_prologue = sk_skb_prologue, 4552 .gen_prologue = sk_skb_prologue,
4295}; 4553};
4296 4554
4555const struct bpf_prog_ops sk_skb_prog_ops = {
4556};
4557
4297int sk_detach_filter(struct sock *sk) 4558int sk_detach_filter(struct sock *sk)
4298{ 4559{
4299 int ret = -ENOENT; 4560 int ret = -ENOENT;
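
[Note on the bulk rename at the bottom of filter.c: it is the net/core side of splitting each program type's ops in two. bpf_verifier_ops keeps everything only the verifier needs (func protos, context access checks and rewriting, prologues), while the new, often empty bpf_prog_ops carries what is needed at run time, such as .test_run. A hedged sketch of how a type table can expand one declaration into both structures; the macro shape is illustrative of the approach, not copied from the tree:

    #include <stdio.h>

    struct bpf_verifier_ops { const char *what; };
    struct bpf_prog_ops     { const char *what; };

    /* One line per program type expands to both ops tables. */
    #define BPF_PROG_TYPE(name)                             \
        const struct bpf_verifier_ops name ## _verifier_ops = \
            { "verify " #name };                            \
        const struct bpf_prog_ops name ## _prog_ops =       \
            { "run " #name };

    BPF_PROG_TYPE(xdp)
    BPF_PROG_TYPE(tc_cls_act)
    #undef BPF_PROG_TYPE

    int main(void)
    {
        printf("%s / %s\n", xdp_verifier_ops.what, xdp_prog_ops.what);
        return 0;
    }
]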
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0a977373d003..15ce30063765 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -5,10 +5,12 @@
5#include <linux/ipv6.h> 5#include <linux/ipv6.h>
6#include <linux/if_vlan.h> 6#include <linux/if_vlan.h>
7#include <net/dsa.h> 7#include <net/dsa.h>
8#include <net/dst_metadata.h>
8#include <net/ip.h> 9#include <net/ip.h>
9#include <net/ipv6.h> 10#include <net/ipv6.h>
10#include <net/gre.h> 11#include <net/gre.h>
11#include <net/pptp.h> 12#include <net/pptp.h>
13#include <net/tipc.h>
12#include <linux/igmp.h> 14#include <linux/igmp.h>
13#include <linux/icmp.h> 15#include <linux/icmp.h>
14#include <linux/sctp.h> 16#include <linux/sctp.h>
@@ -115,6 +117,102 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
115} 117}
116EXPORT_SYMBOL(__skb_flow_get_ports); 118EXPORT_SYMBOL(__skb_flow_get_ports);
117 119
120static void
121skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
122 struct flow_dissector *flow_dissector,
123 void *target_container)
124{
125 struct flow_dissector_key_control *ctrl;
126
127 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
128 return;
129
130 ctrl = skb_flow_dissector_target(flow_dissector,
131 FLOW_DISSECTOR_KEY_ENC_CONTROL,
132 target_container);
133 ctrl->addr_type = type;
134}
135
136static void
137__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
138 struct flow_dissector *flow_dissector,
139 void *target_container)
140{
141 struct ip_tunnel_info *info;
142 struct ip_tunnel_key *key;
143
144 /* A quick check to see if there might be something to do. */
145 if (!dissector_uses_key(flow_dissector,
146 FLOW_DISSECTOR_KEY_ENC_KEYID) &&
147 !dissector_uses_key(flow_dissector,
148 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
149 !dissector_uses_key(flow_dissector,
150 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
151 !dissector_uses_key(flow_dissector,
152 FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
153 !dissector_uses_key(flow_dissector,
154 FLOW_DISSECTOR_KEY_ENC_PORTS))
155 return;
156
157 info = skb_tunnel_info(skb);
158 if (!info)
159 return;
160
161 key = &info->key;
162
163 switch (ip_tunnel_info_af(info)) {
164 case AF_INET:
165 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
166 flow_dissector,
167 target_container);
168 if (dissector_uses_key(flow_dissector,
169 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
170 struct flow_dissector_key_ipv4_addrs *ipv4;
171
172 ipv4 = skb_flow_dissector_target(flow_dissector,
173 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
174 target_container);
175 ipv4->src = key->u.ipv4.src;
176 ipv4->dst = key->u.ipv4.dst;
177 }
178 break;
179 case AF_INET6:
180 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
181 flow_dissector,
182 target_container);
183 if (dissector_uses_key(flow_dissector,
184 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
185 struct flow_dissector_key_ipv6_addrs *ipv6;
186
187 ipv6 = skb_flow_dissector_target(flow_dissector,
188 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
189 target_container);
190 ipv6->src = key->u.ipv6.src;
191 ipv6->dst = key->u.ipv6.dst;
192 }
193 break;
194 }
195
196 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
197 struct flow_dissector_key_keyid *keyid;
198
199 keyid = skb_flow_dissector_target(flow_dissector,
200 FLOW_DISSECTOR_KEY_ENC_KEYID,
201 target_container);
202 keyid->keyid = tunnel_id_to_key32(key->tun_id);
203 }
204
205 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
206 struct flow_dissector_key_ports *tp;
207
208 tp = skb_flow_dissector_target(flow_dissector,
209 FLOW_DISSECTOR_KEY_ENC_PORTS,
210 target_container);
211 tp->src = key->tp_src;
212 tp->dst = key->tp_dst;
213 }
214}
215
118static enum flow_dissect_ret 216static enum flow_dissect_ret
119__skb_flow_dissect_mpls(const struct sk_buff *skb, 217__skb_flow_dissect_mpls(const struct sk_buff *skb,
120 struct flow_dissector *flow_dissector, 218 struct flow_dissector *flow_dissector,
@@ -478,6 +576,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
478 FLOW_DISSECTOR_KEY_BASIC, 576 FLOW_DISSECTOR_KEY_BASIC,
479 target_container); 577 target_container);
480 578
579 __skb_flow_dissect_tunnel_info(skb, flow_dissector,
580 target_container);
581
481 if (dissector_uses_key(flow_dissector, 582 if (dissector_uses_key(flow_dissector,
482 FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 583 FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
483 struct ethhdr *eth = eth_hdr(skb); 584 struct ethhdr *eth = eth_hdr(skb);
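
[Note on __skb_flow_dissect_tunnel_info() above: it copies decapsulated tunnel metadata (outer addresses, key/VNI, ports) out of skb_tunnel_info() into the ENC_* dissector keys, and it runs unconditionally from __skb_flow_dissect(); the batch of dissector_uses_key() checks up front keeps the common no-consumer case nearly free. A tiny model of that "bail out unless someone asked" guard; the key ids and bitmap are stand-ins:

    #include <stdbool.h>
    #include <stdio.h>

    enum { KEY_ENC_KEYID, KEY_ENC_IPV4, KEY_ENC_PORTS, KEY_MAX };

    struct dissector { unsigned long used_keys; };  /* one bit per key id */

    static bool uses_key(const struct dissector *d, int id)
    {
        return d->used_keys & (1UL << id);
    }

    static void dissect_tunnel(const struct dissector *d, bool have_tun_info)
    {
        /* Cheap pre-check: nothing requested means nothing to do. */
        if (!uses_key(d, KEY_ENC_KEYID) &&
            !uses_key(d, KEY_ENC_IPV4) &&
            !uses_key(d, KEY_ENC_PORTS))
            return;
        if (!have_tun_info)
            return;
        puts("fill requested ENC_* keys from tunnel metadata");
    }

    int main(void)
    {
        struct dissector plain  = { 0 };
        struct dissector flower = { 1UL << KEY_ENC_KEYID };

        dissect_tunnel(&plain, true);   /* skipped */
        dissect_tunnel(&flower, true);  /* fills keys */
        return 0;
    }
]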
@@ -672,23 +773,22 @@ proto_again:
672 break; 773 break;
673 } 774 }
674 case htons(ETH_P_TIPC): { 775 case htons(ETH_P_TIPC): {
675 struct { 776 struct tipc_basic_hdr *hdr, _hdr;
676 __be32 pre[3]; 777
677 __be32 srcnode; 778 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
678 } *hdr, _hdr; 779 data, hlen, &_hdr);
679 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
680 if (!hdr) { 780 if (!hdr) {
681 fdret = FLOW_DISSECT_RET_OUT_BAD; 781 fdret = FLOW_DISSECT_RET_OUT_BAD;
682 break; 782 break;
683 } 783 }
684 784
685 if (dissector_uses_key(flow_dissector, 785 if (dissector_uses_key(flow_dissector,
686 FLOW_DISSECTOR_KEY_TIPC_ADDRS)) { 786 FLOW_DISSECTOR_KEY_TIPC)) {
687 key_addrs = skb_flow_dissector_target(flow_dissector, 787 key_addrs = skb_flow_dissector_target(flow_dissector,
688 FLOW_DISSECTOR_KEY_TIPC_ADDRS, 788 FLOW_DISSECTOR_KEY_TIPC,
689 target_container); 789 target_container);
690 key_addrs->tipcaddrs.srcnode = hdr->srcnode; 790 key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
691 key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS; 791 key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
692 } 792 }
693 fdret = FLOW_DISSECT_RET_OUT_GOOD; 793 fdret = FLOW_DISSECT_RET_OUT_GOOD;
694 break; 794 break;
@@ -924,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
924 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1024 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
925 diff -= sizeof(flow->addrs.v6addrs); 1025 diff -= sizeof(flow->addrs.v6addrs);
926 break; 1026 break;
927 case FLOW_DISSECTOR_KEY_TIPC_ADDRS: 1027 case FLOW_DISSECTOR_KEY_TIPC:
928 diff -= sizeof(flow->addrs.tipcaddrs); 1028 diff -= sizeof(flow->addrs.tipckey);
929 break; 1029 break;
930 } 1030 }
931 return (sizeof(*flow) - diff) / sizeof(u32); 1031 return (sizeof(*flow) - diff) / sizeof(u32);
@@ -939,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow)
939 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1039 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
940 return (__force __be32)ipv6_addr_hash( 1040 return (__force __be32)ipv6_addr_hash(
941 &flow->addrs.v6addrs.src); 1041 &flow->addrs.v6addrs.src);
942 case FLOW_DISSECTOR_KEY_TIPC_ADDRS: 1042 case FLOW_DISSECTOR_KEY_TIPC:
943 return flow->addrs.tipcaddrs.srcnode; 1043 return flow->addrs.tipckey.key;
944 default: 1044 default:
945 return 0; 1045 return 0;
946 } 1046 }
@@ -1221,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
1221 .offset = offsetof(struct flow_keys, addrs.v6addrs), 1321 .offset = offsetof(struct flow_keys, addrs.v6addrs),
1222 }, 1322 },
1223 { 1323 {
1224 .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS, 1324 .key_id = FLOW_DISSECTOR_KEY_TIPC,
1225 .offset = offsetof(struct flow_keys, addrs.tipcaddrs), 1325 .offset = offsetof(struct flow_keys, addrs.tipckey),
1226 }, 1326 },
1227 { 1327 {
1228 .key_id = FLOW_DISSECTOR_KEY_PORTS, 1328 .key_id = FLOW_DISSECTOR_KEY_PORTS,
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 7c1ffd6f9501..9834cfa21b21 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -76,9 +76,9 @@ static void est_fetch_counters(struct net_rate_estimator *e,
76 76
77} 77}
78 78
79static void est_timer(unsigned long arg) 79static void est_timer(struct timer_list *t)
80{ 80{
81 struct net_rate_estimator *est = (struct net_rate_estimator *)arg; 81 struct net_rate_estimator *est = from_timer(est, t, timer);
82 struct gnet_stats_basic_packed b; 82 struct gnet_stats_basic_packed b;
83 u64 rate, brate; 83 u64 rate, brate;
84 84
@@ -170,7 +170,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
170 } 170 }
171 171
172 est->next_jiffies = jiffies + ((HZ/4) << intvl_log); 172 est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
173 setup_timer(&est->timer, est_timer, (unsigned long)est); 173 timer_setup(&est->timer, est_timer, 0);
174 mod_timer(&est->timer, est->next_jiffies); 174 mod_timer(&est->timer, est->next_jiffies);
175 175
176 rcu_assign_pointer(*rate_est, est); 176 rcu_assign_pointer(*rate_est, est);
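
[Note on the est_timer() change above: it is one instance of the tree-wide timer API conversion. Callbacks now take the struct timer_list * itself instead of a casted unsigned long, and recover their containing object with from_timer(), which is container_of() underneath; the casts and the separate data argument disappear. A standalone illustration of the recovery step, with from_timer() reimplemented here just to show the mechanics (GNU C typeof, like the kernel's own macro):

    #include <stddef.h>
    #include <stdio.h>

    struct timer_list { void (*function)(struct timer_list *); };

    #define from_timer(var, timer, field) \
        ((typeof(var))((char *)(timer) - offsetof(typeof(*var), field)))

    struct estimator {
        int avg_rate;
        struct timer_list timer;    /* embedded, not a pointer */
    };

    static void est_timer(struct timer_list *t)
    {
        struct estimator *est = from_timer(est, t, timer);

        printf("rate=%d\n", est->avg_rate);
    }

    int main(void)
    {
        struct estimator e = { .avg_rate = 7 };

        e.timer.function = est_timer;
        e.timer.function(&e.timer); /* what the timer core does on expiry */
        return 0;
    }
]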
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 814e58a3ce8b..4b54e5f107c6 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/skbuff.h> 2#include <linux/skbuff.h>
2#include <linux/slab.h> 3#include <linux/slab.h>
3#include <linux/netdevice.h> 4#include <linux/netdevice.h>
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1307731ddfe4..e7e626fb87bb 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
51 */ 51 */
52 preempt_disable(); 52 preempt_disable();
53 rcu_read_lock(); 53 rcu_read_lock();
54 bpf_compute_data_end(skb); 54 bpf_compute_data_pointers(skb);
55 ret = bpf_prog_run_save_cb(lwt->prog, skb); 55 ret = bpf_prog_run_save_cb(lwt->prog, skb);
56 rcu_read_unlock(); 56 rcu_read_unlock();
57 57
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 16a1a4c4eb57..d1f5fe986edd 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -51,7 +51,7 @@ do { \
51 51
52#define PNEIGH_HASHMASK 0xF 52#define PNEIGH_HASHMASK 0xF
53 53
54static void neigh_timer_handler(unsigned long arg); 54static void neigh_timer_handler(struct timer_list *t);
55static void __neigh_notify(struct neighbour *n, int type, int flags, 55static void __neigh_notify(struct neighbour *n, int type, int flags,
56 u32 pid); 56 u32 pid);
57static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); 57static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
@@ -331,7 +331,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
331 n->output = neigh_blackhole; 331 n->output = neigh_blackhole;
332 seqlock_init(&n->hh.hh_lock); 332 seqlock_init(&n->hh.hh_lock);
333 n->parms = neigh_parms_clone(&tbl->parms); 333 n->parms = neigh_parms_clone(&tbl->parms);
334 setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n); 334 timer_setup(&n->timer, neigh_timer_handler, 0);
335 335
336 NEIGH_CACHE_STAT_INC(tbl, allocs); 336 NEIGH_CACHE_STAT_INC(tbl, allocs);
337 n->tbl = tbl; 337 n->tbl = tbl;
@@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
457 const void *pkey) 457 const void *pkey)
458{ 458{
459 struct neighbour *n; 459 struct neighbour *n;
460 int key_len = tbl->key_len; 460 unsigned int key_len = tbl->key_len;
461 u32 hash_val; 461 u32 hash_val;
462 struct neigh_hash_table *nht; 462 struct neigh_hash_table *nht;
463 463
@@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
488 struct net_device *dev, bool want_ref) 488 struct net_device *dev, bool want_ref)
489{ 489{
490 u32 hash_val; 490 u32 hash_val;
491 int key_len = tbl->key_len; 491 unsigned int key_len = tbl->key_len;
492 int error; 492 int error;
493 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); 493 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
494 struct neigh_hash_table *nht; 494 struct neigh_hash_table *nht;
@@ -572,7 +572,7 @@ out_neigh_release:
572} 572}
573EXPORT_SYMBOL(__neigh_create); 573EXPORT_SYMBOL(__neigh_create);
574 574
575static u32 pneigh_hash(const void *pkey, int key_len) 575static u32 pneigh_hash(const void *pkey, unsigned int key_len)
576{ 576{
577 u32 hash_val = *(u32 *)(pkey + key_len - 4); 577 u32 hash_val = *(u32 *)(pkey + key_len - 4);
578 hash_val ^= (hash_val >> 16); 578 hash_val ^= (hash_val >> 16);
@@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len)
585static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, 585static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
586 struct net *net, 586 struct net *net,
587 const void *pkey, 587 const void *pkey,
588 int key_len, 588 unsigned int key_len,
589 struct net_device *dev) 589 struct net_device *dev)
590{ 590{
591 while (n) { 591 while (n) {
@@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
601struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, 601struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
602 struct net *net, const void *pkey, struct net_device *dev) 602 struct net *net, const void *pkey, struct net_device *dev)
603{ 603{
604 int key_len = tbl->key_len; 604 unsigned int key_len = tbl->key_len;
605 u32 hash_val = pneigh_hash(pkey, key_len); 605 u32 hash_val = pneigh_hash(pkey, key_len);
606 606
607 return __pneigh_lookup_1(tbl->phash_buckets[hash_val], 607 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
@@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
614 struct net_device *dev, int creat) 614 struct net_device *dev, int creat)
615{ 615{
616 struct pneigh_entry *n; 616 struct pneigh_entry *n;
617 int key_len = tbl->key_len; 617 unsigned int key_len = tbl->key_len;
618 u32 hash_val = pneigh_hash(pkey, key_len); 618 u32 hash_val = pneigh_hash(pkey, key_len);
619 619
620 read_lock_bh(&tbl->lock); 620 read_lock_bh(&tbl->lock);
@@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
659 struct net_device *dev) 659 struct net_device *dev)
660{ 660{
661 struct pneigh_entry *n, **np; 661 struct pneigh_entry *n, **np;
662 int key_len = tbl->key_len; 662 unsigned int key_len = tbl->key_len;
663 u32 hash_val = pneigh_hash(pkey, key_len); 663 u32 hash_val = pneigh_hash(pkey, key_len);
664 664
665 write_lock_bh(&tbl->lock); 665 write_lock_bh(&tbl->lock);
@@ -903,10 +903,10 @@ static void neigh_probe(struct neighbour *neigh)
903 903
904/* Called when a timer expires for a neighbour entry. */ 904/* Called when a timer expires for a neighbour entry. */
905 905
906static void neigh_timer_handler(unsigned long arg) 906static void neigh_timer_handler(struct timer_list *t)
907{ 907{
908 unsigned long now, next; 908 unsigned long now, next;
909 struct neighbour *neigh = (struct neighbour *)arg; 909 struct neighbour *neigh = from_timer(neigh, t, timer);
910 unsigned int state; 910 unsigned int state;
911 int notify = 0; 911 int notify = 0;
912 912
@@ -1391,9 +1391,9 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1391} 1391}
1392EXPORT_SYMBOL(neigh_direct_output); 1392EXPORT_SYMBOL(neigh_direct_output);
1393 1393
1394static void neigh_proxy_process(unsigned long arg) 1394static void neigh_proxy_process(struct timer_list *t)
1395{ 1395{
1396 struct neigh_table *tbl = (struct neigh_table *)arg; 1396 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1397 long sched_next = 0; 1397 long sched_next = 0;
1398 unsigned long now = jiffies; 1398 unsigned long now = jiffies;
1399 struct sk_buff *skb, *n; 1399 struct sk_buff *skb, *n;
@@ -1573,7 +1573,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
1573 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); 1573 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1574 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, 1574 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1575 tbl->parms.reachable_time); 1575 tbl->parms.reachable_time);
1576 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); 1576 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1577 skb_queue_head_init_class(&tbl->proxy_queue, 1577 skb_queue_head_init_class(&tbl->proxy_queue,
1578 &neigh_table_proxy_queue_class); 1578 &neigh_table_proxy_queue_class);
1579 1579
@@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1662 if (tbl == NULL) 1662 if (tbl == NULL)
1663 return -EAFNOSUPPORT; 1663 return -EAFNOSUPPORT;
1664 1664
1665 if (nla_len(dst_attr) < tbl->key_len) 1665 if (nla_len(dst_attr) < (int)tbl->key_len)
1666 goto out; 1666 goto out;
1667 1667
1668 if (ndm->ndm_flags & NTF_PROXY) { 1668 if (ndm->ndm_flags & NTF_PROXY) {
@@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1730 if (tbl == NULL) 1730 if (tbl == NULL)
1731 return -EAFNOSUPPORT; 1731 return -EAFNOSUPPORT;
1732 1732
1733 if (nla_len(tb[NDA_DST]) < tbl->key_len) 1733 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
1734 goto out; 1734 goto out;
1735 dst = nla_data(tb[NDA_DST]); 1735 dst = nla_data(tb[NDA_DST]);
1736 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; 1736 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
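
[Note on the key_len changes above: tbl->key_len becoming unsigned int is why the nla_len() comparisons grow (int) casts. nla_len() returns int, and comparing an int against an unsigned promotes the int to unsigned, so a negative (malformed) attribute length would compare as a huge positive value and slip past the too-short check. Casting key_len back to int keeps the comparison signed. The pitfall in a few lines:

    #include <stdio.h>

    int main(void)
    {
        int nla_len = -4;           /* malformed attribute length */
        unsigned int key_len = 16;

        /* int is converted to unsigned: -4 becomes 4294967292 */
        printf("%d\n", nla_len < key_len);      /* 0: check wrongly passes */
        printf("%d\n", nla_len < (int)key_len); /* 1: rejected as intended */
        return 0;
    }
]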
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 4847964931df..615ccab55f38 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/netdevice.h> 2#include <linux/netdevice.h>
2#include <linux/proc_fs.h> 3#include <linux/proc_fs.h>
3#include <linux/seq_file.h> 4#include <linux/seq_file.h>
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 927a6dcbad96..799b75268291 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -382,7 +382,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
382 struct net_device *netdev = to_net_dev(dev); 382 struct net_device *netdev = to_net_dev(dev);
383 struct net *net = dev_net(netdev); 383 struct net *net = dev_net(netdev);
384 size_t count = len; 384 size_t count = len;
385 ssize_t ret; 385 ssize_t ret = 0;
386 386
387 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 387 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
388 return -EPERM; 388 return -EPERM;
@@ -393,23 +393,30 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
393 393
394 if (!rtnl_trylock()) 394 if (!rtnl_trylock())
395 return restart_syscall(); 395 return restart_syscall();
396 ret = dev_set_alias(netdev, buf, count); 396
397 if (dev_isalive(netdev)) {
398 ret = dev_set_alias(netdev, buf, count);
399 if (ret < 0)
400 goto err;
401 ret = len;
402 netdev_state_change(netdev);
403 }
404err:
397 rtnl_unlock(); 405 rtnl_unlock();
398 406
399 return ret < 0 ? ret : len; 407 return ret;
400} 408}
401 409
402static ssize_t ifalias_show(struct device *dev, 410static ssize_t ifalias_show(struct device *dev,
403 struct device_attribute *attr, char *buf) 411 struct device_attribute *attr, char *buf)
404{ 412{
405 const struct net_device *netdev = to_net_dev(dev); 413 const struct net_device *netdev = to_net_dev(dev);
414 char tmp[IFALIASZ];
406 ssize_t ret = 0; 415 ssize_t ret = 0;
407 416
408 if (!rtnl_trylock()) 417 ret = dev_get_alias(netdev, tmp, sizeof(tmp));
409 return restart_syscall(); 418 if (ret > 0)
410 if (netdev->ifalias) 419 ret = sprintf(buf, "%s\n", tmp);
411 ret = sprintf(buf, "%s\n", netdev->ifalias);
412 rtnl_unlock();
413 return ret; 420 return ret;
414} 421}
415static DEVICE_ATTR_RW(ifalias); 422static DEVICE_ATTR_RW(ifalias);
@@ -1488,7 +1495,10 @@ static void netdev_release(struct device *d)
1488 1495
1489 BUG_ON(dev->reg_state != NETREG_RELEASED); 1496 BUG_ON(dev->reg_state != NETREG_RELEASED);
1490 1497
1491 kfree(dev->ifalias); 1498 /* no need to wait for rcu grace period:
1499 * device is dead and about to be freed.
1500 */
1501 kfree(rcu_access_pointer(dev->ifalias));
1492 netdev_freemem(dev); 1502 netdev_freemem(dev);
1493} 1503}
1494 1504
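
[Note on the ifalias changes above: ifalias_show() no longer needs to trylock rtnl because the alias string becomes RCU-managed; readers snapshot it under rcu_read_lock() via the new dev_get_alias(), writers publish replacements and let the old string linger a grace period, and netdev_release() may kfree() the pointer directly since the device is already dead. A runnable userspace model of the read side; the RCU calls are stubbed out and struct dev_ifalias is simplified (the kernel version carries an rcu_head and a flexible array):

    #include <stdio.h>
    #include <string.h>

    #define rcu_read_lock()     do { } while (0)
    #define rcu_read_unlock()   do { } while (0)
    #define rcu_dereference(p)  (p)

    struct dev_ifalias { char ifalias[16]; };

    struct net_device { struct dev_ifalias *ifalias; };

    static int dev_get_alias(const struct net_device *dev, char *name, size_t len)
    {
        const struct dev_ifalias *alias;
        int ret = 0;

        rcu_read_lock();    /* no rtnl needed any more */
        alias = rcu_dereference(dev->ifalias);
        if (alias)
            ret = snprintf(name, len, "%s", alias->ifalias);
        rcu_read_unlock();
        return ret;
    }

    int main(void)
    {
        struct dev_ifalias a = { "uplink" };
        struct net_device dev = { &a };
        char buf[16];

        if (dev_get_alias(&dev, buf, sizeof(buf)) > 0)
            puts(buf);
        return 0;
    }
]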
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 2745a1b51e03..006876c7b78d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __NET_SYSFS_H__ 2#ifndef __NET_SYSFS_H__
2#define __NET_SYSFS_H__ 3#define __NET_SYSFS_H__
3 4
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 1132820c8e62..380934580fa1 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * consolidates trace point definitions 3 * consolidates trace point definitions
3 * 4 *
@@ -31,6 +32,7 @@
31#include <trace/events/napi.h> 32#include <trace/events/napi.h>
32#include <trace/events/sock.h> 33#include <trace/events/sock.h>
33#include <trace/events/udp.h> 34#include <trace/events/udp.h>
35#include <trace/events/tcp.h>
34#include <trace/events/fib.h> 36#include <trace/events/fib.h>
35#include <trace/events/qdisc.h> 37#include <trace/events/qdisc.h>
36#if IS_ENABLED(CONFIG_IPV6) 38#if IS_ENABLED(CONFIG_IPV6)
@@ -48,3 +50,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
48EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); 50EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
49 51
50EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); 52EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
53
54EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6cfdc7c84c48..b797832565d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
234 rtnl_net_notifyid(net, RTM_NEWNSID, id); 234 rtnl_net_notifyid(net, RTM_NEWNSID, id);
235 return id; 235 return id;
236} 236}
237EXPORT_SYMBOL_GPL(peernet2id_alloc);
237 238
238/* This function returns, if assigned, the id of a peer netns. */ 239/* This function returns, if assigned, the id of a peer netns. */
239int peernet2id(struct net *net, struct net *peer) 240int peernet2id(struct net *net, struct net *peer)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 912731bed7b7..57557a6a950c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -334,7 +334,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
334 /* It is up to the caller to keep npinfo alive. */ 334 /* It is up to the caller to keep npinfo alive. */
335 struct netpoll_info *npinfo; 335 struct netpoll_info *npinfo;
336 336
337 WARN_ON_ONCE(!irqs_disabled()); 337 lockdep_assert_irqs_disabled();
338 338
339 npinfo = rcu_dereference_bh(np->dev->npinfo); 339 npinfo = rcu_dereference_bh(np->dev->npinfo);
340 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { 340 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6e1e10ff433a..f95a15086225 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2165,7 +2165,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2165 + pkt_dev->pkt_overhead; 2165 + pkt_dev->pkt_overhead;
2166 } 2166 }
2167 2167
2168 for (i = 0; i < IN6_ADDR_HSIZE; i++) 2168 for (i = 0; i < sizeof(struct in6_addr); i++)
2169 if (pkt_dev->cur_in6_saddr.s6_addr[i]) { 2169 if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
2170 set = 1; 2170 set = 1;
2171 break; 2171 break;
@@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
2711static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, 2711static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
2712 int datalen) 2712 int datalen)
2713{ 2713{
2714 struct timeval timestamp; 2714 struct timespec64 timestamp;
2715 struct pktgen_hdr *pgh; 2715 struct pktgen_hdr *pgh;
2716 2716
2717 pgh = skb_put(skb, sizeof(*pgh)); 2717 pgh = skb_put(skb, sizeof(*pgh));
@@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
2773 pgh->tv_sec = 0; 2773 pgh->tv_sec = 0;
2774 pgh->tv_usec = 0; 2774 pgh->tv_usec = 0;
2775 } else { 2775 } else {
2776 do_gettimeofday(&timestamp); 2776 /*
2777 * pgh->tv_sec wraps in y2106 when interpreted as unsigned
2778 * as done by wireshark, or y2038 when interpreted as signed.
2779 * This is probably harmless, but if anyone wants to improve
2780 * it, we could introduce a variant that puts 64-bit nanoseconds
2781 * into the respective header bytes.
2782 * This would also be slightly faster to read.
2783 */
2784 ktime_get_real_ts64(&timestamp);
2777 pgh->tv_sec = htonl(timestamp.tv_sec); 2785 pgh->tv_sec = htonl(timestamp.tv_sec);
2778 pgh->tv_usec = htonl(timestamp.tv_usec); 2786 pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
2779 } 2787 }
2780} 2788}
2781 2789
@@ -3377,7 +3385,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
3377 3385
3378static void pktgen_xmit(struct pktgen_dev *pkt_dev) 3386static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3379{ 3387{
3380 unsigned int burst = ACCESS_ONCE(pkt_dev->burst); 3388 unsigned int burst = READ_ONCE(pkt_dev->burst);
3381 struct net_device *odev = pkt_dev->odev; 3389 struct net_device *odev = pkt_dev->odev;
3382 struct netdev_queue *txq; 3390 struct netdev_queue *txq;
3383 struct sk_buff *skb; 3391 struct sk_buff *skb;
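
[Note on the pktgen timestamp change above: do_gettimeofday() with its 32-bit-seconds struct timeval gives way to ktime_get_real_ts64(), converting nanoseconds to microseconds by hand since the on-wire header keeps its sec/usec pair; the new comment spells out the wrap that remains in the 32-bit wire field. The userspace equivalent of the conversion (byte-swapping with htonl(), which the kernel code applies, is omitted here):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    #define NSEC_PER_USEC 1000L

    int main(void)
    {
        struct timespec ts;     /* 64-bit tv_sec, like timespec64 */

        clock_gettime(CLOCK_REALTIME, &ts);

        /* What pktgen now puts on the wire, still truncated to 32 bits. */
        uint32_t tv_sec  = (uint32_t)ts.tv_sec;
        uint32_t tv_usec = (uint32_t)(ts.tv_nsec / NSEC_PER_USEC);

        printf("%u.%06u\n", tv_sec, tv_usec);
        return 0;
    }
]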
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a78fd61da0ec..dabba2a91fc8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -453,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
 {
 	const struct rtnl_af_ops *ops;
 
-	list_for_each_entry(ops, &rtnl_af_ops, list) {
+	list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
 		if (ops->family == family)
 			return ops;
 	}
@@ -470,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
 void rtnl_af_register(struct rtnl_af_ops *ops)
 {
 	rtnl_lock();
-	list_add_tail(&ops->list, &rtnl_af_ops);
+	list_add_tail_rcu(&ops->list, &rtnl_af_ops);
 	rtnl_unlock();
 }
 EXPORT_SYMBOL_GPL(rtnl_af_register);
 
 /**
- * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
- * @ops: struct rtnl_af_ops * to unregister
- *
- * The caller must hold the rtnl_mutex.
- */
-void __rtnl_af_unregister(struct rtnl_af_ops *ops)
-{
-	list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
-
-/**
  * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
  * @ops: struct rtnl_af_ops * to unregister
  */
 void rtnl_af_unregister(struct rtnl_af_ops *ops)
 {
 	rtnl_lock();
-	__rtnl_af_unregister(ops);
+	list_del_rcu(&ops->list);
 	rtnl_unlock();
+
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(rtnl_af_unregister);
 
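This register/unregister pair is the canonical RCU-protected list recipe: writers serialize on a lock and publish through the _rcu list helpers, and the unregister path waits out readers before the entry may be freed. A generic sketch with illustrative names:

#include <linux/mutex.h>
#include <linux/rculist.h>

struct demo_ops {
	int family;
	struct list_head list;
};

static LIST_HEAD(demo_ops_list);
static DEFINE_MUTEX(demo_ops_lock);

static void demo_register(struct demo_ops *ops)
{
	mutex_lock(&demo_ops_lock);
	list_add_tail_rcu(&ops->list, &demo_ops_list);
	mutex_unlock(&demo_ops_lock);
}

static void demo_unregister(struct demo_ops *ops)
{
	mutex_lock(&demo_ops_lock);
	list_del_rcu(&ops->list);
	mutex_unlock(&demo_ops_lock);
	synchronize_rcu();	/* no reader still sees 'ops' after this */
}

static struct demo_ops *demo_lookup(int family)
{
	struct demo_ops *ops;

	/* caller must hold rcu_read_lock() */
	list_for_each_entry_rcu(ops, &demo_ops_list, list)
		if (ops->family == family)
			return ops;
	return NULL;
}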
@@ -508,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
 	/* IFLA_AF_SPEC */
 	size = nla_total_size(sizeof(struct nlattr));
 
-	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
 		if (af_ops->get_link_af_size) {
 			/* AF_* + nested data */
 			size += nla_total_size(sizeof(struct nlattr)) +
 				af_ops->get_link_af_size(dev, ext_filter_mask);
 		}
 	}
+	rcu_read_unlock();
 
 	return size;
 }
@@ -522,11 +514,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
 static bool rtnl_have_link_slave_info(const struct net_device *dev)
 {
 	struct net_device *master_dev;
+	bool ret = false;
 
-	master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+	rcu_read_lock();
+
+	master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
 	if (master_dev && master_dev->rtnl_link_ops)
-		return true;
-	return false;
+		ret = true;
+	rcu_read_unlock();
+	return ret;
 }
 
 static int rtnl_link_slave_info_fill(struct sk_buff *skb,
@@ -923,8 +919,10 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
 	       + rtnl_xdp_size() /* IFLA_XDP */
 	       + nla_total_size(4)  /* IFLA_EVENT */
-	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
-
+	       + nla_total_size(4)  /* IFLA_NEW_NETNSID */
+	       + nla_total_size(1)  /* IFLA_PROTO_DOWN */
+	       + nla_total_size(4)  /* IFLA_IF_NETNSID */
+	       + 0;
 }
 
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
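The size function deliberately over-estimates: each nla_total_size() term is the attribute payload rounded up to NLA_ALIGNTO plus the attribute header, and the trailing "+ 0" lets every real term carry its own annotated "+" line. A condensed sketch of such a sizing helper (the function is hypothetical):

#include <linux/rtnetlink.h>
#include <net/netlink.h>

static size_t demo_ifmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(4)	/* some u32 attribute */
	       + nla_total_size(1)	/* some u8 attribute */
	       + 0;
}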
@@ -1211,6 +1209,36 @@ nla_put_vfinfo_failure:
 	return -EMSGSIZE;
 }
 
+static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
+					   struct net_device *dev,
+					   u32 ext_filter_mask)
+{
+	struct nlattr *vfinfo;
+	int i, num_vfs;
+
+	if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
+		return 0;
+
+	num_vfs = dev_num_vf(dev->dev.parent);
+	if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
+		return -EMSGSIZE;
+
+	if (!dev->netdev_ops->ndo_get_vf_config)
+		return 0;
+
+	vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+	if (!vfinfo)
+		return -EMSGSIZE;
+
+	for (i = 0; i < num_vfs; i++) {
+		if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+			return -EMSGSIZE;
+	}
+
+	nla_nest_end(skb, vfinfo);
+	return 0;
+}
+
 static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 {
 	struct rtnl_link_ifmap map;
@@ -1242,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
 		*prog_id = generic_xdp_prog->aux->id;
 		return XDP_ATTACHED_SKB;
 	}
-	if (!ops->ndo_xdp)
+	if (!ops->ndo_bpf)
 		return XDP_ATTACHED_NONE;
 
-	return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id);
+	return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1307,16 +1335,108 @@ static u32 rtnl_get_event(unsigned long event)
 	return rtnl_event_type;
 }
 
-static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct net_device *upper_dev;
+	int ret = 0;
+
+	rcu_read_lock();
+
+	upper_dev = netdev_master_upper_dev_get_rcu(dev);
+	if (upper_dev)
+		ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+{
+	int ifindex = dev_get_iflink(dev);
+
+	if (dev->ifindex == ifindex)
+		return 0;
+
+	return nla_put_u32(skb, IFLA_LINK, ifindex);
+}
+
+static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
+					      struct net_device *dev)
+{
+	char buf[IFALIASZ];
+	int ret;
+
+	ret = dev_get_alias(dev, buf, sizeof(buf));
+	return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0;
+}
+
+static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+				  const struct net_device *dev,
+				  struct net *src_net)
+{
+	if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
+		struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+		if (!net_eq(dev_net(dev), link_net)) {
+			int id = peernet2id_alloc(src_net, link_net);
+
+			if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+				return -EMSGSIZE;
+		}
+	}
+
+	return 0;
+}
+
+static int rtnl_fill_link_af(struct sk_buff *skb,
+			     const struct net_device *dev,
+			     u32 ext_filter_mask)
+{
+	const struct rtnl_af_ops *af_ops;
+	struct nlattr *af_spec;
+
+	af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
+	if (!af_spec)
+		return -EMSGSIZE;
+
+	list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
+		struct nlattr *af;
+		int err;
+
+		if (!af_ops->fill_link_af)
+			continue;
+
+		af = nla_nest_start(skb, af_ops->family);
+		if (!af)
+			return -EMSGSIZE;
+
+		err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
+		/*
+		 * Caller may return ENODATA to indicate that there
+		 * was no data to be dumped. This is not an error, it
+		 * means we should trim the attribute header and
+		 * continue.
+		 */
+		if (err == -ENODATA)
+			nla_nest_cancel(skb, af);
+		else if (err < 0)
+			return -EMSGSIZE;
+
+		nla_nest_end(skb, af);
+	}
+
+	nla_nest_end(skb, af_spec);
+	return 0;
+}
+
+static int rtnl_fill_ifinfo(struct sk_buff *skb,
+			    struct net_device *dev, struct net *src_net,
 			    int type, u32 pid, u32 seq, u32 change,
-			    unsigned int flags, u32 ext_filter_mask,
-			    u32 event)
+			    unsigned int flags, u32 ext_filter_mask,
+			    u32 event, int *new_nsid, int tgt_netnsid)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	struct nlattr *af_spec;
-	struct rtnl_af_ops *af_ops;
-	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 
 	ASSERT_RTNL();
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
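The refactoring above splits rtnl_fill_ifinfo() into per-attribute helpers; rtnl_fill_link_af() keeps the nest-and-maybe-cancel idiom described in its comment. A reduced sketch of that idiom (attribute numbers are illustrative):

#include <net/netlink.h>

static int demo_fill_nested(struct sk_buff *skb, int attrtype)
{
	struct nlattr *nest = nla_nest_start(skb, attrtype);

	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, 1 /* illustrative member type */, 42))
		goto cancel;

	nla_nest_end(skb, nest);	/* patches the final nest length */
	return 0;

cancel:
	nla_nest_cancel(skb, nest);	/* trims the partial nest */
	return -EMSGSIZE;
}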
@@ -1331,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	ifm->ifi_flags = dev_get_flags(dev);
 	ifm->ifi_change = change;
 
+	if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
+		goto nla_put_failure;
+
 	if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
 	    nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
 	    nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1345,15 +1468,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 #ifdef CONFIG_RPS
 	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
 #endif
-	    (dev->ifindex != dev_get_iflink(dev) &&
-	     nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
-	    (upper_dev &&
-	     nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
+	    nla_put_iflink(skb, dev) ||
+	    put_master_ifindex(skb, dev) ||
 	    nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
 	    (dev->qdisc &&
 	     nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
-	    (dev->ifalias &&
-	     nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+	    nla_put_ifalias(skb, dev) ||
 	    nla_put_u32(skb, IFLA_CARRIER_CHANGES,
 			atomic_read(&dev->carrier_changes)) ||
 	    nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
@@ -1385,27 +1505,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_fill_stats(skb, dev))
 		goto nla_put_failure;
 
-	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
-	    nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
+	if (rtnl_fill_vf(skb, dev, ext_filter_mask))
 		goto nla_put_failure;
 
-	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
-	    ext_filter_mask & RTEXT_FILTER_VF) {
-		int i;
-		struct nlattr *vfinfo;
-		int num_vfs = dev_num_vf(dev->dev.parent);
-
-		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
-		if (!vfinfo)
-			goto nla_put_failure;
-		for (i = 0; i < num_vfs; i++) {
-			if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
-				goto nla_put_failure;
-		}
-
-		nla_nest_end(skb, vfinfo);
-	}
-
 	if (rtnl_port_fill(skb, dev, ext_filter_mask))
 		goto nla_put_failure;
 
@@ -1417,51 +1519,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		goto nla_put_failure;
 	}
 
-	if (dev->rtnl_link_ops &&
-	    dev->rtnl_link_ops->get_link_net) {
-		struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
-
-		if (!net_eq(dev_net(dev), link_net)) {
-			int id = peernet2id_alloc(dev_net(dev), link_net);
-
-			if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
-				goto nla_put_failure;
-		}
-	}
-
-	if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+	if (rtnl_fill_link_netnsid(skb, dev, src_net))
 		goto nla_put_failure;
 
-	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
-		if (af_ops->fill_link_af) {
-			struct nlattr *af;
-			int err;
-
-			if (!(af = nla_nest_start(skb, af_ops->family)))
-				goto nla_put_failure;
-
-			err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
-
-			/*
-			 * Caller may return ENODATA to indicate that there
-			 * was no data to be dumped. This is not an error, it
-			 * means we should trim the attribute header and
-			 * continue.
-			 */
-			if (err == -ENODATA)
-				nla_nest_cancel(skb, af);
-			else if (err < 0)
-				goto nla_put_failure;
-
-			nla_nest_end(skb, af);
-		}
-	}
+	if (new_nsid &&
+	    nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
+		goto nla_put_failure;
 
-	nla_nest_end(skb, af_spec);
+	rcu_read_lock();
+	if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
+		goto nla_put_failure_rcu;
+	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
 	return 0;
 
+nla_put_failure_rcu:
+	rcu_read_unlock();
 nla_put_failure:
 	nlmsg_cancel(skb, nlh);
 	return -EMSGSIZE;
@@ -1483,7 +1557,10 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_NET_NS_FD]	= { .type = NLA_U32 },
-	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
+	/* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
+	 * allow 0-length string (needed to remove an alias).
+	 */
+	[IFLA_IFALIAS]	        = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
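NLA_STRING validation rejects an empty payload, which is exactly what userspace sends to clear an alias; NLA_BINARY keeps the length cap but admits zero bytes. A minimal policy sketch (the attribute enum is hypothetical):

#include <net/netlink.h>

enum { DEMO_ATTR_UNSPEC, DEMO_ATTR_ALIAS, __DEMO_ATTR_MAX };
#define DEMO_ATTR_MAX (__DEMO_ATTR_MAX - 1)

static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
	/* NLA_STRING would bounce a 0-length attribute during parsing */
	[DEMO_ATTR_ALIAS] = { .type = NLA_BINARY, .len = 255 },
};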
@@ -1500,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_XDP]		= { .type = NLA_NESTED },
 	[IFLA_EVENT]		= { .type = NLA_U32 },
 	[IFLA_GROUP]		= { .type = NLA_U32 },
+	[IFLA_IF_NETNSID]	= { .type = NLA_S32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1603,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev,
 	return false;
 }
 
+static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+{
+	struct net *net;
+
+	net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+	if (!net)
+		return ERR_PTR(-EINVAL);
+
+	/* For now, the caller is required to have CAP_NET_ADMIN in
+	 * the user namespace owning the target net ns.
+	 */
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+		put_net(net);
+		return ERR_PTR(-EACCES);
+	}
+	return net;
+}
+
 static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
+	struct net *tgt_net = net;
 	int h, s_h;
 	int idx = 0, s_idx;
 	struct net_device *dev;
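get_target_net() uses the pointer-encoded errno convention, so a single return value carries either the namespace or the failure. The general shape of that convention (demo types are illustrative):

#include <linux/err.h>

struct demo_obj { int id; };
static struct demo_obj demo_table[4];

static struct demo_obj *demo_get(int id)
{
	if (id < 0 || id >= 4)
		return ERR_PTR(-EINVAL);	/* errno folded into the pointer */
	return &demo_table[id];
}

static int demo_use(int id)
{
	struct demo_obj *obj = demo_get(id);

	if (IS_ERR(obj))
		return PTR_ERR(obj);	/* unfold and propagate */
	return obj->id;
}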
@@ -1615,6 +1712,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	const struct rtnl_link_ops *kind_ops = NULL;
 	unsigned int flags = NLM_F_MULTI;
 	int master_idx = 0;
+	int netnsid = -1;
 	int err;
 	int hdrlen;
 
@@ -1633,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
 			ifla_policy, NULL) >= 0) {
+		if (tb[IFLA_IF_NETNSID]) {
+			netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+			tgt_net = get_target_net(skb, netnsid);
+			if (IS_ERR(tgt_net)) {
+				tgt_net = net;
+				netnsid = -1;
+			}
+		}
+
 		if (tb[IFLA_EXT_MASK])
 			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
 
@@ -1648,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
-		head = &net->dev_index_head[h];
+		head = &tgt_net->dev_index_head[h];
 		hlist_for_each_entry(dev, head, index_hlist) {
 			if (link_dump_filtered(dev, master_idx, kind_ops))
 				goto cont;
 			if (idx < s_idx)
 				goto cont;
-			err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+			err = rtnl_fill_ifinfo(skb, dev, net,
+					       RTM_NEWLINK,
 					       NETLINK_CB(cb->skb).portid,
 					       cb->nlh->nlmsg_seq, 0,
 					       flags,
-					       ext_filter_mask, 0);
+					       ext_filter_mask, 0, NULL,
+					       netnsid);
 
 			if (err < 0) {
 				if (likely(skb->len))
@@ -1677,6 +1786,8 @@ out_err:
 	cb->args[0] = h;
 	cb->seq = net->dev_base_seq;
 	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+	if (netnsid >= 0)
+		put_net(tgt_net);
 
 	return err;
 }
@@ -1723,17 +1834,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
 			const struct rtnl_af_ops *af_ops;
 
-			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+			rcu_read_lock();
+			af_ops = rtnl_af_lookup(nla_type(af));
+			if (!af_ops) {
+				rcu_read_unlock();
 				return -EAFNOSUPPORT;
+			}
 
-			if (!af_ops->set_link_af)
+			if (!af_ops->set_link_af) {
+				rcu_read_unlock();
 				return -EOPNOTSUPP;
+			}
 
 			if (af_ops->validate_link_af) {
 				err = af_ops->validate_link_af(dev, af);
-				if (err < 0)
+				if (err < 0) {
+					rcu_read_unlock();
 					return err;
+				}
 			}
+
+			rcu_read_unlock();
 		}
 	}
 
@@ -1909,7 +2030,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 	return err;
 }
 
-static int do_set_master(struct net_device *dev, int ifindex)
+static int do_set_master(struct net_device *dev, int ifindex,
+			 struct netlink_ext_ack *extack)
 {
 	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 	const struct net_device_ops *ops;
@@ -1934,7 +2056,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
 			return -EINVAL;
 		ops = upper_dev->netdev_ops;
 		if (ops->ndo_add_slave) {
-			err = ops->ndo_add_slave(upper_dev, dev);
+			err = ops->ndo_add_slave(upper_dev, dev, extack);
 			if (err)
 				return err;
 		} else {
@@ -2067,7 +2189,7 @@ static int do_setlink(const struct sk_buff *skb,
 	}
 
 	if (tb[IFLA_MASTER]) {
-		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
 		if (err)
 			goto errout;
 		status |= DO_SETLINK_MODIFIED;
@@ -2093,7 +2215,7 @@ static int do_setlink(const struct sk_buff *skb,
 				dev->tx_queue_len = orig_len;
 				goto errout;
 			}
-			status |= DO_SETLINK_NOTIFY;
+			status |= DO_SETLINK_MODIFIED;
 		}
 	}
 
@@ -2190,13 +2312,17 @@ static int do_setlink(const struct sk_buff *skb,
 		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
 			const struct rtnl_af_ops *af_ops;
 
-			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
-				BUG();
+			rcu_read_lock();
+
+			BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
 
 			err = af_ops->set_link_af(dev, af);
-			if (err < 0)
+			if (err < 0) {
+				rcu_read_unlock();
 				goto errout;
+			}
 
+			rcu_read_unlock();
 			status |= DO_SETLINK_NOTIFY;
 		}
 	}
@@ -2248,7 +2374,7 @@ static int do_setlink(const struct sk_buff *skb,
 
 errout:
 	if (status & DO_SETLINK_MODIFIED) {
-		if (status & DO_SETLINK_NOTIFY)
+		if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
 			netdev_state_change(dev);
 
 		if (err < 0)
@@ -2274,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err < 0)
 		goto errout;
 
+	if (tb[IFLA_IF_NETNSID])
+		return -EOPNOTSUPP;
+
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 	else
2279 else 2408 else
@@ -2368,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
2368 if (err < 0) 2497 if (err < 0)
2369 return err; 2498 return err;
2370 2499
2500 if (tb[IFLA_IF_NETNSID])
2501 return -EOPNOTSUPP;
2502
2371 if (tb[IFLA_IFNAME]) 2503 if (tb[IFLA_IFNAME])
2372 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2504 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
2373 2505
@@ -2499,6 +2631,9 @@ replay:
 	if (err < 0)
 		return err;
 
+	if (tb[IFLA_IF_NETNSID])
+		return -EOPNOTSUPP;
+
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 	else
@@ -2576,12 +2711,6 @@ replay:
 				return err;
 			slave_data = slave_attr;
 		}
-		if (m_ops->slave_validate) {
-			err = m_ops->slave_validate(tb, slave_data,
-						    extack);
-			if (err < 0)
-				return err;
-		}
 	}
 
 	if (dev) {
@@ -2711,7 +2840,8 @@ replay:
 			goto out_unregister;
 	}
 	if (tb[IFLA_MASTER]) {
-		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
+				    extack);
 		if (err)
 			goto out_unregister;
 	}
@@ -2737,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 			struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
+	struct net *tgt_net = net;
 	struct ifinfomsg *ifm;
 	char ifname[IFNAMSIZ];
 	struct nlattr *tb[IFLA_MAX+1];
 	struct net_device *dev = NULL;
 	struct sk_buff *nskb;
+	int netnsid = -1;
 	int err;
 	u32 ext_filter_mask = 0;
 
@@ -2749,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err < 0)
 		return err;
 
+	if (tb[IFLA_IF_NETNSID]) {
+		netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+		tgt_net = get_target_net(skb, netnsid);
+		if (IS_ERR(tgt_net))
+			return PTR_ERR(tgt_net);
+	}
+
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 
 	if (tb[IFLA_EXT_MASK])
 		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
 
+	err = -EINVAL;
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
-		dev = __dev_get_by_index(net, ifm->ifi_index);
+		dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
 	else if (tb[IFLA_IFNAME])
-		dev = __dev_get_by_name(net, ifname);
+		dev = __dev_get_by_name(tgt_net, ifname);
 	else
-		return -EINVAL;
+		goto out;
 
+	err = -ENODEV;
 	if (dev == NULL)
-		return -ENODEV;
+		goto out;
 
+	err = -ENOBUFS;
 	nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
 	if (nskb == NULL)
-		return -ENOBUFS;
+		goto out;
 
-	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
-			       nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
+	err = rtnl_fill_ifinfo(nskb, dev, net,
+			       RTM_NEWLINK, NETLINK_CB(skb).portid,
+			       nlh->nlmsg_seq, 0, 0, ext_filter_mask,
+			       0, NULL, netnsid);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(nskb);
 	} else
 		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+out:
+	if (netnsid >= 0)
+		put_net(tgt_net);
 
 	return err;
 }
@@ -2856,7 +3003,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 
 struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
 				       unsigned int change,
-				       u32 event, gfp_t flags)
+				       u32 event, gfp_t flags, int *new_nsid)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
@@ -2867,7 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
 	if (skb == NULL)
 		goto errout;
 
-	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
+	err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
+			       type, 0, 0, change, 0, 0, event,
+			       new_nsid, -1);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -2890,14 +3039,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
 
 static void rtmsg_ifinfo_event(int type, struct net_device *dev,
 			       unsigned int change, u32 event,
-			       gfp_t flags)
+			       gfp_t flags, int *new_nsid)
 {
 	struct sk_buff *skb;
 
 	if (dev->reg_state != NETREG_REGISTERED)
 		return;
 
-	skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
+	skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
 	if (skb)
 		rtmsg_ifinfo_send(skb, dev, flags);
 }
@@ -2905,9 +3054,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
 		  gfp_t flags)
 {
-	rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+	rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
+}
+
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+			 gfp_t flags, int *new_nsid)
+{
+	rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+			   new_nsid);
 }
-EXPORT_SYMBOL(rtmsg_ifinfo);
 
 static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
 				   struct net_device *dev,
@@ -3014,21 +3169,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
 }
 EXPORT_SYMBOL(ndo_dflt_fdb_add);
 
-static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
+static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
+			 struct netlink_ext_ack *extack)
 {
 	u16 vid = 0;
 
 	if (vlan_attr) {
 		if (nla_len(vlan_attr) != sizeof(u16)) {
-			pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
+			NL_SET_ERR_MSG(extack, "invalid vlan attribute size");
 			return -EINVAL;
 		}
 
 		vid = nla_get_u16(vlan_attr);
 
 		if (!vid || vid >= VLAN_VID_MASK) {
-			pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
-				vid);
+			NL_SET_ERR_MSG(extack, "invalid vlan id");
 			return -EINVAL;
 		}
 	}
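NL_SET_ERR_MSG() stores the string in the extended ack that is returned to the failing requester, where pr_info() only reached the global kernel log. Sketch of a validator using it (the function and threshold are hypothetical):

#include <linux/netlink.h>
#include <linux/types.h>

static int demo_validate_mtu(u32 mtu, struct netlink_ext_ack *extack)
{
	if (mtu < 68) {	/* IPv4 minimum */
		NL_SET_ERR_MSG(extack, "mtu below IPv4 minimum");
		return -EINVAL;
	}
	return 0;
}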
@@ -3053,24 +3208,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex == 0) {
-		pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
+		NL_SET_ERR_MSG(extack, "invalid ifindex");
 		return -EINVAL;
 	}
 
 	dev = __dev_get_by_index(net, ndm->ndm_ifindex);
 	if (dev == NULL) {
-		pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
+		NL_SET_ERR_MSG(extack, "unknown ifindex");
 		return -ENODEV;
 	}
 
 	if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
-		pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
+		NL_SET_ERR_MSG(extack, "invalid address");
 		return -EINVAL;
 	}
 
 	addr = nla_data(tb[NDA_LLADDR]);
 
-	err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+	err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
 	if (err)
 		return err;
 
@@ -3157,24 +3312,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex == 0) {
-		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
+		NL_SET_ERR_MSG(extack, "invalid ifindex");
 		return -EINVAL;
 	}
 
 	dev = __dev_get_by_index(net, ndm->ndm_ifindex);
 	if (dev == NULL) {
-		pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
+		NL_SET_ERR_MSG(extack, "unknown ifindex");
 		return -ENODEV;
 	}
 
 	if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
-		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+		NL_SET_ERR_MSG(extack, "invalid address");
 		return -EINVAL;
 	}
 
 	addr = nla_data(tb[NDA_LLADDR]);
 
-	err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+	err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
 	if (err)
 		return err;
 
@@ -3614,7 +3769,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	dev = __dev_get_by_index(net, ifm->ifi_index);
 	if (!dev) {
-		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		NL_SET_ERR_MSG(extack, "unknown ifindex");
 		return -ENODEV;
 	}
 
@@ -3689,7 +3844,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	dev = __dev_get_by_index(net, ifm->ifi_index);
 	if (!dev) {
-		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		NL_SET_ERR_MSG(extack, "unknown ifindex");
 		return -ENODEV;
 	}
 
@@ -3854,6 +4009,9 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 		return -EMSGSIZE;
 
 	ifsm = nlmsg_data(nlh);
+	ifsm->family = PF_UNSPEC;
+	ifsm->pad1 = 0;
+	ifsm->pad2 = 0;
 	ifsm->ifindex = dev->ifindex;
 	ifsm->filter_mask = filter_mask;
 
@@ -3937,25 +4095,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 	if (!attr)
 		goto nla_put_failure;
 
-	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
 		if (af_ops->fill_stats_af) {
 			struct nlattr *af;
 			int err;
 
 			af = nla_nest_start(skb, af_ops->family);
-			if (!af)
+			if (!af) {
+				rcu_read_unlock();
 				goto nla_put_failure;
-
+			}
 			err = af_ops->fill_stats_af(skb, dev);
 
-			if (err == -ENODATA)
+			if (err == -ENODATA) {
 				nla_nest_cancel(skb, af);
-			else if (err < 0)
+			} else if (err < 0) {
+				rcu_read_unlock();
 				goto nla_put_failure;
+			}
 
 			nla_nest_end(skb, af);
 		}
 	}
+	rcu_read_unlock();
 
 	nla_nest_end(skb, attr);
 
@@ -4024,7 +4187,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
 		/* for IFLA_STATS_AF_SPEC */
 		size += nla_total_size(0);
 
-		list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
 			if (af_ops->get_stats_af_size) {
 				size += nla_total_size(
 					af_ops->get_stats_af_size(dev));
@@ -4033,6 +4197,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
 				size += nla_total_size(0);
 			}
 		}
+		rcu_read_unlock();
 	}
 
 	return size;
@@ -4276,15 +4441,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 
 	switch (event) {
 	case NETDEV_REBOOT:
+	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGEADDR:
 	case NETDEV_CHANGENAME:
 	case NETDEV_FEAT_CHANGE:
 	case NETDEV_BONDING_FAILOVER:
+	case NETDEV_POST_TYPE_CHANGE:
 	case NETDEV_NOTIFY_PEERS:
+	case NETDEV_CHANGEUPPER:
 	case NETDEV_RESEND_IGMP:
 	case NETDEV_CHANGEINFODATA:
+	case NETDEV_CHANGELOWERSTATE:
+	case NETDEV_CHANGE_TX_QUEUE_LEN:
 		rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
-				   GFP_KERNEL);
+				   GFP_KERNEL, NULL);
 		break;
 	default:
 		break;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 16982de649b9..6b0ff396fa9d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -41,7 +41,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/kmemcheck.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/in.h>
@@ -234,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff_fclones *fclones;
 
 		fclones = container_of(skb, struct sk_buff_fclones, skb1);
 
-		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
 		skb->fclone = SKB_FCLONE_ORIG;
 		refcount_set(&fclones->fclone_ref, 1);
 
@@ -301,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	return skb;
 }
@@ -357,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  */
 void *netdev_alloc_frag(unsigned int fragsz)
 {
-	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+	return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
 }
 EXPORT_SYMBOL(netdev_alloc_frag);
 
@@ -370,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 
 void *napi_alloc_frag(unsigned int fragsz)
 {
-	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
 }
 EXPORT_SYMBOL(napi_alloc_frag);
 
@@ -1124,9 +1120,13 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 
 	err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
 	if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
+		struct sock *save_sk = skb->sk;
+
 		/* Streams do not free skb on error. Reset to prev state. */
 		msg->msg_iter = orig_iter;
+		skb->sk = sk;
 		___pskb_trim(skb, orig_len);
+		skb->sk = save_sk;
 		return err;
 	}
 
@@ -1279,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		if (!n)
 			return NULL;
 
-		kmemcheck_annotate_bitfield(n, flags1);
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
@@ -1350,8 +1349,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 	/* Set the tail pointer and length */
 	skb_put(n, skb->len);
 
-	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
-		BUG();
+	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
 
 	copy_skb_header(n, skb);
 	return n;
@@ -1449,8 +1447,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	BUG_ON(nhead < 0);
 
-	if (skb_shared(skb))
-		BUG();
+	BUG_ON(skb_shared(skb));
 
 	size = SKB_DATA_ALIGN(size);
 
@@ -1509,6 +1506,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->nohdr = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
 
+	skb_metadata_clear(skb);
+
 	/* It is not generally safe to change skb->truesize.
 	 * For the moment, we really care of rx path, or
 	 * when skb is orphaned (not attached to a socket).
@@ -1593,9 +1592,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	head_copy_off = newheadroom - head_copy_len;
 
 	/* Copy the linear header and data. */
-	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
-			  skb->len + head_copy_len))
-		BUG();
+	BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+			     skb->len + head_copy_len));
 
 	copy_skb_header(n, skb);
 
@@ -1876,8 +1874,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
 		return NULL;
 	}
 
-	if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
-		BUG();
+	BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
+			     skb_tail_pointer(skb), delta));
 
 	/* Optimization: no fragments, no reasons to preestimate
 	 * size of pulled pages. Superb.
@@ -1896,7 +1894,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
 	}
 
 	/* If we need update frag list, we are in troubles.
-	 * Certainly, it possible to add an offset to skb data,
+	 * Certainly, it is possible to add an offset to skb data,
 	 * but taking into account that pulling is expected to
 	 * be very rare operation, it is worth to fight against
 	 * further bloating skb head and crucify ourselves here instead.
@@ -2848,12 +2846,15 @@ EXPORT_SYMBOL(skb_queue_purge);
  */
 void skb_rbtree_purge(struct rb_root *root)
 {
-	struct sk_buff *skb, *next;
+	struct rb_node *p = rb_first(root);
 
-	rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
-		kfree_skb(skb);
+	while (p) {
+		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
 
-	*root = RB_ROOT;
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, root);
+		kfree_skb(skb);
+	}
 }
 
 /**
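The rewrite drains the tree one node at a time with rb_erase(), which keeps the tree valid at every step; rb_next() must be taken before the erase because erasing rebalances the tree and invalidates the traversal shortcut the postorder walk relied on. The same drain pattern with a demo node type:

#include <linux/rbtree.h>
#include <linux/slab.h>

struct demo_node {
	struct rb_node rb;
	int key;
};

static void demo_tree_purge(struct rb_root *root)
{
	struct rb_node *p = rb_first(root);

	while (p) {
		struct demo_node *n = rb_entry(p, struct demo_node, rb);

		p = rb_next(p);		/* step before the erase invalidates p */
		rb_erase(&n->rb, root);
		kfree(n);
	}
}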
@@ -4762,6 +4763,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		      bool *fragstolen, int *delta_truesize)
 {
+	struct skb_shared_info *to_shinfo, *from_shinfo;
 	int i, delta, len = from->len;
 
 	*fragstolen = false;
@@ -4776,7 +4778,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		return true;
 	}
 
-	if (skb_has_frag_list(to) || skb_has_frag_list(from))
+	to_shinfo = skb_shinfo(to);
+	from_shinfo = skb_shinfo(from);
+	if (to_shinfo->frag_list || from_shinfo->frag_list)
 		return false;
 	if (skb_zcopy(to) || skb_zcopy(from))
 		return false;
@@ -4785,8 +4789,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		struct page *page;
 		unsigned int offset;
 
-		if (skb_shinfo(to)->nr_frags +
-		    skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+		if (to_shinfo->nr_frags +
+		    from_shinfo->nr_frags >= MAX_SKB_FRAGS)
 			return false;
 
 		if (skb_head_is_locked(from))
@@ -4797,12 +4801,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		page = virt_to_head_page(from->head);
 		offset = from->data - (unsigned char *)page_address(page);
 
-		skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+		skb_fill_page_desc(to, to_shinfo->nr_frags,
 				   page, offset, skb_headlen(from));
 		*fragstolen = true;
 	} else {
-		if (skb_shinfo(to)->nr_frags +
-		    skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+		if (to_shinfo->nr_frags +
+		    from_shinfo->nr_frags > MAX_SKB_FRAGS)
 			return false;
 
 		delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
@@ -4810,19 +4814,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 
 	WARN_ON_ONCE(delta < len);
 
-	memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
-	       skb_shinfo(from)->frags,
-	       skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
-	skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+	memcpy(to_shinfo->frags + to_shinfo->nr_frags,
+	       from_shinfo->frags,
+	       from_shinfo->nr_frags * sizeof(skb_frag_t));
+	to_shinfo->nr_frags += from_shinfo->nr_frags;
 
 	if (!skb_cloned(from))
-		skb_shinfo(from)->nr_frags = 0;
+		from_shinfo->nr_frags = 0;
 
 	/* if the skb is not cloned this does nothing
 	 * since we set nr_frags to 0.
 	 */
-	for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
-		skb_frag_ref(from, i);
+	for (i = 0; i < from_shinfo->nr_frags; i++)
+		__skb_frag_ref(&from_shinfo->frags[i]);
 
 	to->truesize += delta;
 	to->len += len;
@@ -4860,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	if (!xnet)
 		return;
 
+	ipvs_reset(skb);
 	skb_orphan(skb);
 	skb->mark = 0;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 9b7b6bbb2a23..c0b5b2f17412 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1469,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		sk = kmalloc(prot->obj_size, priority);
 
 	if (sk != NULL) {
-		kmemcheck_annotate_bitfield(sk, flags);
-
 		if (security_sk_alloc(sk, family, priority))
 			goto out_free;
 
@@ -1654,6 +1652,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 
 	sock_copy(newsk, sk);
 
+	newsk->sk_prot_creator = sk->sk_prot;
+
 	/* SANITY */
 	if (likely(newsk->sk_net_refcnt))
 		get_net(sock_net(newsk));
@@ -1675,20 +1675,28 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 	newsk->sk_dst_pending_confirm = 0;
 	newsk->sk_wmem_queued	= 0;
 	newsk->sk_forward_alloc = 0;
+
+	/* sk->sk_memcg will be populated at accept() time */
+	newsk->sk_memcg = NULL;
+
 	atomic_set(&newsk->sk_drops, 0);
 	newsk->sk_send_head	= NULL;
 	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 	atomic_set(&newsk->sk_zckey, 0);
 
 	sock_reset_flag(newsk, SOCK_DONE);
+	cgroup_sk_alloc(&newsk->sk_cgrp_data);
 
-	filter = rcu_dereference_protected(newsk->sk_filter, 1);
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter != NULL)
 		/* though it's an empty new sock, the charging may fail
 		 * if sysctl_optmem_max was changed between creation of
 		 * original socket and cloning
 		 */
 		is_charged = sk_filter_charge(newsk, filter);
+	RCU_INIT_POINTER(newsk->sk_filter, filter);
+	rcu_read_unlock();
 
 	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
 		/* We need to make sure that we don't uncharge the new
@@ -1709,9 +1717,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 	newsk->sk_incoming_cpu = raw_smp_processor_id();
 	atomic64_set(&newsk->sk_cookie, 0);
 
-	mem_cgroup_sk_alloc(newsk);
-	cgroup_sk_alloc(&newsk->sk_cgrp_data);
-
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
@@ -2339,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 
 	/* guarantee minimum buffer size under pressure */
 	if (kind == SK_MEM_RECV) {
-		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
 			return 1;
 
 	} else { /* SK_MEM_SEND */
+		int wmem0 = sk_get_wmem0(sk, prot);
+
 		if (sk->sk_type == SOCK_STREAM) {
-			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+			if (sk->sk_wmem_queued < wmem0)
 				return 1;
-		} else if (refcount_read(&sk->sk_wmem_alloc) <
-			   prot->sysctl_wmem[0])
+		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
 			return 1;
+		}
 	}
 
 	if (sk_has_memory_pressure(sk)) {
@@ -2678,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk_init_common(sk);
 	sk->sk_send_head	=	NULL;
 
-	init_timer(&sk->sk_timer);
+	timer_setup(&sk->sk_timer, NULL, 0);
 
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
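timer_setup(..., NULL, 0) is the transitional form of the tree-wide 4.15 timer conversion that also appears in the ccid2 hunk below: callbacks take the timer_list pointer and recover their container with from_timer() instead of casting an unsigned long cookie. Sketch with a hypothetical struct:

#include <linux/jiffies.h>
#include <linux/timer.h>

struct demo_sock {
	struct timer_list retry_timer;
	int retries;
};

static void demo_retry_expired(struct timer_list *t)
{
	struct demo_sock *ds = from_timer(ds, t, retry_timer);

	ds->retries++;	/* container recovered type-safely */
}

static void demo_sock_init(struct demo_sock *ds)
{
	timer_setup(&ds->retry_timer, demo_retry_expired, 0);
	mod_timer(&ds->retry_timer, jiffies + HZ);
}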
@@ -2737,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_max_pacing_rate = ~0U;
 	sk->sk_pacing_rate = ~0U;
+	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
@@ -3035,7 +3043,6 @@ struct prot_inuse {
 
 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
 
-#ifdef CONFIG_NET_NS
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
 	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
@@ -3079,27 +3086,6 @@ static __init int net_inuse_init(void)
3079} 3086}
3080 3087
3081core_initcall(net_inuse_init); 3088core_initcall(net_inuse_init);
3082#else
3083static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
3084
3085void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3086{
3087 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
3088}
3089EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3090
3091int sock_prot_inuse_get(struct net *net, struct proto *prot)
3092{
3093 int cpu, idx = prot->inuse_idx;
3094 int res = 0;
3095
3096 for_each_possible_cpu(cpu)
3097 res += per_cpu(prot_inuse, cpu).val[idx];
3098
3099 return res >= 0 ? res : 0;
3100}
3101EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3102#endif
3103 3089
3104static void assign_proto_idx(struct proto *prot) 3090static void assign_proto_idx(struct proto *prot)
3105{ 3091{
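
Annotation: the first net/core/sock.c hunk replaces direct reads of prot->sysctl_wmem[0]/sysctl_rmem[0] in __sk_mem_raise_allocated() with the sk_get_wmem0()/sk_get_rmem0() accessors, so protocols whose buffer limits have moved into struct net resolve the per-namespace value; hoisting wmem0 into a local also lets the SOCK_STREAM and datagram branches share a single lookup. The last hunk removes the !CONFIG_NET_NS fallback for the protocol inuse counters, making the per-struct-net implementation unconditional. A sketch of the accessor shape, assuming the protocol registers a byte offset into struct net when its sysctls are namespaced (reconstructed from context, not a verbatim quote of include/net/sock.h):

    /* Minimum send-buffer floor for this socket: the per-netns value if
     * the protocol registered an offset into struct net, otherwise the
     * protocol's global sysctl array. sk_get_rmem0() is symmetrical. */
    static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
    {
    	if (proto->sysctl_wmem_offset)
    		return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);

    	return *proto->sysctl_wmem;
    }
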
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index eed1ebf7f29d..5eeb1d20cc38 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * To speed up listener socket lookup, create an array to store all sockets 3 * To speed up listener socket lookup, create an array to store all sockets
3 * listening on the same port. This allows a decision to be made after finding 4 * listening on the same port. This allows a decision to be made after finding
@@ -36,9 +37,14 @@ int reuseport_alloc(struct sock *sk)
36 * soft irq of receive path or setsockopt from process context 37 * soft irq of receive path or setsockopt from process context
37 */ 38 */
38 spin_lock_bh(&reuseport_lock); 39 spin_lock_bh(&reuseport_lock);
39 WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, 40
40 lockdep_is_held(&reuseport_lock)), 41 /* Allocation attempts can occur concurrently via the setsockopt path
41 "multiple allocations for the same socket"); 42 * and the bind/hash path. Nothing to do when we lose the race.
43 */
44 if (rcu_dereference_protected(sk->sk_reuseport_cb,
45 lockdep_is_held(&reuseport_lock)))
46 goto out;
47
42 reuse = __reuseport_alloc(INIT_SOCKS); 48 reuse = __reuseport_alloc(INIT_SOCKS);
43 if (!reuse) { 49 if (!reuse) {
44 spin_unlock_bh(&reuseport_lock); 50 spin_unlock_bh(&reuseport_lock);
@@ -49,6 +55,7 @@ int reuseport_alloc(struct sock *sk)
49 reuse->num_socks = 1; 55 reuse->num_socks = 1;
50 rcu_assign_pointer(sk->sk_reuseport_cb, reuse); 56 rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
51 57
58out:
52 spin_unlock_bh(&reuseport_lock); 59 spin_unlock_bh(&reuseport_lock);
53 60
54 return 0; 61 return 0;
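
Annotation: the sock_reuseport.c change turns what used to be a WARN_ONCE() into ordinary control flow. As the new comment says, the setsockopt path and the bind/hash path can race to allocate sk_reuseport_cb, and losing that race is benign, so the allocator now re-checks under the lock and bails out quietly. The shape of the pattern as a self-contained userspace analogue (pthread mutex standing in for the bh spinlock; all names illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct sock_reuseport { int num_socks; };

    static pthread_mutex_t reuseport_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct sock_reuseport *cb;	/* set once, by whichever path wins */

    static int reuseport_alloc_demo(void)
    {
    	int err = 0;

    	pthread_mutex_lock(&reuseport_lock);
    	if (cb)			/* lost the race: already allocated, not an error */
    		goto out;
    	cb = calloc(1, sizeof(*cb));
    	if (!cb)
    		err = -1;	/* -ENOMEM in the kernel */
    	else
    		cb->num_socks = 1;
    out:
    	pthread_mutex_unlock(&reuseport_lock);
    	return err;
    }
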
diff --git a/net/core/stream.c b/net/core/stream.c
index 20231dbb1da0..1cff9c6270c6 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * SUCS NET3: 3 * SUCS NET3:
3 * 4 *
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b7cd9aafe99e..cbc3dde4cfcc 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* -*- linux-c -*- 2/* -*- linux-c -*-
2 * sysctl_net_core.c: sysctl interface to net core subsystem. 3 * sysctl_net_core.c: sysctl interface to net core subsystem.
3 * 4 *
diff --git a/net/core/tso.c b/net/core/tso.c
index 5dca7ce8ee9f..43f4eba61933 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/export.h> 2#include <linux/export.h>
2#include <linux/if_vlan.h> 3#include <linux/if_vlan.h>
3#include <net/ip.h> 4#include <net/ip.h>
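
Annotation: the stream.c, sysctl_net_core.c and tso.c hunks (and many one-line hunks below) are part of the tree-wide SPDX tagging effort: each file gains a license identifier as its first line, with the comment style matching the file type:

    // SPDX-License-Identifier: GPL-2.0     <- C sources
    # SPDX-License-Identifier: GPL-2.0      <- Makefiles, Kconfig, scripts
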
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 5c8362b037ed..2e7b56097bc4 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o 2obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
2 3
3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ 4dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e1295d5f2c56..1c75cd1255f6 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,10 +126,10 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
126 DCCPF_SEQ_WMAX)); 126 DCCPF_SEQ_WMAX));
127} 127}
128 128
129static void ccid2_hc_tx_rto_expire(unsigned long data) 129static void ccid2_hc_tx_rto_expire(struct timer_list *t)
130{ 130{
131 struct sock *sk = (struct sock *)data; 131 struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
132 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 132 struct sock *sk = hc->sk;
133 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); 133 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
134 134
135 bh_lock_sock(sk); 135 bh_lock_sock(sk);
@@ -733,8 +733,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
733 hc->tx_rpdupack = -1; 733 hc->tx_rpdupack = -1;
734 hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32; 734 hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
735 hc->tx_cwnd_used = 0; 735 hc->tx_cwnd_used = 0;
736 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 736 hc->sk = sk;
737 (unsigned long)sk); 737 timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
738 INIT_LIST_HEAD(&hc->tx_av_chunks); 738 INIT_LIST_HEAD(&hc->tx_av_chunks);
739 return 0; 739 return 0;
740} 740}
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 6e50ef2898fb..1af0116dc6ce 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -85,6 +85,7 @@ struct ccid2_hc_tx_sock {
85 tx_rto; 85 tx_rto;
86 u64 tx_rtt_seq:48; 86 u64 tx_rtt_seq:48;
87 struct timer_list tx_rtotimer; 87 struct timer_list tx_rtotimer;
88 struct sock *sk;
88 89
89 /* Congestion Window validation (optional, RFC 2861) */ 90 /* Congestion Window validation (optional, RFC 2861) */
90 u32 tx_cwnd_used, 91 u32 tx_cwnd_used,
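
Annotation: the ccid2 hunks are the first of many instances here of the timer API conversion: setup_timer() with an (unsigned long) cast becomes timer_setup(), and the callback recovers its state with from_timer() — a container_of() wrapper — instead of casting the data argument. Because the timer's container is not the object the callback ultimately needs, the conversion also adds an explicit back-pointer (the new ->sk field in ccid2.h). Condensed from the hunks above:

    #include <linux/timer.h>

    struct ccid2_hc_tx_sock {
    	struct timer_list tx_rtotimer;
    	struct sock *sk;	/* back-pointer, set at init time */
    	/* ... */
    };

    static void ccid2_hc_tx_rto_expire(struct timer_list *t)
    {
    	/* expands to container_of(t, struct ccid2_hc_tx_sock, tx_rtotimer) */
    	struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
    	struct sock *sk = hc->sk;
    	/* ... lock and service sk as before ... */
    }

    /* at init time:
     *	hc->sk = sk;
     *	timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
     */

The ccid3 hunks that follow apply the identical transformation to tx_no_feedback_timer.
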
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 119c04317d48..8b5ba6dffac7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -195,10 +195,10 @@ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc,
195 } 195 }
196} 196}
197 197
198static void ccid3_hc_tx_no_feedback_timer(unsigned long data) 198static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t)
199{ 199{
200 struct sock *sk = (struct sock *)data; 200 struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer);
201 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 201 struct sock *sk = hc->sk;
202 unsigned long t_nfb = USEC_PER_SEC / 5; 202 unsigned long t_nfb = USEC_PER_SEC / 5;
203 203
204 bh_lock_sock(sk); 204 bh_lock_sock(sk);
@@ -505,8 +505,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
505 505
506 hc->tx_state = TFRC_SSTATE_NO_SENT; 506 hc->tx_state = TFRC_SSTATE_NO_SENT;
507 hc->tx_hist = NULL; 507 hc->tx_hist = NULL;
508 setup_timer(&hc->tx_no_feedback_timer, 508 hc->sk = sk;
509 ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); 509 timer_setup(&hc->tx_no_feedback_timer,
510 ccid3_hc_tx_no_feedback_timer, 0);
510 return 0; 511 return 0;
511} 512}
512 513
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 1a9933c29672..813d91c6e1e2 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -106,6 +106,7 @@ struct ccid3_hc_tx_sock {
106 u8 tx_last_win_count; 106 u8 tx_last_win_count;
107 ktime_t tx_t_last_win_count; 107 ktime_t tx_t_last_win_count;
108 struct timer_list tx_no_feedback_timer; 108 struct timer_list tx_no_feedback_timer;
109 struct sock *sk;
109 ktime_t tx_t_ld; 110 ktime_t tx_t_ld;
110 ktime_t tx_t_nom; 111 ktime_t tx_t_nom;
111 struct tfrc_tx_hist_entry *tx_hist; 112 struct tfrc_tx_hist_entry *tx_hist;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 08df7a3acb3d..876e18592d71 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -149,10 +149,8 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
149{ 149{
150 const u8 idx_a = tfrc_rx_hist_index(h, a), 150 const u8 idx_a = tfrc_rx_hist_index(h, a),
151 idx_b = tfrc_rx_hist_index(h, b); 151 idx_b = tfrc_rx_hist_index(h, b);
152 struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
153 152
154 h->ring[idx_a] = h->ring[idx_b]; 153 swap(h->ring[idx_a], h->ring[idx_b]);
155 h->ring[idx_b] = tmp;
156} 154}
157 155
158/* 156/*
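
Annotation: tfrc_rx_hist_swap() — like the dn_nsp hunks further down — drops an open-coded three-step exchange in favor of the kernel's swap() macro, which at the time expanded to roughly:

    #define swap(a, b) \
    	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
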
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 62b5828acde0..d7f265e1f50c 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * TFRC library initialisation 3 * TFRC library initialisation
3 * 4 *
diff --git a/net/dccp/input.c b/net/dccp/input.c
index fa6be9750bb4..d28d46bff6ab 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -534,6 +534,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
534 case DCCP_PKT_DATA: 534 case DCCP_PKT_DATA:
535 if (sk->sk_state == DCCP_RESPOND) 535 if (sk->sk_state == DCCP_RESPOND)
536 break; 536 break;
537 /* fall through */
537 case DCCP_PKT_DATAACK: 538 case DCCP_PKT_DATAACK:
538 case DCCP_PKT_ACK: 539 case DCCP_PKT_ACK:
539 /* 540 /*
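
Annotation: the /* fall through */ comments added to dccp and decnet in this series mark intentional case fall-through for GCC's -Wimplicit-fallthrough; the comment has to sit exactly where a break would go. The shape of the annotated switch, condensed from the hunk above:

    switch (dh->dccph_type) {
    case DCCP_PKT_DATA:
    	if (sk->sk_state == DCCP_RESPOND)
    		break;
    	/* fall through */	/* DATA in other states is handled like DATAACK */
    case DCCP_PKT_DATAACK:
    case DCCP_PKT_ACK:
    	/* common processing */
    	break;
    }
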
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 001c08696334..e65fcb45c3f6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
414 sk_daddr_set(newsk, ireq->ir_rmt_addr); 414 sk_daddr_set(newsk, ireq->ir_rmt_addr);
415 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); 415 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
416 newinet->inet_saddr = ireq->ir_loc_addr; 416 newinet->inet_saddr = ireq->ir_loc_addr;
417 newinet->inet_opt = ireq->opt; 417 RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
418 ireq->opt = NULL;
419 newinet->mc_index = inet_iif(skb); 418 newinet->mc_index = inet_iif(skb);
420 newinet->mc_ttl = ip_hdr(skb)->ttl; 419 newinet->mc_ttl = ip_hdr(skb)->ttl;
421 newinet->inet_id = jiffies; 420 newinet->inet_id = jiffies;
@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
430 if (__inet_inherit_port(sk, newsk) < 0) 429 if (__inet_inherit_port(sk, newsk) < 0)
431 goto put_and_exit; 430 goto put_and_exit;
432 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); 431 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
433 432 if (*own_req)
433 ireq->ireq_opt = NULL;
434 else
435 newinet->inet_opt = NULL;
434 return newsk; 436 return newsk;
435 437
436exit_overflow: 438exit_overflow:
@@ -441,6 +443,7 @@ exit:
441 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); 443 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
442 return NULL; 444 return NULL;
443put_and_exit: 445put_and_exit:
446 newinet->inet_opt = NULL;
444 inet_csk_prepare_forced_close(newsk); 447 inet_csk_prepare_forced_close(newsk);
445 dccp_done(newsk); 448 dccp_done(newsk);
446 goto exit; 449 goto exit;
@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
492 ireq->ir_rmt_addr); 495 ireq->ir_rmt_addr);
493 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 496 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
494 ireq->ir_rmt_addr, 497 ireq->ir_rmt_addr,
495 ireq->opt); 498 ireq_opt_deref(ireq));
496 err = net_xmit_eval(err); 499 err = net_xmit_eval(err);
497 } 500 }
498 501
@@ -548,7 +551,7 @@ out:
548static void dccp_v4_reqsk_destructor(struct request_sock *req) 551static void dccp_v4_reqsk_destructor(struct request_sock *req)
549{ 552{
550 dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); 553 dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
551 kfree(inet_rsk(req)->opt); 554 kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
552} 555}
553 556
554void dccp_syn_ack_timeout(const struct request_sock *req) 557void dccp_syn_ack_timeout(const struct request_sock *req)
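
Annotation: the ipv4.c hunks mirror the contemporaneous TCP fix for request-socket option races: ireq->opt becomes the RCU-managed ireq->ireq_opt, and ownership only transfers to the child socket once the child has really been hashed. Condensed:

    /* child inherits the options under RCU */
    RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
    /* ... */
    *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
    if (*own_req)
    	ireq->ireq_opt = NULL;		/* child owns the options now */
    else
    	newinet->inet_opt = NULL;	/* reqsk destructor frees them */

On the put_and_exit error path the child's pointer is likewise cleared, so the options are freed exactly once, by dccp_v4_reqsk_destructor().
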
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 51cdfc3bd8ca..4e40db017e19 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -227,8 +227,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
227 * Ack vectors are processed by the TX CCID if it is 227 * Ack vectors are processed by the TX CCID if it is
228 * interested. The RX CCID need not parse Ack Vectors, 228 * interested. The RX CCID need not parse Ack Vectors,
229 * since it is only interested in clearing old state. 229 * since it is only interested in clearing old state.
230 * Fall through.
231 */ 230 */
231 /* fall through */
232 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: 232 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
233 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, 233 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
234 pkt_type, opt, value, len)) 234 pkt_type, opt, value, len))
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3a2c34027758..b50a8732ff43 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk)
125 __sk_dst_reset(sk); 125 __sk_dst_reset(sk);
126} 126}
127 127
128static void dccp_write_timer(unsigned long data) 128static void dccp_write_timer(struct timer_list *t)
129{ 129{
130 struct sock *sk = (struct sock *)data; 130 struct inet_connection_sock *icsk =
131 struct inet_connection_sock *icsk = inet_csk(sk); 131 from_timer(icsk, t, icsk_retransmit_timer);
132 struct sock *sk = &icsk->icsk_inet.sk;
132 int event = 0; 133 int event = 0;
133 134
134 bh_lock_sock(sk); 135 bh_lock_sock(sk);
@@ -161,19 +162,20 @@ out:
161 sock_put(sk); 162 sock_put(sk);
162} 163}
163 164
164static void dccp_keepalive_timer(unsigned long data) 165static void dccp_keepalive_timer(struct timer_list *t)
165{ 166{
166 struct sock *sk = (struct sock *)data; 167 struct sock *sk = from_timer(sk, t, sk_timer);
167 168
168 pr_err("dccp should not use a keepalive timer !\n"); 169 pr_err("dccp should not use a keepalive timer !\n");
169 sock_put(sk); 170 sock_put(sk);
170} 171}
171 172
172/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ 173/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
173static void dccp_delack_timer(unsigned long data) 174static void dccp_delack_timer(struct timer_list *t)
174{ 175{
175 struct sock *sk = (struct sock *)data; 176 struct inet_connection_sock *icsk =
176 struct inet_connection_sock *icsk = inet_csk(sk); 177 from_timer(icsk, t, icsk_delack_timer);
178 struct sock *sk = &icsk->icsk_inet.sk;
177 179
178 bh_lock_sock(sk); 180 bh_lock_sock(sk);
179 if (sock_owned_by_user(sk)) { 181 if (sock_owned_by_user(sk)) {
@@ -232,10 +234,13 @@ static void dccp_write_xmitlet(unsigned long data)
232 bh_unlock_sock(sk); 234 bh_unlock_sock(sk);
233} 235}
234 236
235static void dccp_write_xmit_timer(unsigned long data) 237static void dccp_write_xmit_timer(struct timer_list *t)
236{ 238{
237 dccp_write_xmitlet(data); 239 struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer);
238 sock_put((struct sock *)data); 240 struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
241
242 dccp_write_xmitlet((unsigned long)sk);
243 sock_put(sk);
239} 244}
240 245
241void dccp_init_xmit_timers(struct sock *sk) 246void dccp_init_xmit_timers(struct sock *sk)
@@ -243,8 +248,7 @@ void dccp_init_xmit_timers(struct sock *sk)
243 struct dccp_sock *dp = dccp_sk(sk); 248 struct dccp_sock *dp = dccp_sk(sk);
244 249
245 tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); 250 tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
246 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 251 timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0);
247 (unsigned long)sk);
248 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, 252 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
249 &dccp_keepalive_timer); 253 &dccp_keepalive_timer);
250} 254}
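
Annotation: dccp_write_timer() and dccp_delack_timer() show the other wrinkle of the timer conversion: when the timer is embedded in an inner structure, from_timer() recovers that inner struct, and the callback walks the embedding chain back out to the socket:

    static void dccp_write_timer(struct timer_list *t)
    {
    	struct inet_connection_sock *icsk =
    		from_timer(icsk, t, icsk_retransmit_timer);
    	struct sock *sk = &icsk->icsk_inet.sk;	/* icsk is embedded in the sock */
    	/* ... */
    }

dccp_write_xmit_timer() does the same through struct dccp_sock, then hands the recovered socket to the (still cast-based) tasklet.
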
diff --git a/net/decnet/Makefile b/net/decnet/Makefile
index e44003af71f6..9e38122d942b 100644
--- a/net/decnet/Makefile
+++ b/net/decnet/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1 2
2obj-$(CONFIG_DECNET) += decnet.o 3obj-$(CONFIG_DECNET) += decnet.o
3 4
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 73a0399dc7a2..518cea17b811 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -533,10 +533,6 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
533 scp->keepalive = 10 * HZ; 533 scp->keepalive = 10 * HZ;
534 scp->keepalive_fxn = dn_keepalive; 534 scp->keepalive_fxn = dn_keepalive;
535 535
536 init_timer(&scp->delack_timer);
537 scp->delack_pending = 0;
538 scp->delack_fxn = dn_nsp_delayed_ack;
539
540 dn_start_slow_timer(sk); 536 dn_start_slow_timer(sk);
541out: 537out:
542 return sk; 538 return sk;
@@ -634,10 +630,12 @@ static void dn_destroy_sock(struct sock *sk)
634 goto disc_reject; 630 goto disc_reject;
635 case DN_RUN: 631 case DN_RUN:
636 scp->state = DN_DI; 632 scp->state = DN_DI;
633 /* fall through */
637 case DN_DI: 634 case DN_DI:
638 case DN_DR: 635 case DN_DR:
639disc_reject: 636disc_reject:
640 dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); 637 dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
638 /* fall through */
641 case DN_NC: 639 case DN_NC:
642 case DN_NR: 640 case DN_NR:
643 case DN_RJ: 641 case DN_RJ:
@@ -651,6 +649,7 @@ disc_reject:
651 break; 649 break;
652 default: 650 default:
653 printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); 651 printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
652 /* fall through */
654 case DN_O: 653 case DN_O:
655 dn_stop_slow_timer(sk); 654 dn_stop_slow_timer(sk);
656 655
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 4d339de56862..9153247dad28 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * DECnet An implementation of the DECnet protocol suite for the LINUX 3 * DECnet An implementation of the DECnet protocol suite for the LINUX
3 * operating system. DECnet is implemented using the BSD Socket 4 * operating system. DECnet is implemented using the BSD Socket
@@ -1038,14 +1039,14 @@ static void dn_eth_down(struct net_device *dev)
1038 1039
1039static void dn_dev_set_timer(struct net_device *dev); 1040static void dn_dev_set_timer(struct net_device *dev);
1040 1041
1041static void dn_dev_timer_func(unsigned long arg) 1042static void dn_dev_timer_func(struct timer_list *t)
1042{ 1043{
1043 struct net_device *dev = (struct net_device *)arg; 1044 struct dn_dev *dn_db = from_timer(dn_db, t, timer);
1044 struct dn_dev *dn_db; 1045 struct net_device *dev;
1045 struct dn_ifaddr *ifa; 1046 struct dn_ifaddr *ifa;
1046 1047
1047 rcu_read_lock(); 1048 rcu_read_lock();
1048 dn_db = rcu_dereference(dev->dn_ptr); 1049 dev = dn_db->dev;
1049 if (dn_db->t3 <= dn_db->parms.t2) { 1050 if (dn_db->t3 <= dn_db->parms.t2) {
1050 if (dn_db->parms.timer3) { 1051 if (dn_db->parms.timer3) {
1051 for (ifa = rcu_dereference(dn_db->ifa_list); 1052 for (ifa = rcu_dereference(dn_db->ifa_list);
@@ -1070,8 +1071,6 @@ static void dn_dev_set_timer(struct net_device *dev)
1070 if (dn_db->parms.t2 > dn_db->parms.t3) 1071 if (dn_db->parms.t2 > dn_db->parms.t3)
1071 dn_db->parms.t2 = dn_db->parms.t3; 1072 dn_db->parms.t2 = dn_db->parms.t3;
1072 1073
1073 dn_db->timer.data = (unsigned long)dev;
1074 dn_db->timer.function = dn_dev_timer_func;
1075 dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ); 1074 dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
1076 1075
1077 add_timer(&dn_db->timer); 1076 add_timer(&dn_db->timer);
@@ -1100,7 +1099,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
1100 1099
1101 rcu_assign_pointer(dev->dn_ptr, dn_db); 1100 rcu_assign_pointer(dev->dn_ptr, dn_db);
1102 dn_db->dev = dev; 1101 dn_db->dev = dev;
1103 init_timer(&dn_db->timer); 1102 timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
1104 1103
1105 dn_db->uptime = jiffies; 1104 dn_db->uptime = jiffies;
1106 1105
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 3d37464c8b4a..b37a1b833c77 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * DECnet An implementation of the DECnet protocol suite for the LINUX 3 * DECnet An implementation of the DECnet protocol suite for the LINUX
3 * operating system. DECnet is implemented using the BSD Socket 4 * operating system. DECnet is implemented using the BSD Socket
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 22bf0b95d6ed..528119a5618e 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * DECnet An implementation of the DECnet protocol suite for the LINUX 3 * DECnet An implementation of the DECnet protocol suite for the LINUX
3 * operating system. DECnet is implemented using the BSD Socket 4 * operating system. DECnet is implemented using the BSD Socket
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 7ac086d5c0c0..1b2120645730 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -776,12 +776,8 @@ static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
776 * Swap src & dst and look up in the normal way. 776 * Swap src & dst and look up in the normal way.
777 */ 777 */
778 if (unlikely(cb->rt_flags & DN_RT_F_RTS)) { 778 if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
779 __le16 tmp = cb->dst_port; 779 swap(cb->dst_port, cb->src_port);
780 cb->dst_port = cb->src_port; 780 swap(cb->dst, cb->src);
781 cb->src_port = tmp;
782 tmp = cb->dst;
783 cb->dst = cb->src;
784 cb->src = tmp;
785 } 781 }
786 782
787 /* 783 /*
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 66f035e476ea..56a52a004c56 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -313,11 +313,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c
313 ackcrs |= 0x8000; 313 ackcrs |= 0x8000;
314 314
315 /* If this is an "other data/ack" message, swap acknum and ackcrs */ 315 /* If this is an "other data/ack" message, swap acknum and ackcrs */
316 if (other) { 316 if (other)
317 unsigned short tmp = acknum; 317 swap(acknum, ackcrs);
318 acknum = ackcrs;
319 ackcrs = tmp;
320 }
321 318
322 /* Set "cross subchannel" bit in ackcrs */ 319 /* Set "cross subchannel" bit in ackcrs */
323 ackcrs |= 0x2000; 320 ackcrs |= 0x2000;
@@ -491,17 +488,6 @@ void dn_send_conn_ack (struct sock *sk)
491 dn_nsp_send(skb); 488 dn_nsp_send(skb);
492} 489}
493 490
494void dn_nsp_delayed_ack(struct sock *sk)
495{
496 struct dn_scp *scp = DN_SK(sk);
497
498 if (scp->ackxmt_oth != scp->numoth_rcv)
499 dn_nsp_send_oth_ack(sk);
500
501 if (scp->ackxmt_dat != scp->numdat_rcv)
502 dn_nsp_send_data_ack(sk);
503}
504
505static int dn_nsp_retrans_conn_conf(struct sock *sk) 491static int dn_nsp_retrans_conn_conf(struct sock *sk)
506{ 492{
507 struct dn_scp *scp = DN_SK(sk); 493 struct dn_scp *scp = DN_SK(sk);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 0bd3afd01dd2..324cb9f2f551 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -125,13 +125,13 @@ static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
125 struct sk_buff *skb, 125 struct sk_buff *skb,
126 const void *daddr); 126 const void *daddr);
127static int dn_route_input(struct sk_buff *); 127static int dn_route_input(struct sk_buff *);
128static void dn_run_flush(unsigned long dummy); 128static void dn_run_flush(struct timer_list *unused);
129 129
130static struct dn_rt_hash_bucket *dn_rt_hash_table; 130static struct dn_rt_hash_bucket *dn_rt_hash_table;
131static unsigned int dn_rt_hash_mask; 131static unsigned int dn_rt_hash_mask;
132 132
133static struct timer_list dn_route_timer; 133static struct timer_list dn_route_timer;
134static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0); 134static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
135int decnet_dst_gc_interval = 2; 135int decnet_dst_gc_interval = 2;
136 136
137static struct dst_ops dn_dst_ops = { 137static struct dst_ops dn_dst_ops = {
@@ -183,7 +183,7 @@ static __inline__ unsigned int dn_hash(__le16 src, __le16 dst)
183 return dn_rt_hash_mask & (unsigned int)tmp; 183 return dn_rt_hash_mask & (unsigned int)tmp;
184} 184}
185 185
186static void dn_dst_check_expire(unsigned long dummy) 186static void dn_dst_check_expire(struct timer_list *unused)
187{ 187{
188 int i; 188 int i;
189 struct dn_route *rt; 189 struct dn_route *rt;
@@ -338,7 +338,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
338 dn_rt_hash_table[hash].chain); 338 dn_rt_hash_table[hash].chain);
339 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); 339 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
340 340
341 dst_use(&rth->dst, now); 341 dst_hold_and_use(&rth->dst, now);
342 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 342 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
343 343
344 dst_release_immediate(&rt->dst); 344 dst_release_immediate(&rt->dst);
@@ -351,13 +351,13 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
351 rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); 351 rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
352 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); 352 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
353 353
354 dst_use(&rt->dst, now); 354 dst_hold_and_use(&rt->dst, now);
355 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 355 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
356 *rp = rt; 356 *rp = rt;
357 return 0; 357 return 0;
358} 358}
359 359
360static void dn_run_flush(unsigned long dummy) 360static void dn_run_flush(struct timer_list *unused)
361{ 361{
362 int i; 362 int i;
363 struct dn_route *rt, *next; 363 struct dn_route *rt, *next;
@@ -1258,7 +1258,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
1258 (flp->flowidn_mark == rt->fld.flowidn_mark) && 1258 (flp->flowidn_mark == rt->fld.flowidn_mark) &&
1259 dn_is_output_route(rt) && 1259 dn_is_output_route(rt) &&
1260 (rt->fld.flowidn_oif == flp->flowidn_oif)) { 1260 (rt->fld.flowidn_oif == flp->flowidn_oif)) {
1261 dst_use(&rt->dst, jiffies); 1261 dst_hold_and_use(&rt->dst, jiffies);
1262 rcu_read_unlock_bh(); 1262 rcu_read_unlock_bh();
1263 *pprt = &rt->dst; 1263 *pprt = &rt->dst;
1264 return 0; 1264 return 0;
@@ -1535,7 +1535,7 @@ static int dn_route_input(struct sk_buff *skb)
1535 (rt->fld.flowidn_oif == 0) && 1535 (rt->fld.flowidn_oif == 0) &&
1536 (rt->fld.flowidn_mark == skb->mark) && 1536 (rt->fld.flowidn_mark == skb->mark) &&
1537 (rt->fld.flowidn_iif == cb->iif)) { 1537 (rt->fld.flowidn_iif == cb->iif)) {
1538 dst_use(&rt->dst, jiffies); 1538 dst_hold_and_use(&rt->dst, jiffies);
1539 rcu_read_unlock(); 1539 rcu_read_unlock();
1540 skb_dst_set(skb, (struct dst_entry *)rt); 1540 skb_dst_set(skb, (struct dst_entry *)rt);
1541 return 0; 1541 return 0;
@@ -1875,7 +1875,7 @@ void __init dn_route_init(void)
1875 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, 1875 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
1876 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1876 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1877 dst_entries_init(&dn_dst_ops); 1877 dst_entries_init(&dn_dst_ops);
1878 setup_timer(&dn_route_timer, dn_dst_check_expire, 0); 1878 timer_setup(&dn_route_timer, dn_dst_check_expire, 0);
1879 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; 1879 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
1880 add_timer(&dn_route_timer); 1880 add_timer(&dn_route_timer);
1881 1881
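
Annotation: two smaller API shifts ride along in dn_route.c: DEFINE_TIMER() lost its data and expires arguments together with the callback-prototype change, and dst_use() was renamed dst_hold_and_use() to say what it actually does (take a reference and refresh ->lastuse). The static flush timer is now declared as:

    static void dn_run_flush(struct timer_list *unused);
    static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
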
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 295bbd6a56f2..c795c3f509c9 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1 2
2/* 3/*
3 * DECnet An implementation of the DECnet protocol suite for the LINUX 4 * DECnet An implementation of the DECnet protocol suite for the LINUX
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 232675480756..f0710b5d037d 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * DECnet An implementation of the DECnet protocol suite for the LINUX 3 * DECnet An implementation of the DECnet protocol suite for the LINUX
3 * operating system. DECnet is implemented using the BSD Socket 4 * operating system. DECnet is implemented using the BSD Socket
@@ -155,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz)
155 default: 156 default:
156 printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", 157 printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
157 old_divisor); 158 old_divisor);
159 /* fall through */
158 case 256: 160 case 256:
159 new_divisor = 1024; 161 new_divisor = 1024;
160 new_hashmask = 0x3FF; 162 new_hashmask = 0x3FF;
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
index 1d330fd43dc7..aa4155875ca8 100644
--- a/net/decnet/dn_timer.c
+++ b/net/decnet/dn_timer.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * DECnet An implementation of the DECnet protocol suite for the LINUX 3 * DECnet An implementation of the DECnet protocol suite for the LINUX
3 * operating system. DECnet is implemented using the BSD Socket 4 * operating system. DECnet is implemented using the BSD Socket
@@ -33,11 +34,11 @@
33 34
34#define SLOW_INTERVAL (HZ/2) 35#define SLOW_INTERVAL (HZ/2)
35 36
36static void dn_slow_timer(unsigned long arg); 37static void dn_slow_timer(struct timer_list *t);
37 38
38void dn_start_slow_timer(struct sock *sk) 39void dn_start_slow_timer(struct sock *sk)
39{ 40{
40 setup_timer(&sk->sk_timer, dn_slow_timer, (unsigned long)sk); 41 timer_setup(&sk->sk_timer, dn_slow_timer, 0);
41 sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); 42 sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
42} 43}
43 44
@@ -46,9 +47,9 @@ void dn_stop_slow_timer(struct sock *sk)
46 sk_stop_timer(sk, &sk->sk_timer); 47 sk_stop_timer(sk, &sk->sk_timer);
47} 48}
48 49
49static void dn_slow_timer(unsigned long arg) 50static void dn_slow_timer(struct timer_list *t)
50{ 51{
51 struct sock *sk = (struct sock *)arg; 52 struct sock *sk = from_timer(sk, t, sk_timer);
52 struct dn_scp *scp = DN_SK(sk); 53 struct dn_scp *scp = DN_SK(sk);
53 54
54 bh_lock_sock(sk); 55 bh_lock_sock(sk);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 6c7da6c29bf0..55bf64a22b59 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * DECnet An implementation of the DECnet protocol suite for the LINUX 3 * DECnet An implementation of the DECnet protocol suite for the LINUX
3 * operating system. DECnet is implemented using the BSD Socket 4 * operating system. DECnet is implemented using the BSD Socket
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 8737412c7b27..e1d4d898a007 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data)
224static void dns_resolver_describe(const struct key *key, struct seq_file *m) 224static void dns_resolver_describe(const struct key *key, struct seq_file *m)
225{ 225{
226 seq_puts(m, key->description); 226 seq_puts(m, key->description);
227 if (key_is_instantiated(key)) { 227 if (key_is_positive(key)) {
228 int err = PTR_ERR(key->payload.data[dns_key_error]); 228 int err = PTR_ERR(key->payload.data[dns_key_error]);
229 229
230 if (err) 230 if (err)
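
Annotation: the dns_key.c hunk follows the keyrings rework that collapsed the instantiated/negative flag bits into a single ->state field: key_is_instantiated() gives way to key_is_positive(), which — per our reading of that series; a hedged sketch, not a verbatim quote — reads the state with acquire semantics so a concurrently instantiated key is never observed half-initialized:

    static inline bool key_is_positive(const struct key *key)
    {
    	return smp_load_acquire(&key->state) == KEY_IS_POSITIVE;
    }
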
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cc5f8f971689..03c3bdf25468 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -7,6 +7,7 @@ config HAVE_NET_DSA
7config NET_DSA 7config NET_DSA
8 tristate "Distributed Switch Architecture" 8 tristate "Distributed Switch Architecture"
9 depends on HAVE_NET_DSA && MAY_USE_DEVLINK 9 depends on HAVE_NET_DSA && MAY_USE_DEVLINK
10 depends on BRIDGE || BRIDGE=n
10 select NET_SWITCHDEV 11 select NET_SWITCHDEV
11 select PHYLIB 12 select PHYLIB
12 ---help--- 13 ---help---
@@ -19,6 +20,9 @@ if NET_DSA
19config NET_DSA_TAG_BRCM 20config NET_DSA_TAG_BRCM
20 bool 21 bool
21 22
23config NET_DSA_TAG_BRCM_PREPEND
24 bool
25
22config NET_DSA_TAG_DSA 26config NET_DSA_TAG_DSA
23 bool 27 bool
24 28
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index fcce25da937c..0e13c1f95d13 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,9 +1,11 @@
1# SPDX-License-Identifier: GPL-2.0
1# the core 2# the core
2obj-$(CONFIG_NET_DSA) += dsa_core.o 3obj-$(CONFIG_NET_DSA) += dsa_core.o
3dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o 4dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
4 5
5# tagging formats 6# tagging formats
6dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o 7dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
8dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o
7dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o 9dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
8dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o 10dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
9dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o 11dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 03c58b0eb082..6a9d0f50fbee 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,6 +14,7 @@
14#include <linux/platform_device.h> 14#include <linux/platform_device.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/notifier.h>
17#include <linux/of.h> 18#include <linux/of.h>
18#include <linux/of_mdio.h> 19#include <linux/of_mdio.h>
19#include <linux/of_platform.h> 20#include <linux/of_platform.h>
@@ -43,6 +44,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
43#ifdef CONFIG_NET_DSA_TAG_BRCM 44#ifdef CONFIG_NET_DSA_TAG_BRCM
44 [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, 45 [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
45#endif 46#endif
47#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
48 [DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops,
49#endif
46#ifdef CONFIG_NET_DSA_TAG_DSA 50#ifdef CONFIG_NET_DSA_TAG_DSA
47 [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops, 51 [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
48#endif 52#endif
@@ -67,37 +71,6 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
67 [DSA_TAG_PROTO_NONE] = &none_ops, 71 [DSA_TAG_PROTO_NONE] = &none_ops,
68}; 72};
69 73
70int dsa_cpu_dsa_setup(struct dsa_port *port)
71{
72 struct device_node *port_dn = port->dn;
73 struct dsa_switch *ds = port->ds;
74 struct phy_device *phydev;
75 int ret, mode;
76
77 if (of_phy_is_fixed_link(port_dn)) {
78 ret = of_phy_register_fixed_link(port_dn);
79 if (ret) {
80 dev_err(ds->dev, "failed to register fixed PHY\n");
81 return ret;
82 }
83 phydev = of_phy_find_device(port_dn);
84
85 mode = of_get_phy_mode(port_dn);
86 if (mode < 0)
87 mode = PHY_INTERFACE_MODE_NA;
88 phydev->interface = mode;
89
90 genphy_config_init(phydev);
91 genphy_read_status(phydev);
92 if (ds->ops->adjust_link)
93 ds->ops->adjust_link(ds, port->index, phydev);
94
95 put_device(&phydev->mdio.dev);
96 }
97
98 return 0;
99}
100
101const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) 74const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
102{ 75{
103 const struct dsa_device_ops *ops; 76 const struct dsa_device_ops *ops;
@@ -112,42 +85,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
112 return ops; 85 return ops;
113} 86}
114 87
115int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp)
116{
117 struct dsa_switch *ds = cpu_dp->ds;
118 struct net_device *master;
119 struct ethtool_ops *cpu_ops;
120
121 master = cpu_dp->netdev;
122
123 cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
124 if (!cpu_ops)
125 return -ENOMEM;
126
127 memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops,
128 sizeof(struct ethtool_ops));
129 cpu_dp->orig_ethtool_ops = master->ethtool_ops;
130 memcpy(cpu_ops, &cpu_dp->ethtool_ops,
131 sizeof(struct ethtool_ops));
132 dsa_cpu_port_ethtool_init(cpu_ops);
133 master->ethtool_ops = cpu_ops;
134
135 return 0;
136}
137
138void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp)
139{
140 cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops;
141}
142
143void dsa_cpu_dsa_destroy(struct dsa_port *port)
144{
145 struct device_node *port_dn = port->dn;
146
147 if (of_phy_is_fixed_link(port_dn))
148 of_phy_deregister_fixed_link(port_dn);
149}
150
151static int dev_is_class(struct device *dev, void *class) 88static int dev_is_class(struct device *dev, void *class)
152{ 89{
153 if (dev->class != NULL && !strcmp(dev->class->name, class)) 90 if (dev->class != NULL && !strcmp(dev->class->name, class))
@@ -188,12 +125,12 @@ EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
188static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, 125static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
189 struct packet_type *pt, struct net_device *unused) 126 struct packet_type *pt, struct net_device *unused)
190{ 127{
191 struct dsa_switch_tree *dst = dev->dsa_ptr; 128 struct dsa_port *cpu_dp = dev->dsa_ptr;
192 struct sk_buff *nskb = NULL; 129 struct sk_buff *nskb = NULL;
193 struct pcpu_sw_netstats *s; 130 struct pcpu_sw_netstats *s;
194 struct dsa_slave_priv *p; 131 struct dsa_slave_priv *p;
195 132
196 if (unlikely(dst == NULL)) { 133 if (unlikely(!cpu_dp)) {
197 kfree_skb(skb); 134 kfree_skb(skb);
198 return 0; 135 return 0;
199 } 136 }
@@ -202,7 +139,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
202 if (!skb) 139 if (!skb)
203 return 0; 140 return 0;
204 141
205 nskb = dst->rcv(skb, dev, pt); 142 nskb = cpu_dp->rcv(skb, dev, pt);
206 if (!nskb) { 143 if (!nskb) {
207 kfree_skb(skb); 144 kfree_skb(skb);
208 return 0; 145 return 0;
@@ -228,7 +165,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
228#ifdef CONFIG_PM_SLEEP 165#ifdef CONFIG_PM_SLEEP
229static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) 166static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
230{ 167{
231 return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; 168 return dsa_is_user_port(ds, p) && ds->ports[p].slave;
232} 169}
233 170
234int dsa_switch_suspend(struct dsa_switch *ds) 171int dsa_switch_suspend(struct dsa_switch *ds)
@@ -240,7 +177,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
240 if (!dsa_is_port_initialized(ds, i)) 177 if (!dsa_is_port_initialized(ds, i))
241 continue; 178 continue;
242 179
243 ret = dsa_slave_suspend(ds->ports[i].netdev); 180 ret = dsa_slave_suspend(ds->ports[i].slave);
244 if (ret) 181 if (ret)
245 return ret; 182 return ret;
246 } 183 }
@@ -267,7 +204,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
267 if (!dsa_is_port_initialized(ds, i)) 204 if (!dsa_is_port_initialized(ds, i))
268 continue; 205 continue;
269 206
270 ret = dsa_slave_resume(ds->ports[i].netdev); 207 ret = dsa_slave_resume(ds->ports[i].slave);
271 if (ret) 208 if (ret)
272 return ret; 209 return ret;
273 } 210 }
@@ -289,6 +226,28 @@ bool dsa_schedule_work(struct work_struct *work)
289 return queue_work(dsa_owq, work); 226 return queue_work(dsa_owq, work);
290} 227}
291 228
229static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain);
230
231int register_dsa_notifier(struct notifier_block *nb)
232{
233 return atomic_notifier_chain_register(&dsa_notif_chain, nb);
234}
235EXPORT_SYMBOL_GPL(register_dsa_notifier);
236
237int unregister_dsa_notifier(struct notifier_block *nb)
238{
239 return atomic_notifier_chain_unregister(&dsa_notif_chain, nb);
240}
241EXPORT_SYMBOL_GPL(unregister_dsa_notifier);
242
243int call_dsa_notifiers(unsigned long val, struct net_device *dev,
244 struct dsa_notifier_info *info)
245{
246 info->dev = dev;
247 return atomic_notifier_call_chain(&dsa_notif_chain, val, info);
248}
249EXPORT_SYMBOL_GPL(call_dsa_notifiers);
250
292static int __init dsa_init_module(void) 251static int __init dsa_init_module(void)
293{ 252{
294 int rc; 253 int rc;
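
Annotation: the new register_dsa_notifier()/unregister_dsa_notifier()/call_dsa_notifiers() trio added above is a thin wrapper around a generic atomic notifier chain; a consumer would hook it roughly like this (callback and variable names are illustrative):

    #include <linux/netdevice.h>
    #include <linux/notifier.h>
    #include <net/dsa.h>

    static int my_dsa_event(struct notifier_block *nb, unsigned long event,
    			void *ptr)
    {
    	struct dsa_notifier_info *info = ptr;

    	/* info->dev identifies the netdevice the event concerns */
    	pr_debug("dsa event %lu on %s\n", event, netdev_name(info->dev));
    	return NOTIFY_DONE;
    }

    static struct notifier_block my_nb = { .notifier_call = my_dsa_event };

    /* register_dsa_notifier(&my_nb); ... unregister_dsa_notifier(&my_nb); */
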
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 873af0108e24..44e3fb7dec8c 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,293 +21,297 @@
21 21
22#include "dsa_priv.h" 22#include "dsa_priv.h"
23 23
24static LIST_HEAD(dsa_switch_trees); 24static LIST_HEAD(dsa_tree_list);
25static DEFINE_MUTEX(dsa2_mutex); 25static DEFINE_MUTEX(dsa2_mutex);
26 26
27static const struct devlink_ops dsa_devlink_ops = { 27static const struct devlink_ops dsa_devlink_ops = {
28}; 28};
29 29
30static struct dsa_switch_tree *dsa_get_dst(u32 tree) 30static struct dsa_switch_tree *dsa_tree_find(int index)
31{ 31{
32 struct dsa_switch_tree *dst; 32 struct dsa_switch_tree *dst;
33 33
34 list_for_each_entry(dst, &dsa_switch_trees, list) 34 list_for_each_entry(dst, &dsa_tree_list, list)
35 if (dst->tree == tree) { 35 if (dst->index == index)
36 kref_get(&dst->refcount);
37 return dst; 36 return dst;
38 } 37
39 return NULL; 38 return NULL;
40} 39}
41 40
42static void dsa_free_dst(struct kref *ref) 41static struct dsa_switch_tree *dsa_tree_alloc(int index)
43{ 42{
44 struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree, 43 struct dsa_switch_tree *dst;
45 refcount);
46 44
47 list_del(&dst->list); 45 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
48 kfree(dst); 46 if (!dst)
47 return NULL;
48
49 dst->index = index;
50
51 INIT_LIST_HEAD(&dst->list);
52 list_add_tail(&dsa_tree_list, &dst->list);
53
54 /* Initialize the reference counter to the number of switches, not 1 */
55 kref_init(&dst->refcount);
56 refcount_set(&dst->refcount.refcount, 0);
57
58 return dst;
49} 59}
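
Annotation: one subtlety in dsa_tree_alloc() above: kref_init() starts the counter at 1, so it is immediately rewound to 0. Per the in-line comment, the refactored code counts one reference per attached switch — each taken via dsa_tree_get() — with no implicit creator reference, and the tree is freed when the last switch detaches:

    kref_init(&dst->refcount);			/* sets the count to 1 ... */
    refcount_set(&dst->refcount.refcount, 0);	/* ... rewind: no implicit owner */
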
50 60
51static void dsa_put_dst(struct dsa_switch_tree *dst) 61static void dsa_tree_free(struct dsa_switch_tree *dst)
52{ 62{
53 kref_put(&dst->refcount, dsa_free_dst); 63 list_del(&dst->list);
64 kfree(dst);
54} 65}
55 66
56static struct dsa_switch_tree *dsa_add_dst(u32 tree) 67static struct dsa_switch_tree *dsa_tree_touch(int index)
57{ 68{
58 struct dsa_switch_tree *dst; 69 struct dsa_switch_tree *dst;
59 70
60 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 71 dst = dsa_tree_find(index);
61 if (!dst) 72 if (!dst)
62 return NULL; 73 dst = dsa_tree_alloc(index);
63 dst->tree = tree;
64 INIT_LIST_HEAD(&dst->list);
65 list_add_tail(&dsa_switch_trees, &dst->list);
66 kref_init(&dst->refcount);
67 74
68 return dst; 75 return dst;
69} 76}
70 77
71static void dsa_dst_add_ds(struct dsa_switch_tree *dst, 78static void dsa_tree_get(struct dsa_switch_tree *dst)
72 struct dsa_switch *ds, u32 index)
73{ 79{
74 kref_get(&dst->refcount); 80 kref_get(&dst->refcount);
75 dst->ds[index] = ds;
76} 81}
77 82
78static void dsa_dst_del_ds(struct dsa_switch_tree *dst, 83static void dsa_tree_release(struct kref *ref)
79 struct dsa_switch *ds, u32 index)
80{ 84{
81 dst->ds[index] = NULL; 85 struct dsa_switch_tree *dst;
82 kref_put(&dst->refcount, dsa_free_dst); 86
87 dst = container_of(ref, struct dsa_switch_tree, refcount);
88
89 dsa_tree_free(dst);
83} 90}
84 91
85/* For platform data configurations, we need to have a valid name argument to 92static void dsa_tree_put(struct dsa_switch_tree *dst)
86 * differentiate a disabled port from an enabled one
87 */
88static bool dsa_port_is_valid(struct dsa_port *port)
89{ 93{
90 return !!(port->dn || port->name); 94 kref_put(&dst->refcount, dsa_tree_release);
91} 95}
92 96
93static bool dsa_port_is_dsa(struct dsa_port *port) 97static bool dsa_port_is_dsa(struct dsa_port *port)
94{ 98{
95 if (port->name && !strcmp(port->name, "dsa")) 99 return port->type == DSA_PORT_TYPE_DSA;
96 return true;
97 else
98 return !!of_parse_phandle(port->dn, "link", 0);
99} 100}
100 101
101static bool dsa_port_is_cpu(struct dsa_port *port) 102static bool dsa_port_is_cpu(struct dsa_port *port)
102{ 103{
103 if (port->name && !strcmp(port->name, "cpu")) 104 return port->type == DSA_PORT_TYPE_CPU;
104 return true;
105 else
106 return !!of_parse_phandle(port->dn, "ethernet", 0);
107} 105}
108 106
109static bool dsa_ds_find_port_dn(struct dsa_switch *ds, 107static bool dsa_port_is_user(struct dsa_port *dp)
110 struct device_node *port)
111{ 108{
112 u32 index; 109 return dp->type == DSA_PORT_TYPE_USER;
113
114 for (index = 0; index < ds->num_ports; index++)
115 if (ds->ports[index].dn == port)
116 return true;
117 return false;
118} 110}
119 111
120static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst, 112static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
121 struct device_node *port) 113 struct device_node *dn)
122{ 114{
123 struct dsa_switch *ds; 115 struct dsa_switch *ds;
124 u32 index; 116 struct dsa_port *dp;
117 int device, port;
125 118
126 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 119 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
127 ds = dst->ds[index]; 120 ds = dst->ds[device];
128 if (!ds) 121 if (!ds)
129 continue; 122 continue;
130 123
131 if (dsa_ds_find_port_dn(ds, port)) 124 for (port = 0; port < ds->num_ports; port++) {
132 return ds; 125 dp = &ds->ports[port];
126
127 if (dp->dn == dn)
128 return dp;
129 }
133 } 130 }
134 131
135 return NULL; 132 return NULL;
136} 133}
137 134
138static int dsa_port_complete(struct dsa_switch_tree *dst, 135static bool dsa_port_setup_routing_table(struct dsa_port *dp)
139 struct dsa_switch *src_ds,
140 struct dsa_port *port,
141 u32 src_port)
142{ 136{
143 struct device_node *link; 137 struct dsa_switch *ds = dp->ds;
144 int index; 138 struct dsa_switch_tree *dst = ds->dst;
145 struct dsa_switch *dst_ds; 139 struct device_node *dn = dp->dn;
146 140 struct of_phandle_iterator it;
147 for (index = 0;; index++) { 141 struct dsa_port *link_dp;
148 link = of_parse_phandle(port->dn, "link", index); 142 int err;
149 if (!link)
150 break;
151
152 dst_ds = dsa_dst_find_port_dn(dst, link);
153 of_node_put(link);
154 143
155 if (!dst_ds) 144 of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
156 return 1; 145 link_dp = dsa_tree_find_port_by_node(dst, it.node);
146 if (!link_dp) {
147 of_node_put(it.node);
148 return false;
149 }
157 150
158 src_ds->rtable[dst_ds->index] = src_port; 151 ds->rtable[link_dp->ds->index] = dp->index;
159 } 152 }
160 153
161 return 0; 154 return true;
162} 155}
163 156
164/* A switch is complete if all the DSA ports phandles point to ports 157static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
165 * known in the tree. A return value of 1 means the tree is not
166 * complete. This is not an error condition. A value of 0 is
167 * success.
168 */
169static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
170{ 158{
171 struct dsa_port *port; 159 bool complete = true;
172 u32 index; 160 struct dsa_port *dp;
173 int err; 161 int i;
174
175 for (index = 0; index < ds->num_ports; index++) {
176 port = &ds->ports[index];
177 if (!dsa_port_is_valid(port))
178 continue;
179 162
180 if (!dsa_port_is_dsa(port)) 163 for (i = 0; i < DSA_MAX_SWITCHES; i++)
181 continue; 164 ds->rtable[i] = DSA_RTABLE_NONE;
182 165
183 err = dsa_port_complete(dst, ds, port, index); 166 for (i = 0; i < ds->num_ports; i++) {
184 if (err != 0) 167 dp = &ds->ports[i];
185 return err;
186 168
187 ds->dsa_port_mask |= BIT(index); 169 if (dsa_port_is_dsa(dp)) {
170 complete = dsa_port_setup_routing_table(dp);
171 if (!complete)
172 break;
173 }
188 } 174 }
189 175
190 return 0; 176 return complete;
191} 177}
192 178
193/* A tree is complete if all the DSA ports phandles point to ports 179static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
194 * known in the tree. A return value of 1 means the tree is not
195 * complete. This is not an error condition. A value of 0 is
196 * success.
197 */
198static int dsa_dst_complete(struct dsa_switch_tree *dst)
199{ 180{
200 struct dsa_switch *ds; 181 struct dsa_switch *ds;
201 u32 index; 182 bool complete = true;
202 int err; 183 int device;
203 184
204 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 185 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
205 ds = dst->ds[index]; 186 ds = dst->ds[device];
206 if (!ds) 187 if (!ds)
207 continue; 188 continue;
208 189
209 err = dsa_ds_complete(dst, ds); 190 complete = dsa_switch_setup_routing_table(ds);
210 if (err != 0) 191 if (!complete)
211 return err; 192 break;
212 } 193 }
213 194
214 return 0; 195 return complete;
215} 196}
216 197
217static int dsa_dsa_port_apply(struct dsa_port *port) 198static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
218{ 199{
219 struct dsa_switch *ds = port->ds; 200 struct dsa_switch *ds;
220 int err; 201 struct dsa_port *dp;
202 int device, port;
221 203
222 err = dsa_cpu_dsa_setup(port); 204 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
223 if (err) { 205 ds = dst->ds[device];
224 dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", 206 if (!ds)
225 port->index, err); 207 continue;
226 return err;
227 }
228 208
229 memset(&port->devlink_port, 0, sizeof(port->devlink_port)); 209 for (port = 0; port < ds->num_ports; port++) {
210 dp = &ds->ports[port];
230 211
231 return devlink_port_register(ds->devlink, &port->devlink_port, 212 if (dsa_port_is_cpu(dp))
232 port->index); 213 return dp;
233} 214 }
215 }
234 216
235static void dsa_dsa_port_unapply(struct dsa_port *port) 217 return NULL;
236{
237 devlink_port_unregister(&port->devlink_port);
238 dsa_cpu_dsa_destroy(port);
239} 218}
240 219
241static int dsa_cpu_port_apply(struct dsa_port *port) 220static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
242{ 221{
243 struct dsa_switch *ds = port->ds; 222 struct dsa_switch *ds;
244 int err; 223 struct dsa_port *dp;
224 int device, port;
245 225
246 err = dsa_cpu_dsa_setup(port); 226 /* DSA currently only supports a single CPU port */
247 if (err) { 227 dst->cpu_dp = dsa_tree_find_first_cpu(dst);
248 dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", 228 if (!dst->cpu_dp) {
249 port->index, err); 229 pr_warn("Tree has no master device\n");
250 return err; 230 return -EINVAL;
251 } 231 }
252 232
253 memset(&port->devlink_port, 0, sizeof(port->devlink_port)); 233 /* Assign the default CPU port to all ports of the fabric */
254 err = devlink_port_register(ds->devlink, &port->devlink_port, 234 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
255 port->index); 235 ds = dst->ds[device];
256 return err; 236 if (!ds)
237 continue;
238
239 for (port = 0; port < ds->num_ports; port++) {
240 dp = &ds->ports[port];
241
242 if (dsa_port_is_user(dp))
243 dp->cpu_dp = dst->cpu_dp;
244 }
245 }
246
247 return 0;
257} 248}
258 249
259static void dsa_cpu_port_unapply(struct dsa_port *port) 250static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
260{ 251{
261 devlink_port_unregister(&port->devlink_port); 252 /* DSA currently only supports a single CPU port */
262 dsa_cpu_dsa_destroy(port); 253 dst->cpu_dp = NULL;
263 port->ds->cpu_port_mask &= ~BIT(port->index);
264
265} 254}
266 255
267static int dsa_user_port_apply(struct dsa_port *port) 256static int dsa_port_setup(struct dsa_port *dp)
268{ 257{
269 struct dsa_switch *ds = port->ds; 258 struct dsa_switch *ds = dp->ds;
270 const char *name = port->name;
271 int err; 259 int err;
272 260
273 if (port->dn) 261 memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
274 name = of_get_property(port->dn, "label", NULL);
275 if (!name)
276 name = "eth%d";
277 262
278 err = dsa_slave_create(port, name); 263 err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
279 if (err) {
280 dev_warn(ds->dev, "Failed to create slave %d: %d\n",
281 port->index, err);
282 port->netdev = NULL;
283 return err;
284 }
285
286 memset(&port->devlink_port, 0, sizeof(port->devlink_port));
287 err = devlink_port_register(ds->devlink, &port->devlink_port,
288 port->index);
289 if (err) 264 if (err)
290 return err; 265 return err;
291 266
292 devlink_port_type_eth_set(&port->devlink_port, port->netdev); 267 switch (dp->type) {
268 case DSA_PORT_TYPE_UNUSED:
269 break;
270 case DSA_PORT_TYPE_CPU:
271 case DSA_PORT_TYPE_DSA:
272 err = dsa_port_fixed_link_register_of(dp);
273 if (err) {
274 dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
275 ds->index, dp->index);
276 return err;
277 }
278
279 break;
280 case DSA_PORT_TYPE_USER:
281 err = dsa_slave_create(dp);
282 if (err)
283 dev_err(ds->dev, "failed to create slave for port %d.%d\n",
284 ds->index, dp->index);
285 else
286 devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
287 break;
288 }
293 289
294 return 0; 290 return 0;
295} 291}
296 292
297static void dsa_user_port_unapply(struct dsa_port *port) 293static void dsa_port_teardown(struct dsa_port *dp)
298{ 294{
299 devlink_port_unregister(&port->devlink_port); 295 devlink_port_unregister(&dp->devlink_port);
300 if (port->netdev) { 296
301 dsa_slave_destroy(port->netdev); 297 switch (dp->type) {
302 port->netdev = NULL; 298 case DSA_PORT_TYPE_UNUSED:
303 port->ds->enabled_port_mask &= ~(1 << port->index); 299 break;
300 case DSA_PORT_TYPE_CPU:
301 case DSA_PORT_TYPE_DSA:
302 dsa_port_fixed_link_unregister_of(dp);
303 break;
304 case DSA_PORT_TYPE_USER:
305 if (dp->slave) {
306 dsa_slave_destroy(dp->slave);
307 dp->slave = NULL;
308 }
309 break;
304 } 310 }
305} 311}
306 312
307static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) 313static int dsa_switch_setup(struct dsa_switch *ds)
308{ 314{
309 struct dsa_port *port;
310 u32 index;
311 int err; 315 int err;
312 316
313 /* Initialize ds->phys_mii_mask before registering the slave MDIO bus 317 /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
@@ -315,7 +319,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
315 * the slave MDIO bus driver rely on these values for probing PHY 319 * the slave MDIO bus driver rely on these values for probing PHY
316 * devices or not 320 * devices or not
317 */ 321 */
318 ds->phys_mii_mask = ds->enabled_port_mask; 322 ds->phys_mii_mask |= dsa_user_ports(ds);
319 323
320 /* Add the switch to devlink before calling setup, so that setup can 324 /* Add the switch to devlink before calling setup, so that setup can
321 * add dpipe tables 325 * add dpipe tables
@@ -336,12 +340,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
336 if (err) 340 if (err)
337 return err; 341 return err;
338 342
339 if (ds->ops->set_addr) {
340 err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr);
341 if (err < 0)
342 return err;
343 }
344
345 if (!ds->slave_mii_bus && ds->ops->phy_read) { 343 if (!ds->slave_mii_bus && ds->ops->phy_read) {
346 ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); 344 ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
347 if (!ds->slave_mii_bus) 345 if (!ds->slave_mii_bus)
@@ -354,56 +352,11 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
354 return err; 352 return err;
355 } 353 }
356 354
357 for (index = 0; index < ds->num_ports; index++) {
358 port = &ds->ports[index];
359 if (!dsa_port_is_valid(port))
360 continue;
361
362 if (dsa_port_is_dsa(port)) {
363 err = dsa_dsa_port_apply(port);
364 if (err)
365 return err;
366 continue;
367 }
368
369 if (dsa_port_is_cpu(port)) {
370 err = dsa_cpu_port_apply(port);
371 if (err)
372 return err;
373 continue;
374 }
375
376 err = dsa_user_port_apply(port);
377 if (err)
378 continue;
379 }
380
381 return 0; 355 return 0;
382} 356}
383 357
384static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) 358static void dsa_switch_teardown(struct dsa_switch *ds)
385{ 359{
386 struct dsa_port *port;
387 u32 index;
388
389 for (index = 0; index < ds->num_ports; index++) {
390 port = &ds->ports[index];
391 if (!dsa_port_is_valid(port))
392 continue;
393
394 if (dsa_port_is_dsa(port)) {
395 dsa_dsa_port_unapply(port);
396 continue;
397 }
398
399 if (dsa_port_is_cpu(port)) {
400 dsa_cpu_port_unapply(port);
401 continue;
402 }
403
404 dsa_user_port_unapply(port);
405 }
406
407 if (ds->slave_mii_bus && ds->ops->phy_read) 360 if (ds->slave_mii_bus && ds->ops->phy_read)
408 mdiobus_unregister(ds->slave_mii_bus); 361 mdiobus_unregister(ds->slave_mii_bus);
409 362
@@ -417,198 +370,228 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
417 370
418} 371}
419 372
420static int dsa_dst_apply(struct dsa_switch_tree *dst) 373static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
421{ 374{
422 struct dsa_switch *ds; 375 struct dsa_switch *ds;
423 u32 index; 376 struct dsa_port *dp;
377 int device, port;
424 int err; 378 int err;
425 379
426 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 380 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
427 ds = dst->ds[index]; 381 ds = dst->ds[device];
428 if (!ds) 382 if (!ds)
429 continue; 383 continue;
430 384
431 err = dsa_ds_apply(dst, ds); 385 err = dsa_switch_setup(ds);
432 if (err) 386 if (err)
433 return err; 387 return err;
434 }
435 388
436 if (dst->cpu_dp) { 389 for (port = 0; port < ds->num_ports; port++) {
437 err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); 390 dp = &ds->ports[port];
438 if (err)
439 return err;
440 }
441 391
442 /* If we use a tagging format that doesn't have an ethertype 392 err = dsa_port_setup(dp);
443 * field, make sure that all packets from this point on get 393 if (err)
444 * sent to the tag format's receive function. 394 return err;
445 */ 395 }
446 wmb(); 396 }
447 dst->cpu_dp->netdev->dsa_ptr = dst;
448 dst->applied = true;
449 397
450 return 0; 398 return 0;
451} 399}
452 400
453static void dsa_dst_unapply(struct dsa_switch_tree *dst) 401static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
454{ 402{
455 struct dsa_switch *ds; 403 struct dsa_switch *ds;
456 u32 index; 404 struct dsa_port *dp;
457 405 int device, port;
458 if (!dst->applied)
459 return;
460
461 dst->cpu_dp->netdev->dsa_ptr = NULL;
462
463 /* If we used a tagging format that doesn't have an ethertype
464 * field, make sure that all packets from this point get sent
465 * without the tag and go through the regular receive path.
466 */
467 wmb();
468 406
469 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 407 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
470 ds = dst->ds[index]; 408 ds = dst->ds[device];
471 if (!ds) 409 if (!ds)
472 continue; 410 continue;
473 411
474 dsa_ds_unapply(dst, ds); 412 for (port = 0; port < ds->num_ports; port++) {
475 } 413 dp = &ds->ports[port];
414
415 dsa_port_teardown(dp);
416 }
476 417
477 if (dst->cpu_dp) { 418 dsa_switch_teardown(ds);
478 dsa_cpu_port_ethtool_restore(dst->cpu_dp);
479 dst->cpu_dp = NULL;
480 } 419 }
420}
421
422static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
423{
424 struct dsa_port *cpu_dp = dst->cpu_dp;
425 struct net_device *master = cpu_dp->master;
481 426
482 pr_info("DSA: tree %d unapplied\n", dst->tree); 427 /* DSA currently supports a single pair of CPU port and master device */
483 dst->applied = false; 428 return dsa_master_setup(master, cpu_dp);
484} 429}
485 430
486static int dsa_cpu_parse(struct dsa_port *port, u32 index, 431static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
487 struct dsa_switch_tree *dst,
488 struct dsa_switch *ds)
489{ 432{
490 enum dsa_tag_protocol tag_protocol; 433 struct dsa_port *cpu_dp = dst->cpu_dp;
491 struct net_device *ethernet_dev; 434 struct net_device *master = cpu_dp->master;
492 struct device_node *ethernet;
493 435
494 if (port->dn) { 436 return dsa_master_teardown(master);
495 ethernet = of_parse_phandle(port->dn, "ethernet", 0); 437}
496 if (!ethernet)
497 return -EINVAL;
498 ethernet_dev = of_find_net_device_by_node(ethernet);
499 } else {
500 ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]);
501 dev_put(ethernet_dev);
502 }
503 438
504 if (!ethernet_dev) 439static int dsa_tree_setup(struct dsa_switch_tree *dst)
505 return -EPROBE_DEFER; 440{
441 bool complete;
442 int err;
506 443
507 if (!dst->cpu_dp) { 444 if (dst->setup) {
508 dst->cpu_dp = port; 445 pr_err("DSA: tree %d already setup! Disjoint trees?\n",
509 dst->cpu_dp->netdev = ethernet_dev; 446 dst->index);
447 return -EEXIST;
510 } 448 }
511 449
512 /* Initialize cpu_port_mask now for drv->setup() 450 complete = dsa_tree_setup_routing_table(dst);
513 * to have access to a correct value, just like what 451 if (!complete)
514 * net/dsa/dsa.c::dsa_switch_setup_one does. 452 return 0;
515 */
516 ds->cpu_port_mask |= BIT(index);
517 453
518 tag_protocol = ds->ops->get_tag_protocol(ds); 454 err = dsa_tree_setup_default_cpu(dst);
519 dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); 455 if (err)
520 if (IS_ERR(dst->tag_ops)) { 456 return err;
521 dev_warn(ds->dev, "No tagger for this switch\n");
522 ds->cpu_port_mask &= ~BIT(index);
523 return PTR_ERR(dst->tag_ops);
524 }
525 457
526 dst->rcv = dst->tag_ops->rcv; 458 err = dsa_tree_setup_switches(dst);
459 if (err)
460 return err;
461
462 err = dsa_tree_setup_master(dst);
463 if (err)
464 return err;
465
466 dst->setup = true;
467
468 pr_info("DSA: tree %d setup\n", dst->index);
527 469
528 return 0; 470 return 0;
529} 471}
530 472
531static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) 473static void dsa_tree_teardown(struct dsa_switch_tree *dst)
532{ 474{
533 struct dsa_port *port; 475 if (!dst->setup)
534 u32 index; 476 return;
477
478 dsa_tree_teardown_master(dst);
479
480 dsa_tree_teardown_switches(dst);
481
482 dsa_tree_teardown_default_cpu(dst);
483
484 pr_info("DSA: tree %d torn down\n", dst->index);
485
486 dst->setup = false;
487}
488
489static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
490 unsigned int index)
491{
492 dsa_tree_teardown(dst);
493
494 dst->ds[index] = NULL;
495 dsa_tree_put(dst);
496}
497
498static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
499 struct dsa_switch *ds)
500{
501 unsigned int index = ds->index;
535 int err; 502 int err;
536 503
537 for (index = 0; index < ds->num_ports; index++) { 504 if (dst->ds[index])
538 port = &ds->ports[index]; 505 return -EBUSY;
539 if (!dsa_port_is_valid(port) ||
540 dsa_port_is_dsa(port))
541 continue;
542 506
543 if (dsa_port_is_cpu(port)) { 507 dsa_tree_get(dst);
544 err = dsa_cpu_parse(port, index, dst, ds); 508 dst->ds[index] = ds;
545 if (err)
546 return err;
547 } else {
548 /* Initialize enabled_port_mask now for drv->setup()
549 * to have access to a correct value, just like what
550 * net/dsa/dsa.c::dsa_switch_setup_one does.
551 */
552 ds->enabled_port_mask |= BIT(index);
553 }
554 509
555 } 510 err = dsa_tree_setup(dst);
511 if (err)
512 dsa_tree_remove_switch(dst, index);
513
514 return err;
515}
556 516
557 pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index); 517static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
518{
519 if (!name)
520 name = "eth%d";
521
522 dp->type = DSA_PORT_TYPE_USER;
523 dp->name = name;
558 524
559 return 0; 525 return 0;
560} 526}
561 527
562static int dsa_dst_parse(struct dsa_switch_tree *dst) 528static int dsa_port_parse_dsa(struct dsa_port *dp)
563{ 529{
564 struct dsa_switch *ds; 530 dp->type = DSA_PORT_TYPE_DSA;
565 struct dsa_port *dp;
566 u32 index;
567 int port;
568 int err;
569 531
570 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 532 return 0;
571 ds = dst->ds[index]; 533}
572 if (!ds)
573 continue;
574 534
575 err = dsa_ds_parse(dst, ds); 535static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
576 if (err) 536{
577 return err; 537 struct dsa_switch *ds = dp->ds;
578 } 538 struct dsa_switch_tree *dst = ds->dst;
539 const struct dsa_device_ops *tag_ops;
540 enum dsa_tag_protocol tag_protocol;
579 541
580 if (!dst->cpu_dp) { 542 tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
581 pr_warn("Tree has no master device\n"); 543 tag_ops = dsa_resolve_tag_protocol(tag_protocol);
582 return -EINVAL; 544 if (IS_ERR(tag_ops)) {
545 dev_warn(ds->dev, "No tagger for this switch\n");
546 return PTR_ERR(tag_ops);
583 } 547 }
584 548
585 /* Assign the default CPU port to all ports of the fabric */ 549 dp->type = DSA_PORT_TYPE_CPU;
586 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 550 dp->rcv = tag_ops->rcv;
587 ds = dst->ds[index]; 551 dp->tag_ops = tag_ops;
588 if (!ds) 552 dp->master = master;
589 continue; 553 dp->dst = dst;
590 554
591 for (port = 0; port < ds->num_ports; port++) { 555 return 0;
592 dp = &ds->ports[port]; 556}
593 if (!dsa_port_is_valid(dp) ||
594 dsa_port_is_dsa(dp) ||
595 dsa_port_is_cpu(dp))
596 continue;
597 557
598 dp->cpu_dp = dst->cpu_dp; 558static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
599 } 559{
560 struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
561 const char *name = of_get_property(dn, "label", NULL);
562 bool link = of_property_read_bool(dn, "link");
563
564 dp->dn = dn;
565
566 if (ethernet) {
567 struct net_device *master;
568
569 master = of_find_net_device_by_node(ethernet);
570 if (!master)
571 return -EPROBE_DEFER;
572
573 return dsa_port_parse_cpu(dp, master);
600 } 574 }
601 575
602 pr_info("DSA: tree %d parsed\n", dst->tree); 576 if (link)
577 return dsa_port_parse_dsa(dp);
603 578
604 return 0; 579 return dsa_port_parse_user(dp, name);
605} 580}
606 581
607static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) 582static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
583 struct device_node *dn)
608{ 584{
609 struct device_node *port; 585 struct device_node *ports, *port;
610 int err; 586 struct dsa_port *dp;
611 u32 reg; 587 u32 reg;
588 int err;
589
590 ports = of_get_child_by_name(dn, "ports");
591 if (!ports) {
592 dev_err(ds->dev, "no ports child node found\n");
593 return -EINVAL;
594 }
612 595
613 for_each_available_child_of_node(ports, port) { 596 for_each_available_child_of_node(ports, port) {
614 err = of_property_read_u32(port, "reg", &reg); 597 err = of_property_read_u32(port, "reg", &reg);
@@ -618,174 +601,140 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
618 if (reg >= ds->num_ports) 601 if (reg >= ds->num_ports)
619 return -EINVAL; 602 return -EINVAL;
620 603
621 ds->ports[reg].dn = port; 604 dp = &ds->ports[reg];
605
606 err = dsa_port_parse_of(dp, port);
607 if (err)
608 return err;
622 } 609 }
623 610
624 return 0; 611 return 0;
625} 612}
626 613
627static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) 614static int dsa_switch_parse_member_of(struct dsa_switch *ds,
615 struct device_node *dn)
628{ 616{
629 bool valid_name_found = false; 617 u32 m[2] = { 0, 0 };
630 unsigned int i; 618 int sz;
631 619
632 for (i = 0; i < DSA_MAX_PORTS; i++) { 620 /* Don't error out if this optional property isn't found */
633 if (!cd->port_names[i]) 621 sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2);
634 continue; 622 if (sz < 0 && sz != -EINVAL)
623 return sz;
635 624
636 ds->ports[i].name = cd->port_names[i]; 625 ds->index = m[1];
637 valid_name_found = true; 626 if (ds->index >= DSA_MAX_SWITCHES)
638 }
639
640 if (!valid_name_found && i == DSA_MAX_PORTS)
641 return -EINVAL; 627 return -EINVAL;
642 628
629 ds->dst = dsa_tree_touch(m[0]);
630 if (!ds->dst)
631 return -ENOMEM;
632
643 return 0; 633 return 0;
644} 634}
645 635
646static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) 636static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
647{ 637{
648 int err; 638 int err;
649 639
650 *tree = *index = 0; 640 err = dsa_switch_parse_member_of(ds, dn);
651
652 err = of_property_read_u32_index(np, "dsa,member", 0, tree);
653 if (err) {
654 /* Does not exist, but it is optional */
655 if (err == -EINVAL)
656 return 0;
657 return err;
658 }
659
660 err = of_property_read_u32_index(np, "dsa,member", 1, index);
661 if (err) 641 if (err)
662 return err; 642 return err;
663 643
664 if (*index >= DSA_MAX_SWITCHES) 644 return dsa_switch_parse_ports_of(ds, dn);
665 return -EINVAL;
666
667 return 0;
668} 645}
669 646
670static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) 647static int dsa_port_parse(struct dsa_port *dp, const char *name,
648 struct device *dev)
671{ 649{
672 if (!pd) 650 if (!strcmp(name, "cpu")) {
673 return -ENODEV; 651 struct net_device *master;
674 652
675 /* We do not support complex trees with dsa_chip_data */ 653 master = dsa_dev_to_net_device(dev);
676 *tree = 0; 654 if (!master)
677 *index = 0; 655 return -EPROBE_DEFER;
678 656
679 return 0; 657 dev_put(master);
680}
681
682static struct device_node *dsa_get_ports(struct dsa_switch *ds,
683 struct device_node *np)
684{
685 struct device_node *ports;
686 658
687 ports = of_get_child_by_name(np, "ports"); 659 return dsa_port_parse_cpu(dp, master);
688 if (!ports) {
689 dev_err(ds->dev, "no ports child node found\n");
690 return ERR_PTR(-EINVAL);
691 } 660 }
692 661
693 return ports; 662 if (!strcmp(name, "dsa"))
663 return dsa_port_parse_dsa(dp);
664
665 return dsa_port_parse_user(dp, name);
694} 666}
695 667
696static int _dsa_register_switch(struct dsa_switch *ds) 668static int dsa_switch_parse_ports(struct dsa_switch *ds,
669 struct dsa_chip_data *cd)
697{ 670{
698 struct dsa_chip_data *pdata = ds->dev->platform_data; 671 bool valid_name_found = false;
699 struct device_node *np = ds->dev->of_node; 672 struct dsa_port *dp;
700 struct dsa_switch_tree *dst; 673 struct device *dev;
701 struct device_node *ports; 674 const char *name;
702 u32 tree, index; 675 unsigned int i;
703 int i, err; 676 int err;
704
705 if (np) {
706 err = dsa_parse_member_dn(np, &tree, &index);
707 if (err)
708 return err;
709 677
710 ports = dsa_get_ports(ds, np); 678 for (i = 0; i < DSA_MAX_PORTS; i++) {
711 if (IS_ERR(ports)) 679 name = cd->port_names[i];
712 return PTR_ERR(ports); 680 dev = cd->netdev[i];
681 dp = &ds->ports[i];
713 682
714 err = dsa_parse_ports_dn(ports, ds); 683 if (!name)
715 if (err) 684 continue;
716 return err;
717 } else {
718 err = dsa_parse_member(pdata, &tree, &index);
719 if (err)
720 return err;
721 685
722 err = dsa_parse_ports(pdata, ds); 686 err = dsa_port_parse(dp, name, dev);
723 if (err) 687 if (err)
724 return err; 688 return err;
725 }
726 689
727 dst = dsa_get_dst(tree); 690 valid_name_found = true;
728 if (!dst) {
729 dst = dsa_add_dst(tree);
730 if (!dst)
731 return -ENOMEM;
732 }
733
734 if (dst->ds[index]) {
735 err = -EBUSY;
736 goto out;
737 } 691 }
738 692
739 ds->dst = dst; 693 if (!valid_name_found && i == DSA_MAX_PORTS)
740 ds->index = index; 694 return -EINVAL;
741 ds->cd = pdata;
742
743 /* Initialize the routing table */
744 for (i = 0; i < DSA_MAX_SWITCHES; ++i)
745 ds->rtable[i] = DSA_RTABLE_NONE;
746 695
747 dsa_dst_add_ds(dst, ds, index); 696 return 0;
697}
748 698
749 err = dsa_dst_complete(dst); 699static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
750 if (err < 0) 700{
751 goto out_del_dst; 701 ds->cd = cd;
752 702
753 if (err == 1) { 703 /* We don't support interconnected switches or multiple trees via
754 /* Not all switches registered yet */ 704 * platform data, so this is the unique switch of the tree.
755 err = 0; 705 */
756 goto out; 706 ds->index = 0;
757 } 707 ds->dst = dsa_tree_touch(0);
708 if (!ds->dst)
709 return -ENOMEM;
758 710
759 if (dst->applied) { 711 return dsa_switch_parse_ports(ds, cd);
760 pr_info("DSA: Disjoint trees?\n"); 712}
761 return -EINVAL;
762 }
763 713
764 err = dsa_dst_parse(dst); 714static int dsa_switch_add(struct dsa_switch *ds)
765 if (err) { 715{
766 if (err == -EPROBE_DEFER) { 716 struct dsa_switch_tree *dst = ds->dst;
767 dsa_dst_del_ds(dst, ds, ds->index);
768 return err;
769 }
770 717
771 goto out_del_dst; 718 return dsa_tree_add_switch(dst, ds);
772 } 719}
773 720
774 err = dsa_dst_apply(dst); 721static int dsa_switch_probe(struct dsa_switch *ds)
775 if (err) { 722{
776 dsa_dst_unapply(dst); 723 struct dsa_chip_data *pdata = ds->dev->platform_data;
777 goto out_del_dst; 724 struct device_node *np = ds->dev->of_node;
778 } 725 int err;
779 726
780 dsa_put_dst(dst); 727 if (np)
781 return 0; 728 err = dsa_switch_parse_of(ds, np);
729 else if (pdata)
730 err = dsa_switch_parse(ds, pdata);
731 else
732 err = -ENODEV;
782 733
783out_del_dst: 734 if (err)
784 dsa_dst_del_ds(dst, ds, ds->index); 735 return err;
785out:
786 dsa_put_dst(dst);
787 736
788 return err; 737 return dsa_switch_add(ds);
789} 738}
790 739
791struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) 740struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
@@ -815,26 +764,25 @@ int dsa_register_switch(struct dsa_switch *ds)
815 int err; 764 int err;
816 765
817 mutex_lock(&dsa2_mutex); 766 mutex_lock(&dsa2_mutex);
818 err = _dsa_register_switch(ds); 767 err = dsa_switch_probe(ds);
819 mutex_unlock(&dsa2_mutex); 768 mutex_unlock(&dsa2_mutex);
820 769
821 return err; 770 return err;
822} 771}
823EXPORT_SYMBOL_GPL(dsa_register_switch); 772EXPORT_SYMBOL_GPL(dsa_register_switch);
824 773
825static void _dsa_unregister_switch(struct dsa_switch *ds) 774static void dsa_switch_remove(struct dsa_switch *ds)
826{ 775{
827 struct dsa_switch_tree *dst = ds->dst; 776 struct dsa_switch_tree *dst = ds->dst;
777 unsigned int index = ds->index;
828 778
829 dsa_dst_unapply(dst); 779 dsa_tree_remove_switch(dst, index);
830
831 dsa_dst_del_ds(dst, ds, ds->index);
832} 780}
833 781
834void dsa_unregister_switch(struct dsa_switch *ds) 782void dsa_unregister_switch(struct dsa_switch *ds)
835{ 783{
836 mutex_lock(&dsa2_mutex); 784 mutex_lock(&dsa2_mutex);
837 _dsa_unregister_switch(ds); 785 dsa_switch_remove(ds);
838 mutex_unlock(&dsa2_mutex); 786 mutex_unlock(&dsa2_mutex);
839} 787}
840EXPORT_SYMBOL_GPL(dsa_unregister_switch); 788EXPORT_SYMBOL_GPL(dsa_unregister_switch);
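
A note on the hunk above: the new dsa_switch_parse_member_of() treats a missing "dsa,member" property as the default { tree 0, switch 0 } rather than as an error. It does so by letting -EINVAL (the OF code for "property not found") fall through while still propagating any other failure, which is why the check reads sz < 0 && sz != -EINVAL. A minimal userspace sketch of that pattern, assuming a hypothetical read_u32_array() stub in place of of_property_read_variable_u32_array():

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical stand-in for of_property_read_variable_u32_array():
     * returns the number of elements read, -EINVAL when the property is
     * absent, or another negative errno on malformed data.
     */
    static int read_u32_array(int present, unsigned int *out)
    {
        if (!present)
            return -EINVAL;
        out[0] = 1; /* tree index */
        out[1] = 2; /* switch index within the tree */
        return 2;
    }

    static int parse_member(int present, unsigned int *tree, unsigned int *index)
    {
        unsigned int m[2] = { 0, 0 }; /* defaults when the property is absent */
        int sz = read_u32_array(present, m);

        /* Only a real error aborts; -EINVAL means optional-and-missing */
        if (sz < 0 && sz != -EINVAL)
            return sz;

        *tree = m[0];
        *index = m[1];
        return 0;
    }

    int main(void)
    {
        unsigned int tree, index;

        parse_member(0, &tree, &index);
        printf("absent:  tree %u, switch %u\n", tree, index); /* 0, 0 */
        parse_member(1, &tree, &index);
        printf("present: tree %u, switch %u\n", tree, index); /* 1, 2 */
        return 0;
    }
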
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9c3eeb72462d..7d036696e8c4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -66,7 +66,7 @@ struct dsa_notifier_vlan_info {
66}; 66};
67 67
68struct dsa_slave_priv { 68struct dsa_slave_priv {
69 /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */ 69 /* Copy of CPU port xmit for faster access in slave transmit hot path */
70 struct sk_buff * (*xmit)(struct sk_buff *skb, 70 struct sk_buff * (*xmit)(struct sk_buff *skb,
71 struct net_device *dev); 71 struct net_device *dev);
72 72
@@ -79,7 +79,6 @@ struct dsa_slave_priv {
79 * The phylib phy_device pointer for the PHY connected 79 * The phylib phy_device pointer for the PHY connected
80 * to this port. 80 * to this port.
81 */ 81 */
82 struct phy_device *phy;
83 phy_interface_t phy_interface; 82 phy_interface_t phy_interface;
84 int old_link; 83 int old_link;
85 int old_pause; 84 int old_pause;
@@ -94,11 +93,7 @@ struct dsa_slave_priv {
94}; 93};
95 94
96/* dsa.c */ 95/* dsa.c */
97int dsa_cpu_dsa_setup(struct dsa_port *port);
98void dsa_cpu_dsa_destroy(struct dsa_port *dport);
99const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); 96const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
100int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
101void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
102bool dsa_schedule_work(struct work_struct *work); 97bool dsa_schedule_work(struct work_struct *work);
103 98
104/* legacy.c */ 99/* legacy.c */
@@ -112,10 +107,35 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
112 struct net_device *dev, 107 struct net_device *dev,
113 const unsigned char *addr, u16 vid); 108 const unsigned char *addr, u16 vid);
114 109
110/* master.c */
111int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
112void dsa_master_teardown(struct net_device *dev);
113
114static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
115 int device, int port)
116{
117 struct dsa_port *cpu_dp = dev->dsa_ptr;
118 struct dsa_switch_tree *dst = cpu_dp->dst;
119 struct dsa_switch *ds;
120
121 if (device < 0 || device >= DSA_MAX_SWITCHES)
122 return NULL;
123
124 ds = dst->ds[device];
125 if (!ds)
126 return NULL;
127
128 if (port < 0 || port >= ds->num_ports)
129 return NULL;
130
131 return ds->ports[port].slave;
132}
133
115/* port.c */ 134/* port.c */
116int dsa_port_set_state(struct dsa_port *dp, u8 state, 135int dsa_port_set_state(struct dsa_port *dp, u8 state,
117 struct switchdev_trans *trans); 136 struct switchdev_trans *trans);
118void dsa_port_set_state_now(struct dsa_port *dp, u8 state); 137int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
138void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy);
119int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); 139int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
120void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); 140void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
121int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, 141int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
@@ -126,33 +146,52 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
126 u16 vid); 146 u16 vid);
127int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, 147int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
128 u16 vid); 148 u16 vid);
129int dsa_port_mdb_add(struct dsa_port *dp, 149int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
150int dsa_port_mdb_add(const struct dsa_port *dp,
130 const struct switchdev_obj_port_mdb *mdb, 151 const struct switchdev_obj_port_mdb *mdb,
131 struct switchdev_trans *trans); 152 struct switchdev_trans *trans);
132int dsa_port_mdb_del(struct dsa_port *dp, 153int dsa_port_mdb_del(const struct dsa_port *dp,
133 const struct switchdev_obj_port_mdb *mdb); 154 const struct switchdev_obj_port_mdb *mdb);
134int dsa_port_vlan_add(struct dsa_port *dp, 155int dsa_port_vlan_add(struct dsa_port *dp,
135 const struct switchdev_obj_port_vlan *vlan, 156 const struct switchdev_obj_port_vlan *vlan,
136 struct switchdev_trans *trans); 157 struct switchdev_trans *trans);
137int dsa_port_vlan_del(struct dsa_port *dp, 158int dsa_port_vlan_del(struct dsa_port *dp,
138 const struct switchdev_obj_port_vlan *vlan); 159 const struct switchdev_obj_port_vlan *vlan);
160int dsa_port_fixed_link_register_of(struct dsa_port *dp);
161void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
162
139/* slave.c */ 163/* slave.c */
140extern const struct dsa_device_ops notag_netdev_ops; 164extern const struct dsa_device_ops notag_netdev_ops;
141void dsa_slave_mii_bus_init(struct dsa_switch *ds); 165void dsa_slave_mii_bus_init(struct dsa_switch *ds);
142void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops); 166int dsa_slave_create(struct dsa_port *dp);
143int dsa_slave_create(struct dsa_port *port, const char *name);
144void dsa_slave_destroy(struct net_device *slave_dev); 167void dsa_slave_destroy(struct net_device *slave_dev);
145int dsa_slave_suspend(struct net_device *slave_dev); 168int dsa_slave_suspend(struct net_device *slave_dev);
146int dsa_slave_resume(struct net_device *slave_dev); 169int dsa_slave_resume(struct net_device *slave_dev);
147int dsa_slave_register_notifier(void); 170int dsa_slave_register_notifier(void);
148void dsa_slave_unregister_notifier(void); 171void dsa_slave_unregister_notifier(void);
149 172
173static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
174{
175 struct dsa_slave_priv *p = netdev_priv(dev);
176
177 return p->dp;
178}
179
180static inline struct net_device *
181dsa_slave_to_master(const struct net_device *dev)
182{
183 struct dsa_port *dp = dsa_slave_to_port(dev);
184
185 return dp->cpu_dp->master;
186}
187
150/* switch.c */ 188/* switch.c */
151int dsa_switch_register_notifier(struct dsa_switch *ds); 189int dsa_switch_register_notifier(struct dsa_switch *ds);
152void dsa_switch_unregister_notifier(struct dsa_switch *ds); 190void dsa_switch_unregister_notifier(struct dsa_switch *ds);
153 191
154/* tag_brcm.c */ 192/* tag_brcm.c */
155extern const struct dsa_device_ops brcm_netdev_ops; 193extern const struct dsa_device_ops brcm_netdev_ops;
194extern const struct dsa_device_ops brcm_prepend_netdev_ops;
156 195
157/* tag_dsa.c */ 196/* tag_dsa.c */
158extern const struct dsa_device_ops dsa_netdev_ops; 197extern const struct dsa_device_ops dsa_netdev_ops;
@@ -175,14 +214,4 @@ extern const struct dsa_device_ops qca_netdev_ops;
175/* tag_trailer.c */ 214/* tag_trailer.c */
176extern const struct dsa_device_ops trailer_netdev_ops; 215extern const struct dsa_device_ops trailer_netdev_ops;
177 216
178static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
179{
180 return p->dp->cpu_dp->netdev;
181}
182
183static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst)
184{
185 return dst->cpu_dp;
186}
187
188#endif 217#endif
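
The new dsa_master_find_slave() helper added to dsa_priv.h above validates both indices before dereferencing anything, so taggers can feed it device and port numbers parsed straight from an untrusted frame header. A simplified userspace analogue of those checks (toy_switch and the fixed arrays are illustrative, not the kernel structures):

    #include <stddef.h>
    #include <stdio.h>

    #define MAX_SWITCHES 4
    #define NUM_PORTS    8

    struct toy_switch {
        const char *slave_name[NUM_PORTS]; /* NULL when no slave netdev */
    };

    static struct toy_switch sw0 = { .slave_name = { "lan0", "lan1" } };
    static struct toy_switch *tree[MAX_SWITCHES] = { &sw0 };

    /* Mirror the dsa_master_find_slave() ordering: range-check the device,
     * check the switch slot, then range-check the port.
     */
    static const char *find_slave(int device, int port)
    {
        struct toy_switch *ds;

        if (device < 0 || device >= MAX_SWITCHES)
            return NULL;

        ds = tree[device];
        if (!ds)
            return NULL;

        if (port < 0 || port >= NUM_PORTS)
            return NULL;

        return ds->slave_name[port];
    }

    int main(void)
    {
        printf("%s\n", find_slave(0, 1));          /* lan1 */
        printf("%p\n", (void *)find_slave(3, 0));  /* NULL: empty slot */
        printf("%p\n", (void *)find_slave(0, 99)); /* NULL: port out of range */
        return 0;
    }
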
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 91e6f7981d39..84611d7fcfa2 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
86 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) 86 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
87 continue; 87 continue;
88 88
89 ret = dsa_cpu_dsa_setup(&ds->ports[port]); 89 ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
90 if (ret) 90 if (ret)
91 return ret; 91 return ret;
92 } 92 }
@@ -101,6 +101,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
101 struct dsa_chip_data *cd = ds->cd; 101 struct dsa_chip_data *cd = ds->cd;
102 bool valid_name_found = false; 102 bool valid_name_found = false;
103 int index = ds->index; 103 int index = ds->index;
104 struct dsa_port *dp;
104 int i, ret; 105 int i, ret;
105 106
106 /* 107 /*
@@ -109,9 +110,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
109 for (i = 0; i < ds->num_ports; i++) { 110 for (i = 0; i < ds->num_ports; i++) {
110 char *name; 111 char *name;
111 112
113 dp = &ds->ports[i];
114
112 name = cd->port_names[i]; 115 name = cd->port_names[i];
113 if (name == NULL) 116 if (name == NULL)
114 continue; 117 continue;
118 dp->name = name;
115 119
116 if (!strcmp(name, "cpu")) { 120 if (!strcmp(name, "cpu")) {
117 if (dst->cpu_dp) { 121 if (dst->cpu_dp) {
@@ -120,12 +124,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
120 return -EINVAL; 124 return -EINVAL;
121 } 125 }
122 dst->cpu_dp = &ds->ports[i]; 126 dst->cpu_dp = &ds->ports[i];
123 dst->cpu_dp->netdev = master; 127 dst->cpu_dp->master = master;
124 ds->cpu_port_mask |= 1 << i; 128 dp->type = DSA_PORT_TYPE_CPU;
125 } else if (!strcmp(name, "dsa")) { 129 } else if (!strcmp(name, "dsa")) {
126 ds->dsa_port_mask |= 1 << i; 130 dp->type = DSA_PORT_TYPE_DSA;
127 } else { 131 } else {
128 ds->enabled_port_mask |= 1 << i; 132 dp->type = DSA_PORT_TYPE_USER;
129 } 133 }
130 valid_name_found = true; 134 valid_name_found = true;
131 } 135 }
@@ -136,7 +140,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
136 /* Make the built-in MII bus mask match the number of ports, 140 /* Make the built-in MII bus mask match the number of ports,
137 * switch drivers can override this later 141 * switch drivers can override this later
138 */ 142 */
139 ds->phys_mii_mask = ds->enabled_port_mask; 143 ds->phys_mii_mask |= dsa_user_ports(ds);
140 144
141 /* 145 /*
142 * If the CPU connects to this switch, set the switch tree 146 * If the CPU connects to this switch, set the switch tree
@@ -144,14 +148,19 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
144 * switch. 148 * switch.
145 */ 149 */
146 if (dst->cpu_dp->ds == ds) { 150 if (dst->cpu_dp->ds == ds) {
151 const struct dsa_device_ops *tag_ops;
147 enum dsa_tag_protocol tag_protocol; 152 enum dsa_tag_protocol tag_protocol;
148 153
149 tag_protocol = ops->get_tag_protocol(ds); 154 tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index);
150 dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); 155 tag_ops = dsa_resolve_tag_protocol(tag_protocol);
151 if (IS_ERR(dst->tag_ops)) 156 if (IS_ERR(tag_ops))
152 return PTR_ERR(dst->tag_ops); 157 return PTR_ERR(tag_ops);
158
159 dst->cpu_dp->tag_ops = tag_ops;
153 160
154 dst->rcv = dst->tag_ops->rcv; 161 /* A few copies for faster access in master receive hot path */
162 dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
163 dst->cpu_dp->dst = dst;
155 } 164 }
156 165
157 memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); 166 memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
@@ -167,12 +176,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
167 if (ret) 176 if (ret)
168 return ret; 177 return ret;
169 178
170 if (ops->set_addr) {
171 ret = ops->set_addr(ds, master->dev_addr);
172 if (ret < 0)
173 return ret;
174 }
175
176 if (!ds->slave_mii_bus && ops->phy_read) { 179 if (!ds->slave_mii_bus && ops->phy_read) {
177 ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); 180 ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
178 if (!ds->slave_mii_bus) 181 if (!ds->slave_mii_bus)
@@ -191,10 +194,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
191 ds->ports[i].dn = cd->port_dn[i]; 194 ds->ports[i].dn = cd->port_dn[i];
192 ds->ports[i].cpu_dp = dst->cpu_dp; 195 ds->ports[i].cpu_dp = dst->cpu_dp;
193 196
194 if (!(ds->enabled_port_mask & (1 << i))) 197 if (dsa_is_user_port(ds, i))
195 continue; 198 continue;
196 199
197 ret = dsa_slave_create(&ds->ports[i], cd->port_names[i]); 200 ret = dsa_slave_create(&ds->ports[i]);
198 if (ret < 0) 201 if (ret < 0)
199 netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", 202 netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
200 index, i, cd->port_names[i], ret); 203 index, i, cd->port_names[i], ret);
@@ -206,10 +209,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
206 netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", 209 netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
207 index); 210 index);
208 211
209 ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp);
210 if (ret)
211 return ret;
212
213 return 0; 212 return 0;
214} 213}
215 214
@@ -263,24 +262,20 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
263 262
264 /* Destroy network devices for physical switch ports. */ 263 /* Destroy network devices for physical switch ports. */
265 for (port = 0; port < ds->num_ports; port++) { 264 for (port = 0; port < ds->num_ports; port++) {
266 if (!(ds->enabled_port_mask & (1 << port))) 265 if (!dsa_is_user_port(ds, port))
267 continue; 266 continue;
268 267
269 if (!ds->ports[port].netdev) 268 if (!ds->ports[port].slave)
270 continue; 269 continue;
271 270
272 dsa_slave_destroy(ds->ports[port].netdev); 271 dsa_slave_destroy(ds->ports[port].slave);
273 } 272 }
274 273
275 /* Disable configuration of the CPU and DSA ports */ 274 /* Disable configuration of the CPU and DSA ports */
276 for (port = 0; port < ds->num_ports; port++) { 275 for (port = 0; port < ds->num_ports; port++) {
277 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) 276 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
278 continue; 277 continue;
279 dsa_cpu_dsa_destroy(&ds->ports[port]); 278 dsa_port_fixed_link_unregister_of(&ds->ports[port]);
280
281 /* Clearing a bit which is not set does no harm */
282 ds->cpu_port_mask |= ~(1 << port);
283 ds->dsa_port_mask |= ~(1 << port);
284 } 279 }
285 280
286 if (ds->slave_mii_bus && ds->ops->phy_read) 281 if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -598,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
598 if (!configured) 593 if (!configured)
599 return -EPROBE_DEFER; 594 return -EPROBE_DEFER;
600 595
601 /* 596 return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp);
602 * If we use a tagging format that doesn't have an ethertype
603 * field, make sure that all packets from this point on get
604 * sent to the tag format's receive function.
605 */
606 wmb();
607 dev->dsa_ptr = dst;
608
609 return 0;
610} 597}
611 598
612static int dsa_probe(struct platform_device *pdev) 599static int dsa_probe(struct platform_device *pdev)
@@ -671,13 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
671{ 658{
672 int i; 659 int i;
673 660
674 dst->cpu_dp->netdev->dsa_ptr = NULL; 661 dsa_master_teardown(dst->cpu_dp->master);
675
676 /* If we used a tagging format that doesn't have an ethertype
677 * field, make sure that all packets from this point get sent
678 * without the tag and go through the regular receive path.
679 */
680 wmb();
681 662
682 for (i = 0; i < dst->pd->nr_chips; i++) { 663 for (i = 0; i < dst->pd->nr_chips; i++) {
683 struct dsa_switch *ds = dst->ds[i]; 664 struct dsa_switch *ds = dst->ds[i];
@@ -686,9 +667,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
686 dsa_switch_destroy(ds); 667 dsa_switch_destroy(ds);
687 } 668 }
688 669
689 dsa_cpu_port_ethtool_restore(dst->cpu_dp); 670 dev_put(dst->cpu_dp->master);
690
691 dev_put(dst->cpu_dp->netdev);
692} 671}
693 672
694static int dsa_remove(struct platform_device *pdev) 673static int dsa_remove(struct platform_device *pdev)
@@ -745,8 +724,7 @@ int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
745 const unsigned char *addr, u16 vid, 724 const unsigned char *addr, u16 vid,
746 u16 flags) 725 u16 flags)
747{ 726{
748 struct dsa_slave_priv *p = netdev_priv(dev); 727 struct dsa_port *dp = dsa_slave_to_port(dev);
749 struct dsa_port *dp = p->dp;
750 728
751 return dsa_port_fdb_add(dp, addr, vid); 729 return dsa_port_fdb_add(dp, addr, vid);
752} 730}
@@ -755,8 +733,7 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
755 struct net_device *dev, 733 struct net_device *dev,
756 const unsigned char *addr, u16 vid) 734 const unsigned char *addr, u16 vid)
757{ 735{
758 struct dsa_slave_priv *p = netdev_priv(dev); 736 struct dsa_port *dp = dsa_slave_to_port(dev);
759 struct dsa_port *dp = p->dp;
760 737
761 return dsa_port_fdb_del(dp, addr, vid); 738 return dsa_port_fdb_del(dp, addr, vid);
762} 739}
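
Both the legacy platform-data path above and the new dsa_port_parse() in dsa2.c classify a port purely by its label: "cpu" and "dsa" are reserved names, and anything else becomes a user port (falling back to the "eth%d" template when no label is given). A compact sketch of that mapping; the enum values mirror the DSA_PORT_TYPE_* names introduced by the patch, while port_type() itself is illustrative:

    #include <stdio.h>
    #include <string.h>

    enum dsa_port_type {
        DSA_PORT_TYPE_UNUSED,
        DSA_PORT_TYPE_CPU,
        DSA_PORT_TYPE_DSA,
        DSA_PORT_TYPE_USER,
    };

    /* Classify a port by its label, the way the parsing code does: two
     * reserved names select the CPU and DSA links, everything else is a
     * user-facing port.
     */
    static enum dsa_port_type port_type(const char *name)
    {
        if (!name)
            return DSA_PORT_TYPE_USER; /* later named "eth%d" */
        if (!strcmp(name, "cpu"))
            return DSA_PORT_TYPE_CPU;
        if (!strcmp(name, "dsa"))
            return DSA_PORT_TYPE_DSA;
        return DSA_PORT_TYPE_USER;
    }

    int main(void)
    {
        printf("cpu  -> %d\n", port_type("cpu"));  /* 1 */
        printf("dsa  -> %d\n", port_type("dsa"));  /* 2 */
        printf("lan0 -> %d\n", port_type("lan0")); /* 3 */
        return 0;
    }
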
diff --git a/net/dsa/master.c b/net/dsa/master.c
new file mode 100644
index 000000000000..00589147f042
--- /dev/null
+++ b/net/dsa/master.c
@@ -0,0 +1,143 @@
1/*
2 * Handling of a master device, switching frames via its switch fabric CPU port
3 *
4 * Copyright (c) 2017 Savoir-faire Linux Inc.
5 * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12
13#include "dsa_priv.h"
14
15static void dsa_master_get_ethtool_stats(struct net_device *dev,
16 struct ethtool_stats *stats,
17 uint64_t *data)
18{
19 struct dsa_port *cpu_dp = dev->dsa_ptr;
20 const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
21 struct dsa_switch *ds = cpu_dp->ds;
22 int port = cpu_dp->index;
23 int count = 0;
24
25 if (ops && ops->get_sset_count && ops->get_ethtool_stats) {
26 count = ops->get_sset_count(dev, ETH_SS_STATS);
27 ops->get_ethtool_stats(dev, stats, data);
28 }
29
30 if (ds->ops->get_ethtool_stats)
31 ds->ops->get_ethtool_stats(ds, port, data + count);
32}
33
34static int dsa_master_get_sset_count(struct net_device *dev, int sset)
35{
36 struct dsa_port *cpu_dp = dev->dsa_ptr;
37 const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
38 struct dsa_switch *ds = cpu_dp->ds;
39 int count = 0;
40
41 if (ops && ops->get_sset_count)
42 count += ops->get_sset_count(dev, sset);
43
44 if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
45 count += ds->ops->get_sset_count(ds);
46
47 return count;
48}
49
50static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
51 uint8_t *data)
52{
53 struct dsa_port *cpu_dp = dev->dsa_ptr;
54 const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
55 struct dsa_switch *ds = cpu_dp->ds;
56 int port = cpu_dp->index;
57 int len = ETH_GSTRING_LEN;
58 int mcount = 0, count;
59 unsigned int i;
60 uint8_t pfx[4];
61 uint8_t *ndata;
62
63 snprintf(pfx, sizeof(pfx), "p%.2d", port);
64 /* We do not want to be NULL-terminated, since this is a prefix */
65 pfx[sizeof(pfx) - 1] = '_';
66
67 if (ops && ops->get_sset_count && ops->get_strings) {
68 mcount = ops->get_sset_count(dev, ETH_SS_STATS);
69 ops->get_strings(dev, stringset, data);
70 }
71
72 if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
73 ndata = data + mcount * len;
 74 /* This function copies ETH_GSTRING_LEN bytes; we will mangle
 75 * the output afterwards to prepend the CPU port prefix we
 76 * constructed earlier
77 */
78 ds->ops->get_strings(ds, port, ndata);
79 count = ds->ops->get_sset_count(ds);
80 for (i = 0; i < count; i++) {
81 memmove(ndata + (i * len + sizeof(pfx)),
82 ndata + i * len, len - sizeof(pfx));
83 memcpy(ndata + i * len, pfx, sizeof(pfx));
84 }
85 }
86}
87
88static int dsa_master_ethtool_setup(struct net_device *dev)
89{
90 struct dsa_port *cpu_dp = dev->dsa_ptr;
91 struct dsa_switch *ds = cpu_dp->ds;
92 struct ethtool_ops *ops;
93
94 ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
95 if (!ops)
96 return -ENOMEM;
97
98 cpu_dp->orig_ethtool_ops = dev->ethtool_ops;
99 if (cpu_dp->orig_ethtool_ops)
100 memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
101
102 ops->get_sset_count = dsa_master_get_sset_count;
103 ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
104 ops->get_strings = dsa_master_get_strings;
105
106 dev->ethtool_ops = ops;
107
108 return 0;
109}
110
111static void dsa_master_ethtool_teardown(struct net_device *dev)
112{
113 struct dsa_port *cpu_dp = dev->dsa_ptr;
114
115 dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
116 cpu_dp->orig_ethtool_ops = NULL;
117}
118
119int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
120{
121 /* If we use a tagging format that doesn't have an ethertype
122 * field, make sure that all packets from this point on get
123 * sent to the tag format's receive function.
124 */
125 wmb();
126
127 dev->dsa_ptr = cpu_dp;
128
129 return dsa_master_ethtool_setup(dev);
130}
131
132void dsa_master_teardown(struct net_device *dev)
133{
134 dsa_master_ethtool_teardown(dev);
135
136 dev->dsa_ptr = NULL;
137
138 /* If we used a tagging format that doesn't have an ethertype
139 * field, make sure that all packets from this point get sent
140 * without the tag and go through the regular receive path.
141 */
142 wmb();
143}
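
dsa_master_get_strings() above prepends a "pNN_" prefix to each switch statistic name in place: every fixed-width entry is shifted right with memmove() (dropping its last four bytes) and the prefix is then copied over its head. A standalone demonstration of that in-place mangling; STR_LEN stands in for the kernel's ETH_GSTRING_LEN of 32 and is shortened here only for readability:

    #include <stdio.h>
    #include <string.h>

    #define STR_LEN 16 /* stand-in for ETH_GSTRING_LEN */

    int main(void)
    {
        char data[3][STR_LEN] = { "tx_packets", "tx_bytes", "rx_errors" };
        char pfx[4];
        int i;

        snprintf(pfx, sizeof(pfx), "p%.2d", 5);
        pfx[sizeof(pfx) - 1] = '_'; /* a prefix, deliberately not NUL-terminated */

        for (i = 0; i < 3; i++) {
            /* Shift the name right by the prefix size, truncating its
             * tail, then write the prefix into the freed-up head.
             */
            memmove(data[i] + sizeof(pfx), data[i], STR_LEN - sizeof(pfx));
            memcpy(data[i], pfx, sizeof(pfx));
        }

        for (i = 0; i < 3; i++)
            printf("%.*s\n", STR_LEN, data[i]); /* p05_tx_packets, ... */
        return 0;
    }
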
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 659676ba3f8b..bb4be2679904 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -12,10 +12,12 @@
12 12
13#include <linux/if_bridge.h> 13#include <linux/if_bridge.h>
14#include <linux/notifier.h> 14#include <linux/notifier.h>
15#include <linux/of_mdio.h>
16#include <linux/of_net.h>
15 17
16#include "dsa_priv.h" 18#include "dsa_priv.h"
17 19
18static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v) 20static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
19{ 21{
20 struct raw_notifier_head *nh = &dp->ds->dst->nh; 22 struct raw_notifier_head *nh = &dp->ds->dst->nh;
21 int err; 23 int err;
@@ -56,7 +58,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state,
56 return 0; 58 return 0;
57} 59}
58 60
59void dsa_port_set_state_now(struct dsa_port *dp, u8 state) 61static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
60{ 62{
61 int err; 63 int err;
62 64
@@ -65,6 +67,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
65 pr_err("DSA: failed to set STP state %u (%d)\n", state, err); 67 pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
66} 68}
67 69
70int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
71{
72 u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
73 struct dsa_switch *ds = dp->ds;
74 int port = dp->index;
75 int err;
76
77 if (ds->ops->port_enable) {
78 err = ds->ops->port_enable(ds, port, phy);
79 if (err)
80 return err;
81 }
82
83 dsa_port_set_state_now(dp, stp_state);
84
85 return 0;
86}
87
88void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
89{
90 struct dsa_switch *ds = dp->ds;
91 int port = dp->index;
92
93 dsa_port_set_state_now(dp, BR_STATE_DISABLED);
94
95 if (ds->ops->port_disable)
96 ds->ops->port_disable(ds, port, phy);
97}
98
68int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) 99int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
69{ 100{
70 struct dsa_notifier_bridge_info info = { 101 struct dsa_notifier_bridge_info info = {
@@ -173,7 +204,18 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
173 return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); 204 return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
174} 205}
175 206
176int dsa_port_mdb_add(struct dsa_port *dp, 207int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
208{
209 struct dsa_switch *ds = dp->ds;
210 int port = dp->index;
211
212 if (!ds->ops->port_fdb_dump)
213 return -EOPNOTSUPP;
214
215 return ds->ops->port_fdb_dump(ds, port, cb, data);
216}
217
218int dsa_port_mdb_add(const struct dsa_port *dp,
177 const struct switchdev_obj_port_mdb *mdb, 219 const struct switchdev_obj_port_mdb *mdb,
178 struct switchdev_trans *trans) 220 struct switchdev_trans *trans)
179{ 221{
@@ -187,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp,
187 return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); 229 return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
188} 230}
189 231
190int dsa_port_mdb_del(struct dsa_port *dp, 232int dsa_port_mdb_del(const struct dsa_port *dp,
191 const struct switchdev_obj_port_mdb *mdb) 233 const struct switchdev_obj_port_mdb *mdb)
192{ 234{
193 struct dsa_notifier_mdb_info info = { 235 struct dsa_notifier_mdb_info info = {
@@ -210,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp,
210 .vlan = vlan, 252 .vlan = vlan,
211 }; 253 };
212 254
213 return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); 255 if (br_vlan_enabled(dp->bridge_dev))
256 return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
257
258 return 0;
214} 259}
215 260
216int dsa_port_vlan_del(struct dsa_port *dp, 261int dsa_port_vlan_del(struct dsa_port *dp,
@@ -222,5 +267,53 @@ int dsa_port_vlan_del(struct dsa_port *dp,
222 .vlan = vlan, 267 .vlan = vlan,
223 }; 268 };
224 269
225 return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); 270 if (br_vlan_enabled(dp->bridge_dev))
271 return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
272
273 return 0;
274}
275
276int dsa_port_fixed_link_register_of(struct dsa_port *dp)
277{
278 struct device_node *dn = dp->dn;
279 struct dsa_switch *ds = dp->ds;
280 struct phy_device *phydev;
281 int port = dp->index;
282 int mode;
283 int err;
284
285 if (of_phy_is_fixed_link(dn)) {
286 err = of_phy_register_fixed_link(dn);
287 if (err) {
288 dev_err(ds->dev,
289 "failed to register the fixed PHY of port %d\n",
290 port);
291 return err;
292 }
293
294 phydev = of_phy_find_device(dn);
295
296 mode = of_get_phy_mode(dn);
297 if (mode < 0)
298 mode = PHY_INTERFACE_MODE_NA;
299 phydev->interface = mode;
300
301 genphy_config_init(phydev);
302 genphy_read_status(phydev);
303
304 if (ds->ops->adjust_link)
305 ds->ops->adjust_link(ds, port, phydev);
306
307 put_device(&phydev->mdio.dev);
308 }
309
310 return 0;
311}
312
313void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
314{
315 struct device_node *dn = dp->dn;
316
317 if (of_phy_is_fixed_link(dn))
318 of_phy_deregister_fixed_link(dn);
226} 319}
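
The new dsa_port_enable() and dsa_port_disable() above keep their steps symmetric: enable calls the driver's optional port_enable hook first and only then raises the STP state, while disable blocks the port first and invokes the optional hook afterwards, so traffic never flows through a port the driver has not accepted. A toy sketch of that optional-callback ordering; toy_ops and the helpers are illustrative, not the kernel types:

    #include <stdio.h>

    struct toy_ops {
        int  (*port_enable)(int port);  /* may be NULL */
        void (*port_disable)(int port); /* may be NULL */
    };

    static void set_stp_state(int port, const char *state)
    {
        printf("port %d: STP %s\n", port, state);
    }

    static int port_enable(const struct toy_ops *ops, int port, int bridged)
    {
        int err;

        if (ops->port_enable) {
            err = ops->port_enable(port);
            if (err)
                return err; /* leave the port down on failure */
        }

        /* Bridged ports start BLOCKING and let the bridge open them up;
         * standalone ports go straight to FORWARDING.
         */
        set_stp_state(port, bridged ? "BLOCKING" : "FORWARDING");
        return 0;
    }

    static void port_disable(const struct toy_ops *ops, int port)
    {
        set_stp_state(port, "DISABLED"); /* stop traffic first */
        if (ops->port_disable)
            ops->port_disable(port);
    }

    static int hw_enable(int port)
    {
        printf("port %d: hw enable\n", port);
        return 0;
    }

    int main(void)
    {
        struct toy_ops ops = { .port_enable = hw_enable }; /* no disable hook */

        port_enable(&ops, 1, 0);
        port_disable(&ops, 1);
        return 0;
    }
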
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2afa99506f8b..d6e7a642493b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -55,7 +55,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
55 ds->slave_mii_bus->read = dsa_slave_phy_read; 55 ds->slave_mii_bus->read = dsa_slave_phy_read;
56 ds->slave_mii_bus->write = dsa_slave_phy_write; 56 ds->slave_mii_bus->write = dsa_slave_phy_write;
57 snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", 57 snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
58 ds->dst->tree, ds->index); 58 ds->dst->index, ds->index);
59 ds->slave_mii_bus->parent = ds->dev; 59 ds->slave_mii_bus->parent = ds->dev;
60 ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; 60 ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
61} 61}
@@ -64,18 +64,13 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
64/* slave device handling ****************************************************/ 64/* slave device handling ****************************************************/
65static int dsa_slave_get_iflink(const struct net_device *dev) 65static int dsa_slave_get_iflink(const struct net_device *dev)
66{ 66{
67 struct dsa_slave_priv *p = netdev_priv(dev); 67 return dsa_slave_to_master(dev)->ifindex;
68
69 return dsa_master_netdev(p)->ifindex;
70} 68}
71 69
72static int dsa_slave_open(struct net_device *dev) 70static int dsa_slave_open(struct net_device *dev)
73{ 71{
74 struct dsa_slave_priv *p = netdev_priv(dev); 72 struct net_device *master = dsa_slave_to_master(dev);
75 struct dsa_port *dp = p->dp; 73 struct dsa_port *dp = dsa_slave_to_port(dev);
76 struct dsa_switch *ds = dp->ds;
77 struct net_device *master = dsa_master_netdev(p);
78 u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
79 int err; 74 int err;
80 75
81 if (!(master->flags & IFF_UP)) 76 if (!(master->flags & IFF_UP))
@@ -98,16 +93,12 @@ static int dsa_slave_open(struct net_device *dev)
98 goto clear_allmulti; 93 goto clear_allmulti;
99 } 94 }
100 95
101 if (ds->ops->port_enable) { 96 err = dsa_port_enable(dp, dev->phydev);
102 err = ds->ops->port_enable(ds, p->dp->index, p->phy); 97 if (err)
103 if (err) 98 goto clear_promisc;
104 goto clear_promisc;
105 }
106
107 dsa_port_set_state_now(p->dp, stp_state);
108 99
109 if (p->phy) 100 if (dev->phydev)
110 phy_start(p->phy); 101 phy_start(dev->phydev);
111 102
112 return 0; 103 return 0;
113 104
@@ -126,12 +117,13 @@ out:
126 117
127static int dsa_slave_close(struct net_device *dev) 118static int dsa_slave_close(struct net_device *dev)
128{ 119{
129 struct dsa_slave_priv *p = netdev_priv(dev); 120 struct net_device *master = dsa_slave_to_master(dev);
130 struct net_device *master = dsa_master_netdev(p); 121 struct dsa_port *dp = dsa_slave_to_port(dev);
131 struct dsa_switch *ds = p->dp->ds; 122
123 if (dev->phydev)
124 phy_stop(dev->phydev);
132 125
133 if (p->phy) 126 dsa_port_disable(dp, dev->phydev);
134 phy_stop(p->phy);
135 127
136 dev_mc_unsync(master, dev); 128 dev_mc_unsync(master, dev);
137 dev_uc_unsync(master, dev); 129 dev_uc_unsync(master, dev);
@@ -143,18 +135,12 @@ static int dsa_slave_close(struct net_device *dev)
143 if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) 135 if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
144 dev_uc_del(master, dev->dev_addr); 136 dev_uc_del(master, dev->dev_addr);
145 137
146 if (ds->ops->port_disable)
147 ds->ops->port_disable(ds, p->dp->index, p->phy);
148
149 dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
150
151 return 0; 138 return 0;
152} 139}
153 140
154static void dsa_slave_change_rx_flags(struct net_device *dev, int change) 141static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
155{ 142{
156 struct dsa_slave_priv *p = netdev_priv(dev); 143 struct net_device *master = dsa_slave_to_master(dev);
157 struct net_device *master = dsa_master_netdev(p);
158 144
159 if (change & IFF_ALLMULTI) 145 if (change & IFF_ALLMULTI)
160 dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); 146 dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -164,8 +150,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
164 150
165static void dsa_slave_set_rx_mode(struct net_device *dev) 151static void dsa_slave_set_rx_mode(struct net_device *dev)
166{ 152{
167 struct dsa_slave_priv *p = netdev_priv(dev); 153 struct net_device *master = dsa_slave_to_master(dev);
168 struct net_device *master = dsa_master_netdev(p);
169 154
170 dev_mc_sync(master, dev); 155 dev_mc_sync(master, dev);
171 dev_uc_sync(master, dev); 156 dev_uc_sync(master, dev);
@@ -173,8 +158,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
173 158
174static int dsa_slave_set_mac_address(struct net_device *dev, void *a) 159static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
175{ 160{
176 struct dsa_slave_priv *p = netdev_priv(dev); 161 struct net_device *master = dsa_slave_to_master(dev);
177 struct net_device *master = dsa_master_netdev(p);
178 struct sockaddr *addr = a; 162 struct sockaddr *addr = a;
179 int err; 163 int err;
180 164
@@ -255,43 +239,34 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
255 struct net_device *dev, struct net_device *filter_dev, 239 struct net_device *dev, struct net_device *filter_dev,
256 int *idx) 240 int *idx)
257{ 241{
242 struct dsa_port *dp = dsa_slave_to_port(dev);
258 struct dsa_slave_dump_ctx dump = { 243 struct dsa_slave_dump_ctx dump = {
259 .dev = dev, 244 .dev = dev,
260 .skb = skb, 245 .skb = skb,
261 .cb = cb, 246 .cb = cb,
262 .idx = *idx, 247 .idx = *idx,
263 }; 248 };
264 struct dsa_slave_priv *p = netdev_priv(dev);
265 struct dsa_port *dp = p->dp;
266 struct dsa_switch *ds = dp->ds;
267 int err; 249 int err;
268 250
269 if (!ds->ops->port_fdb_dump) 251 err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump);
270 return -EOPNOTSUPP;
271
272 err = ds->ops->port_fdb_dump(ds, dp->index,
273 dsa_slave_port_fdb_do_dump,
274 &dump);
275 *idx = dump.idx; 252 *idx = dump.idx;
253
276 return err; 254 return err;
277} 255}
278 256
279static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 257static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
280{ 258{
281 struct dsa_slave_priv *p = netdev_priv(dev); 259 if (!dev->phydev)
282 260 return -ENODEV;
283 if (p->phy != NULL)
284 return phy_mii_ioctl(p->phy, ifr, cmd);
285 261
286 return -EOPNOTSUPP; 262 return phy_mii_ioctl(dev->phydev, ifr, cmd);
287} 263}
288 264
289static int dsa_slave_port_attr_set(struct net_device *dev, 265static int dsa_slave_port_attr_set(struct net_device *dev,
290 const struct switchdev_attr *attr, 266 const struct switchdev_attr *attr,
291 struct switchdev_trans *trans) 267 struct switchdev_trans *trans)
292{ 268{
293 struct dsa_slave_priv *p = netdev_priv(dev); 269 struct dsa_port *dp = dsa_slave_to_port(dev);
294 struct dsa_port *dp = p->dp;
295 int ret; 270 int ret;
296 271
297 switch (attr->id) { 272 switch (attr->id) {
@@ -317,8 +292,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
317 const struct switchdev_obj *obj, 292 const struct switchdev_obj *obj,
318 struct switchdev_trans *trans) 293 struct switchdev_trans *trans)
319{ 294{
320 struct dsa_slave_priv *p = netdev_priv(dev); 295 struct dsa_port *dp = dsa_slave_to_port(dev);
321 struct dsa_port *dp = p->dp;
322 int err; 296 int err;
323 297
324 /* For the prepare phase, ensure the full set of changes is feasible in 298 /* For the prepare phase, ensure the full set of changes is feasible in
@@ -330,6 +304,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
330 case SWITCHDEV_OBJ_ID_PORT_MDB: 304 case SWITCHDEV_OBJ_ID_PORT_MDB:
331 err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); 305 err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
332 break; 306 break;
307 case SWITCHDEV_OBJ_ID_HOST_MDB:
308 /* DSA can directly translate this to a normal MDB add,
309 * but on the CPU port.
310 */
311 err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj),
312 trans);
313 break;
333 case SWITCHDEV_OBJ_ID_PORT_VLAN: 314 case SWITCHDEV_OBJ_ID_PORT_VLAN:
334 err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), 315 err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
335 trans); 316 trans);
@@ -345,14 +326,19 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
345static int dsa_slave_port_obj_del(struct net_device *dev, 326static int dsa_slave_port_obj_del(struct net_device *dev,
346 const struct switchdev_obj *obj) 327 const struct switchdev_obj *obj)
347{ 328{
348 struct dsa_slave_priv *p = netdev_priv(dev); 329 struct dsa_port *dp = dsa_slave_to_port(dev);
349 struct dsa_port *dp = p->dp;
350 int err; 330 int err;
351 331
352 switch (obj->id) { 332 switch (obj->id) {
353 case SWITCHDEV_OBJ_ID_PORT_MDB: 333 case SWITCHDEV_OBJ_ID_PORT_MDB:
354 err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); 334 err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
355 break; 335 break;
336 case SWITCHDEV_OBJ_ID_HOST_MDB:
337 /* DSA can directly translate this to a normal MDB del,
338 * but on the CPU port.
339 */
340 err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
341 break;
356 case SWITCHDEV_OBJ_ID_PORT_VLAN: 342 case SWITCHDEV_OBJ_ID_PORT_VLAN:
357 err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); 343 err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
358 break; 344 break;
@@ -367,13 +353,14 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
367static int dsa_slave_port_attr_get(struct net_device *dev, 353static int dsa_slave_port_attr_get(struct net_device *dev,
368 struct switchdev_attr *attr) 354 struct switchdev_attr *attr)
369{ 355{
370 struct dsa_slave_priv *p = netdev_priv(dev); 356 struct dsa_port *dp = dsa_slave_to_port(dev);
371 struct dsa_switch *ds = p->dp->ds; 357 struct dsa_switch *ds = dp->ds;
358 struct dsa_switch_tree *dst = ds->dst;
372 359
373 switch (attr->id) { 360 switch (attr->id) {
374 case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: 361 case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
375 attr->u.ppid.id_len = sizeof(ds->index); 362 attr->u.ppid.id_len = sizeof(dst->index);
376 memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); 363 memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len);
377 break; 364 break;
378 case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: 365 case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
379 attr->u.brport_flags_support = 0; 366 attr->u.brport_flags_support = 0;
@@ -385,10 +372,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
385 return 0; 372 return 0;
386} 373}
387 374
388static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p, 375static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
389 struct sk_buff *skb) 376 struct sk_buff *skb)
390{ 377{
391#ifdef CONFIG_NET_POLL_CONTROLLER 378#ifdef CONFIG_NET_POLL_CONTROLLER
379 struct dsa_slave_priv *p = netdev_priv(dev);
380
392 if (p->netpoll) 381 if (p->netpoll)
393 netpoll_send_skb(p->netpoll, skb); 382 netpoll_send_skb(p->netpoll, skb);
394#else 383#else
@@ -422,43 +411,18 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
422 * tag to be successfully transmitted 411 * tag to be successfully transmitted
423 */ 412 */
424 if (unlikely(netpoll_tx_running(dev))) 413 if (unlikely(netpoll_tx_running(dev)))
425 return dsa_netpoll_send_skb(p, nskb); 414 return dsa_slave_netpoll_send_skb(dev, nskb);
426 415
427 /* Queue the SKB for transmission on the parent interface, but 416 /* Queue the SKB for transmission on the parent interface, but
428 * do not modify its EtherType 417 * do not modify its EtherType
429 */ 418 */
430 nskb->dev = dsa_master_netdev(p); 419 nskb->dev = dsa_slave_to_master(dev);
431 dev_queue_xmit(nskb); 420 dev_queue_xmit(nskb);
432 421
433 return NETDEV_TX_OK; 422 return NETDEV_TX_OK;
434} 423}
435 424
436/* ethtool operations *******************************************************/ 425/* ethtool operations *******************************************************/
437static int
438dsa_slave_get_link_ksettings(struct net_device *dev,
439 struct ethtool_link_ksettings *cmd)
440{
441 struct dsa_slave_priv *p = netdev_priv(dev);
442
443 if (!p->phy)
444 return -EOPNOTSUPP;
445
446 phy_ethtool_ksettings_get(p->phy, cmd);
447
448 return 0;
449}
450
451static int
452dsa_slave_set_link_ksettings(struct net_device *dev,
453 const struct ethtool_link_ksettings *cmd)
454{
455 struct dsa_slave_priv *p = netdev_priv(dev);
456
457 if (p->phy != NULL)
458 return phy_ethtool_ksettings_set(p->phy, cmd);
459
460 return -EOPNOTSUPP;
461}
462 426
463static void dsa_slave_get_drvinfo(struct net_device *dev, 427static void dsa_slave_get_drvinfo(struct net_device *dev,
464 struct ethtool_drvinfo *drvinfo) 428 struct ethtool_drvinfo *drvinfo)
@@ -470,11 +434,11 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
470 434
471static int dsa_slave_get_regs_len(struct net_device *dev) 435static int dsa_slave_get_regs_len(struct net_device *dev)
472{ 436{
473 struct dsa_slave_priv *p = netdev_priv(dev); 437 struct dsa_port *dp = dsa_slave_to_port(dev);
474 struct dsa_switch *ds = p->dp->ds; 438 struct dsa_switch *ds = dp->ds;
475 439
476 if (ds->ops->get_regs_len) 440 if (ds->ops->get_regs_len)
477 return ds->ops->get_regs_len(ds, p->dp->index); 441 return ds->ops->get_regs_len(ds, dp->index);
478 442
479 return -EOPNOTSUPP; 443 return -EOPNOTSUPP;
480} 444}
@@ -482,39 +446,27 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
482static void 446static void
483dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) 447dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
484{ 448{
485 struct dsa_slave_priv *p = netdev_priv(dev); 449 struct dsa_port *dp = dsa_slave_to_port(dev);
486 struct dsa_switch *ds = p->dp->ds; 450 struct dsa_switch *ds = dp->ds;
487 451
488 if (ds->ops->get_regs) 452 if (ds->ops->get_regs)
489 ds->ops->get_regs(ds, p->dp->index, regs, _p); 453 ds->ops->get_regs(ds, dp->index, regs, _p);
490}
491
492static int dsa_slave_nway_reset(struct net_device *dev)
493{
494 struct dsa_slave_priv *p = netdev_priv(dev);
495
496 if (p->phy != NULL)
497 return genphy_restart_aneg(p->phy);
498
499 return -EOPNOTSUPP;
500} 454}
501 455
502static u32 dsa_slave_get_link(struct net_device *dev) 456static u32 dsa_slave_get_link(struct net_device *dev)
503{ 457{
504 struct dsa_slave_priv *p = netdev_priv(dev); 458 if (!dev->phydev)
459 return -ENODEV;
505 460
506 if (p->phy != NULL) { 461 genphy_update_link(dev->phydev);
507 genphy_update_link(p->phy);
508 return p->phy->link;
509 }
510 462
511 return -EOPNOTSUPP; 463 return dev->phydev->link;
512} 464}
513 465
514static int dsa_slave_get_eeprom_len(struct net_device *dev) 466static int dsa_slave_get_eeprom_len(struct net_device *dev)
515{ 467{
516 struct dsa_slave_priv *p = netdev_priv(dev); 468 struct dsa_port *dp = dsa_slave_to_port(dev);
517 struct dsa_switch *ds = p->dp->ds; 469 struct dsa_switch *ds = dp->ds;
518 470
519 if (ds->cd && ds->cd->eeprom_len) 471 if (ds->cd && ds->cd->eeprom_len)
520 return ds->cd->eeprom_len; 472 return ds->cd->eeprom_len;
@@ -528,8 +480,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
528static int dsa_slave_get_eeprom(struct net_device *dev, 480static int dsa_slave_get_eeprom(struct net_device *dev,
529 struct ethtool_eeprom *eeprom, u8 *data) 481 struct ethtool_eeprom *eeprom, u8 *data)
530{ 482{
531 struct dsa_slave_priv *p = netdev_priv(dev); 483 struct dsa_port *dp = dsa_slave_to_port(dev);
532 struct dsa_switch *ds = p->dp->ds; 484 struct dsa_switch *ds = dp->ds;
533 485
534 if (ds->ops->get_eeprom) 486 if (ds->ops->get_eeprom)
535 return ds->ops->get_eeprom(ds, eeprom, data); 487 return ds->ops->get_eeprom(ds, eeprom, data);
@@ -540,8 +492,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
540static int dsa_slave_set_eeprom(struct net_device *dev, 492static int dsa_slave_set_eeprom(struct net_device *dev,
541 struct ethtool_eeprom *eeprom, u8 *data) 493 struct ethtool_eeprom *eeprom, u8 *data)
542{ 494{
543 struct dsa_slave_priv *p = netdev_priv(dev); 495 struct dsa_port *dp = dsa_slave_to_port(dev);
544 struct dsa_switch *ds = p->dp->ds; 496 struct dsa_switch *ds = dp->ds;
545 497
546 if (ds->ops->set_eeprom) 498 if (ds->ops->set_eeprom)
547 return ds->ops->set_eeprom(ds, eeprom, data); 499 return ds->ops->set_eeprom(ds, eeprom, data);
@@ -552,8 +504,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
552static void dsa_slave_get_strings(struct net_device *dev, 504static void dsa_slave_get_strings(struct net_device *dev,
553 uint32_t stringset, uint8_t *data) 505 uint32_t stringset, uint8_t *data)
554{ 506{
555 struct dsa_slave_priv *p = netdev_priv(dev); 507 struct dsa_port *dp = dsa_slave_to_port(dev);
556 struct dsa_switch *ds = p->dp->ds; 508 struct dsa_switch *ds = dp->ds;
557 509
558 if (stringset == ETH_SS_STATS) { 510 if (stringset == ETH_SS_STATS) {
559 int len = ETH_GSTRING_LEN; 511 int len = ETH_GSTRING_LEN;
@@ -563,80 +515,7 @@ static void dsa_slave_get_strings(struct net_device *dev,
563 strncpy(data + 2 * len, "rx_packets", len); 515 strncpy(data + 2 * len, "rx_packets", len);
564 strncpy(data + 3 * len, "rx_bytes", len); 516 strncpy(data + 3 * len, "rx_bytes", len);
565 if (ds->ops->get_strings) 517 if (ds->ops->get_strings)
566 ds->ops->get_strings(ds, p->dp->index, data + 4 * len); 518 ds->ops->get_strings(ds, dp->index, data + 4 * len);
567 }
568}
569
570static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
571 struct ethtool_stats *stats,
572 uint64_t *data)
573{
574 struct dsa_switch_tree *dst = dev->dsa_ptr;
575 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
576 struct dsa_switch *ds = cpu_dp->ds;
577 s8 cpu_port = cpu_dp->index;
578 int count = 0;
579
580 if (cpu_dp->ethtool_ops.get_sset_count) {
581 count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
582 cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data);
583 }
584
585 if (ds->ops->get_ethtool_stats)
586 ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
587}
588
589static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
590{
591 struct dsa_switch_tree *dst = dev->dsa_ptr;
592 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
593 struct dsa_switch *ds = cpu_dp->ds;
594 int count = 0;
595
596 if (cpu_dp->ethtool_ops.get_sset_count)
597 count += cpu_dp->ethtool_ops.get_sset_count(dev, sset);
598
599 if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
600 count += ds->ops->get_sset_count(ds);
601
602 return count;
603}
604
605static void dsa_cpu_port_get_strings(struct net_device *dev,
606 uint32_t stringset, uint8_t *data)
607{
608 struct dsa_switch_tree *dst = dev->dsa_ptr;
609 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
610 struct dsa_switch *ds = cpu_dp->ds;
611 s8 cpu_port = cpu_dp->index;
612 int len = ETH_GSTRING_LEN;
613 int mcount = 0, count;
614 unsigned int i;
615 uint8_t pfx[4];
616 uint8_t *ndata;
617
618 snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port);
619 /* We do not want to be NULL-terminated, since this is a prefix */
620 pfx[sizeof(pfx) - 1] = '_';
621
622 if (cpu_dp->ethtool_ops.get_sset_count) {
623 mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
624 cpu_dp->ethtool_ops.get_strings(dev, stringset, data);
625 }
626
627 if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
628 ndata = data + mcount * len;
629 /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
630 * the output after to prepend our CPU port prefix we
631 * constructed earlier
632 */
633 ds->ops->get_strings(ds, cpu_port, ndata);
634 count = ds->ops->get_sset_count(ds);
635 for (i = 0; i < count; i++) {
636 memmove(ndata + (i * len + sizeof(pfx)),
637 ndata + i * len, len - sizeof(pfx));
638 memcpy(ndata + i * len, pfx, sizeof(pfx));
639 }
640 } 519 }
641} 520}
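
The dsa_cpu_port_* helpers deleted here (their role moves to the CPU/master port handling elsewhere in this series) rely on an easy-to-miss trick: each switch statistic name is shifted right inside its fixed ETH_GSTRING_LEN slot so that the four-byte "pXX_" prefix can overwrite the hole, at the cost of the last four bytes of the name. A standalone illustration of the memmove()/memcpy() dance (hypothetical userspace demo, not kernel code):

#include <stdio.h>
#include <string.h>

#define ETH_GSTRING_LEN	32

int main(void)
{
	char data[ETH_GSTRING_LEN] = "tx_octets";
	char pfx[4];

	snprintf(pfx, sizeof(pfx), "p%.2d", 8);
	pfx[sizeof(pfx) - 1] = '_';	/* "p08_", deliberately not NUL-terminated */

	/* shift the name right by sizeof(pfx), then write the prefix */
	memmove(data + sizeof(pfx), data, ETH_GSTRING_LEN - sizeof(pfx));
	memcpy(data, pfx, sizeof(pfx));

	printf("%.*s\n", ETH_GSTRING_LEN, data);	/* prints "p08_tx_octets" */
	return 0;
}
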
642 521
@@ -644,8 +523,9 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
644 struct ethtool_stats *stats, 523 struct ethtool_stats *stats,
645 uint64_t *data) 524 uint64_t *data)
646{ 525{
526 struct dsa_port *dp = dsa_slave_to_port(dev);
647 struct dsa_slave_priv *p = netdev_priv(dev); 527 struct dsa_slave_priv *p = netdev_priv(dev);
648 struct dsa_switch *ds = p->dp->ds; 528 struct dsa_switch *ds = dp->ds;
649 struct pcpu_sw_netstats *s; 529 struct pcpu_sw_netstats *s;
650 unsigned int start; 530 unsigned int start;
651 int i; 531 int i;
@@ -667,13 +547,13 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
667 data[3] += rx_bytes; 547 data[3] += rx_bytes;
668 } 548 }
669 if (ds->ops->get_ethtool_stats) 549 if (ds->ops->get_ethtool_stats)
670 ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4); 550 ds->ops->get_ethtool_stats(ds, dp->index, data + 4);
671} 551}
672 552
673static int dsa_slave_get_sset_count(struct net_device *dev, int sset) 553static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
674{ 554{
675 struct dsa_slave_priv *p = netdev_priv(dev); 555 struct dsa_port *dp = dsa_slave_to_port(dev);
676 struct dsa_switch *ds = p->dp->ds; 556 struct dsa_switch *ds = dp->ds;
677 557
678 if (sset == ETH_SS_STATS) { 558 if (sset == ETH_SS_STATS) {
679 int count; 559 int count;
@@ -690,77 +570,77 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
690 570
691static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) 571static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
692{ 572{
693 struct dsa_slave_priv *p = netdev_priv(dev); 573 struct dsa_port *dp = dsa_slave_to_port(dev);
694 struct dsa_switch *ds = p->dp->ds; 574 struct dsa_switch *ds = dp->ds;
695 575
696 if (ds->ops->get_wol) 576 if (ds->ops->get_wol)
697 ds->ops->get_wol(ds, p->dp->index, w); 577 ds->ops->get_wol(ds, dp->index, w);
698} 578}
699 579
700static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) 580static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
701{ 581{
702 struct dsa_slave_priv *p = netdev_priv(dev); 582 struct dsa_port *dp = dsa_slave_to_port(dev);
703 struct dsa_switch *ds = p->dp->ds; 583 struct dsa_switch *ds = dp->ds;
704 int ret = -EOPNOTSUPP; 584 int ret = -EOPNOTSUPP;
705 585
706 if (ds->ops->set_wol) 586 if (ds->ops->set_wol)
707 ret = ds->ops->set_wol(ds, p->dp->index, w); 587 ret = ds->ops->set_wol(ds, dp->index, w);
708 588
709 return ret; 589 return ret;
710} 590}
711 591
712static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) 592static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
713{ 593{
714 struct dsa_slave_priv *p = netdev_priv(dev); 594 struct dsa_port *dp = dsa_slave_to_port(dev);
715 struct dsa_switch *ds = p->dp->ds; 595 struct dsa_switch *ds = dp->ds;
716 int ret; 596 int ret;
717 597
718 /* Port's PHY and MAC both need to be EEE capable */ 598 /* Port's PHY and MAC both need to be EEE capable */
719 if (!p->phy) 599 if (!dev->phydev)
720 return -ENODEV; 600 return -ENODEV;
721 601
722 if (!ds->ops->set_mac_eee) 602 if (!ds->ops->set_mac_eee)
723 return -EOPNOTSUPP; 603 return -EOPNOTSUPP;
724 604
725 ret = ds->ops->set_mac_eee(ds, p->dp->index, e); 605 ret = ds->ops->set_mac_eee(ds, dp->index, e);
726 if (ret) 606 if (ret)
727 return ret; 607 return ret;
728 608
729 if (e->eee_enabled) { 609 if (e->eee_enabled) {
730 ret = phy_init_eee(p->phy, 0); 610 ret = phy_init_eee(dev->phydev, 0);
731 if (ret) 611 if (ret)
732 return ret; 612 return ret;
733 } 613 }
734 614
735 return phy_ethtool_set_eee(p->phy, e); 615 return phy_ethtool_set_eee(dev->phydev, e);
736} 616}
737 617
738static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) 618static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
739{ 619{
740 struct dsa_slave_priv *p = netdev_priv(dev); 620 struct dsa_port *dp = dsa_slave_to_port(dev);
741 struct dsa_switch *ds = p->dp->ds; 621 struct dsa_switch *ds = dp->ds;
742 int ret; 622 int ret;
743 623
744 /* Port's PHY and MAC both need to be EEE capable */ 624 /* Port's PHY and MAC both need to be EEE capable */
745 if (!p->phy) 625 if (!dev->phydev)
746 return -ENODEV; 626 return -ENODEV;
747 627
748 if (!ds->ops->get_mac_eee) 628 if (!ds->ops->get_mac_eee)
749 return -EOPNOTSUPP; 629 return -EOPNOTSUPP;
750 630
751 ret = ds->ops->get_mac_eee(ds, p->dp->index, e); 631 ret = ds->ops->get_mac_eee(ds, dp->index, e);
752 if (ret) 632 if (ret)
753 return ret; 633 return ret;
754 634
755 return phy_ethtool_get_eee(p->phy, e); 635 return phy_ethtool_get_eee(dev->phydev, e);
756} 636}
757 637
758#ifdef CONFIG_NET_POLL_CONTROLLER 638#ifdef CONFIG_NET_POLL_CONTROLLER
759static int dsa_slave_netpoll_setup(struct net_device *dev, 639static int dsa_slave_netpoll_setup(struct net_device *dev,
760 struct netpoll_info *ni) 640 struct netpoll_info *ni)
761{ 641{
642 struct net_device *master = dsa_slave_to_master(dev);
762 struct dsa_slave_priv *p = netdev_priv(dev); 643 struct dsa_slave_priv *p = netdev_priv(dev);
763 struct net_device *master = dsa_master_netdev(p);
764 struct netpoll *netpoll; 644 struct netpoll *netpoll;
765 int err = 0; 645 int err = 0;
766 646
@@ -800,18 +680,18 @@ static void dsa_slave_poll_controller(struct net_device *dev)
800static int dsa_slave_get_phys_port_name(struct net_device *dev, 680static int dsa_slave_get_phys_port_name(struct net_device *dev,
801 char *name, size_t len) 681 char *name, size_t len)
802{ 682{
803 struct dsa_slave_priv *p = netdev_priv(dev); 683 struct dsa_port *dp = dsa_slave_to_port(dev);
804 684
805 if (snprintf(name, len, "p%d", p->dp->index) >= len) 685 if (snprintf(name, len, "p%d", dp->index) >= len)
806 return -EINVAL; 686 return -EINVAL;
807 687
808 return 0; 688 return 0;
809} 689}
810 690
811static struct dsa_mall_tc_entry * 691static struct dsa_mall_tc_entry *
812dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p, 692dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
813 unsigned long cookie)
814{ 693{
694 struct dsa_slave_priv *p = netdev_priv(dev);
815 struct dsa_mall_tc_entry *mall_tc_entry; 695 struct dsa_mall_tc_entry *mall_tc_entry;
816 696
817 list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) 697 list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list)
@@ -825,14 +705,15 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
825 struct tc_cls_matchall_offload *cls, 705 struct tc_cls_matchall_offload *cls,
826 bool ingress) 706 bool ingress)
827{ 707{
708 struct dsa_port *dp = dsa_slave_to_port(dev);
828 struct dsa_slave_priv *p = netdev_priv(dev); 709 struct dsa_slave_priv *p = netdev_priv(dev);
829 struct dsa_mall_tc_entry *mall_tc_entry; 710 struct dsa_mall_tc_entry *mall_tc_entry;
830 __be16 protocol = cls->common.protocol; 711 __be16 protocol = cls->common.protocol;
831 struct dsa_switch *ds = p->dp->ds;
832 struct net *net = dev_net(dev); 712 struct net *net = dev_net(dev);
833 struct dsa_slave_priv *to_p; 713 struct dsa_switch *ds = dp->ds;
834 struct net_device *to_dev; 714 struct net_device *to_dev;
835 const struct tc_action *a; 715 const struct tc_action *a;
716 struct dsa_port *to_dp;
836 int err = -EOPNOTSUPP; 717 int err = -EOPNOTSUPP;
837 LIST_HEAD(actions); 718 LIST_HEAD(actions);
838 int ifindex; 719 int ifindex;
@@ -865,13 +746,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
865 mall_tc_entry->type = DSA_PORT_MALL_MIRROR; 746 mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
866 mirror = &mall_tc_entry->mirror; 747 mirror = &mall_tc_entry->mirror;
867 748
868 to_p = netdev_priv(to_dev); 749 to_dp = dsa_slave_to_port(to_dev);
869 750
870 mirror->to_local_port = to_p->dp->index; 751 mirror->to_local_port = to_dp->index;
871 mirror->ingress = ingress; 752 mirror->ingress = ingress;
872 753
873 err = ds->ops->port_mirror_add(ds, p->dp->index, mirror, 754 err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress);
874 ingress);
875 if (err) { 755 if (err) {
876 kfree(mall_tc_entry); 756 kfree(mall_tc_entry);
877 return err; 757 return err;
@@ -886,14 +766,14 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
886static void dsa_slave_del_cls_matchall(struct net_device *dev, 766static void dsa_slave_del_cls_matchall(struct net_device *dev,
887 struct tc_cls_matchall_offload *cls) 767 struct tc_cls_matchall_offload *cls)
888{ 768{
889 struct dsa_slave_priv *p = netdev_priv(dev); 769 struct dsa_port *dp = dsa_slave_to_port(dev);
890 struct dsa_mall_tc_entry *mall_tc_entry; 770 struct dsa_mall_tc_entry *mall_tc_entry;
891 struct dsa_switch *ds = p->dp->ds; 771 struct dsa_switch *ds = dp->ds;
892 772
893 if (!ds->ops->port_mirror_del) 773 if (!ds->ops->port_mirror_del)
894 return; 774 return;
895 775
896 mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie); 776 mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie);
897 if (!mall_tc_entry) 777 if (!mall_tc_entry)
898 return; 778 return;
899 779
@@ -901,8 +781,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
901 781
902 switch (mall_tc_entry->type) { 782 switch (mall_tc_entry->type) {
903 case DSA_PORT_MALL_MIRROR: 783 case DSA_PORT_MALL_MIRROR:
904 ds->ops->port_mirror_del(ds, p->dp->index, 784 ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror);
905 &mall_tc_entry->mirror);
906 break; 785 break;
907 default: 786 default:
908 WARN_ON(1); 787 WARN_ON(1);
@@ -912,17 +791,9 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
912} 791}
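
The mirror entry assembled and torn down here lands in the switch driver's port_mirror_add()/port_mirror_del() ops. A hypothetical driver-side sketch (the driver type and the mydrv_hw_mirror() helper are invented for illustration):

static int mydrv_port_mirror_add(struct dsa_switch *ds, int port,
				 struct dsa_mall_mirror_tc_entry *mirror,
				 bool ingress)
{
	struct mydrv_priv *priv = ds->priv;

	/* copy ingress or egress traffic of 'port' to the capture port */
	return mydrv_hw_mirror(priv, port, mirror->to_local_port,
			       ingress, true);
}

static void mydrv_port_mirror_del(struct dsa_switch *ds, int port,
				  struct dsa_mall_mirror_tc_entry *mirror)
{
	struct mydrv_priv *priv = ds->priv;

	mydrv_hw_mirror(priv, port, mirror->to_local_port,
			mirror->ingress, false);
}
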
913 792
914static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, 793static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
915 struct tc_cls_matchall_offload *cls) 794 struct tc_cls_matchall_offload *cls,
795 bool ingress)
916{ 796{
917 bool ingress;
918
919 if (is_classid_clsact_ingress(cls->common.classid))
920 ingress = true;
921 else if (is_classid_clsact_egress(cls->common.classid))
922 ingress = false;
923 else
924 return -EOPNOTSUPP;
925
926 if (cls->common.chain_index) 797 if (cls->common.chain_index)
927 return -EOPNOTSUPP; 798 return -EOPNOTSUPP;
928 799
@@ -937,12 +808,63 @@ static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
937 } 808 }
938} 809}
939 810
811static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
812 void *cb_priv, bool ingress)
813{
814 struct net_device *dev = cb_priv;
815
816 if (!tc_can_offload(dev))
817 return -EOPNOTSUPP;
818
819 switch (type) {
820 case TC_SETUP_CLSMATCHALL:
821 return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress);
822 default:
823 return -EOPNOTSUPP;
824 }
825}
826
827static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type,
828 void *type_data, void *cb_priv)
829{
830 return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true);
831}
832
833static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type,
834 void *type_data, void *cb_priv)
835{
836 return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false);
837}
838
839static int dsa_slave_setup_tc_block(struct net_device *dev,
840 struct tc_block_offload *f)
841{
842 tc_setup_cb_t *cb;
843
844 if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
845 cb = dsa_slave_setup_tc_block_cb_ig;
846 else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
847 cb = dsa_slave_setup_tc_block_cb_eg;
848 else
849 return -EOPNOTSUPP;
850
851 switch (f->command) {
852 case TC_BLOCK_BIND:
853 return tcf_block_cb_register(f->block, cb, dev, dev);
854 case TC_BLOCK_UNBIND:
855 tcf_block_cb_unregister(f->block, cb, dev);
856 return 0;
857 default:
858 return -EOPNOTSUPP;
859 }
860}
861
940static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, 862static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
941 void *type_data) 863 void *type_data)
942{ 864{
943 switch (type) { 865 switch (type) {
944 case TC_SETUP_CLSMATCHALL: 866 case TC_SETUP_BLOCK:
945 return dsa_slave_setup_tc_cls_matchall(dev, type_data); 867 return dsa_slave_setup_tc_block(dev, type_data);
946 default: 868 default:
947 return -EOPNOTSUPP; 869 return -EOPNOTSUPP;
948 } 870 }
@@ -976,42 +898,35 @@ static void dsa_slave_get_stats64(struct net_device *dev,
976 } 898 }
977} 899}
978 900
979void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
980{
981 ops->get_sset_count = dsa_cpu_port_get_sset_count;
982 ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats;
983 ops->get_strings = dsa_cpu_port_get_strings;
984}
985
986static int dsa_slave_get_rxnfc(struct net_device *dev, 901static int dsa_slave_get_rxnfc(struct net_device *dev,
987 struct ethtool_rxnfc *nfc, u32 *rule_locs) 902 struct ethtool_rxnfc *nfc, u32 *rule_locs)
988{ 903{
989 struct dsa_slave_priv *p = netdev_priv(dev); 904 struct dsa_port *dp = dsa_slave_to_port(dev);
990 struct dsa_switch *ds = p->dp->ds; 905 struct dsa_switch *ds = dp->ds;
991 906
992 if (!ds->ops->get_rxnfc) 907 if (!ds->ops->get_rxnfc)
993 return -EOPNOTSUPP; 908 return -EOPNOTSUPP;
994 909
995 return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs); 910 return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs);
996} 911}
997 912
998static int dsa_slave_set_rxnfc(struct net_device *dev, 913static int dsa_slave_set_rxnfc(struct net_device *dev,
999 struct ethtool_rxnfc *nfc) 914 struct ethtool_rxnfc *nfc)
1000{ 915{
1001 struct dsa_slave_priv *p = netdev_priv(dev); 916 struct dsa_port *dp = dsa_slave_to_port(dev);
1002 struct dsa_switch *ds = p->dp->ds; 917 struct dsa_switch *ds = dp->ds;
1003 918
1004 if (!ds->ops->set_rxnfc) 919 if (!ds->ops->set_rxnfc)
1005 return -EOPNOTSUPP; 920 return -EOPNOTSUPP;
1006 921
1007 return ds->ops->set_rxnfc(ds, p->dp->index, nfc); 922 return ds->ops->set_rxnfc(ds, dp->index, nfc);
1008} 923}
1009 924
1010static const struct ethtool_ops dsa_slave_ethtool_ops = { 925static const struct ethtool_ops dsa_slave_ethtool_ops = {
1011 .get_drvinfo = dsa_slave_get_drvinfo, 926 .get_drvinfo = dsa_slave_get_drvinfo,
1012 .get_regs_len = dsa_slave_get_regs_len, 927 .get_regs_len = dsa_slave_get_regs_len,
1013 .get_regs = dsa_slave_get_regs, 928 .get_regs = dsa_slave_get_regs,
1014 .nway_reset = dsa_slave_nway_reset, 929 .nway_reset = phy_ethtool_nway_reset,
1015 .get_link = dsa_slave_get_link, 930 .get_link = dsa_slave_get_link,
1016 .get_eeprom_len = dsa_slave_get_eeprom_len, 931 .get_eeprom_len = dsa_slave_get_eeprom_len,
1017 .get_eeprom = dsa_slave_get_eeprom, 932 .get_eeprom = dsa_slave_get_eeprom,
@@ -1023,8 +938,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
1023 .get_wol = dsa_slave_get_wol, 938 .get_wol = dsa_slave_get_wol,
1024 .set_eee = dsa_slave_set_eee, 939 .set_eee = dsa_slave_set_eee,
1025 .get_eee = dsa_slave_get_eee, 940 .get_eee = dsa_slave_get_eee,
1026 .get_link_ksettings = dsa_slave_get_link_ksettings, 941 .get_link_ksettings = phy_ethtool_get_link_ksettings,
1027 .set_link_ksettings = dsa_slave_set_link_ksettings, 942 .set_link_ksettings = phy_ethtool_set_link_ksettings,
1028 .get_rxnfc = dsa_slave_get_rxnfc, 943 .get_rxnfc = dsa_slave_get_rxnfc,
1029 .set_rxnfc = dsa_slave_set_rxnfc, 944 .set_rxnfc = dsa_slave_set_rxnfc,
1030}; 945};
@@ -1064,78 +979,81 @@ static struct device_type dsa_type = {
1064 979
1065static void dsa_slave_adjust_link(struct net_device *dev) 980static void dsa_slave_adjust_link(struct net_device *dev)
1066{ 981{
982 struct dsa_port *dp = dsa_slave_to_port(dev);
1067 struct dsa_slave_priv *p = netdev_priv(dev); 983 struct dsa_slave_priv *p = netdev_priv(dev);
1068 struct dsa_switch *ds = p->dp->ds; 984 struct dsa_switch *ds = dp->ds;
1069 unsigned int status_changed = 0; 985 unsigned int status_changed = 0;
1070 986
1071 if (p->old_link != p->phy->link) { 987 if (p->old_link != dev->phydev->link) {
1072 status_changed = 1; 988 status_changed = 1;
1073 p->old_link = p->phy->link; 989 p->old_link = dev->phydev->link;
1074 } 990 }
1075 991
1076 if (p->old_duplex != p->phy->duplex) { 992 if (p->old_duplex != dev->phydev->duplex) {
1077 status_changed = 1; 993 status_changed = 1;
1078 p->old_duplex = p->phy->duplex; 994 p->old_duplex = dev->phydev->duplex;
1079 } 995 }
1080 996
1081 if (p->old_pause != p->phy->pause) { 997 if (p->old_pause != dev->phydev->pause) {
1082 status_changed = 1; 998 status_changed = 1;
1083 p->old_pause = p->phy->pause; 999 p->old_pause = dev->phydev->pause;
1084 } 1000 }
1085 1001
1086 if (ds->ops->adjust_link && status_changed) 1002 if (ds->ops->adjust_link && status_changed)
1087 ds->ops->adjust_link(ds, p->dp->index, p->phy); 1003 ds->ops->adjust_link(ds, dp->index, dev->phydev);
1088 1004
1089 if (status_changed) 1005 if (status_changed)
1090 phy_print_status(p->phy); 1006 phy_print_status(dev->phydev);
1091} 1007}
1092 1008
1093static int dsa_slave_fixed_link_update(struct net_device *dev, 1009static int dsa_slave_fixed_link_update(struct net_device *dev,
1094 struct fixed_phy_status *status) 1010 struct fixed_phy_status *status)
1095{ 1011{
1096 struct dsa_slave_priv *p;
1097 struct dsa_switch *ds; 1012 struct dsa_switch *ds;
1013 struct dsa_port *dp;
1098 1014
1099 if (dev) { 1015 if (dev) {
1100 p = netdev_priv(dev); 1016 dp = dsa_slave_to_port(dev);
1101 ds = p->dp->ds; 1017 ds = dp->ds;
1102 if (ds->ops->fixed_link_update) 1018 if (ds->ops->fixed_link_update)
1103 ds->ops->fixed_link_update(ds, p->dp->index, status); 1019 ds->ops->fixed_link_update(ds, dp->index, status);
1104 } 1020 }
1105 1021
1106 return 0; 1022 return 0;
1107} 1023}
1108 1024
1109/* slave device setup *******************************************************/ 1025/* slave device setup *******************************************************/
1110static int dsa_slave_phy_connect(struct dsa_slave_priv *p, 1026static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
1111 struct net_device *slave_dev,
1112 int addr)
1113{ 1027{
1114 struct dsa_switch *ds = p->dp->ds; 1028 struct dsa_port *dp = dsa_slave_to_port(slave_dev);
1029 struct dsa_slave_priv *p = netdev_priv(slave_dev);
1030 struct dsa_switch *ds = dp->ds;
1115 1031
1116 p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); 1032 slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
1117 if (!p->phy) { 1033 if (!slave_dev->phydev) {
1118 netdev_err(slave_dev, "no phy at %d\n", addr); 1034 netdev_err(slave_dev, "no phy at %d\n", addr);
1119 return -ENODEV; 1035 return -ENODEV;
1120 } 1036 }
1121 1037
1122 /* Use already configured phy mode */ 1038 /* Use already configured phy mode */
1123 if (p->phy_interface == PHY_INTERFACE_MODE_NA) 1039 if (p->phy_interface == PHY_INTERFACE_MODE_NA)
1124 p->phy_interface = p->phy->interface; 1040 p->phy_interface = slave_dev->phydev->interface;
1125 return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, 1041
1126 p->phy_interface); 1042 return phy_connect_direct(slave_dev, slave_dev->phydev,
1043 dsa_slave_adjust_link, p->phy_interface);
1127} 1044}
1128 1045
1129static int dsa_slave_phy_setup(struct dsa_slave_priv *p, 1046static int dsa_slave_phy_setup(struct net_device *slave_dev)
1130 struct net_device *slave_dev)
1131{ 1047{
1132 struct dsa_switch *ds = p->dp->ds; 1048 struct dsa_port *dp = dsa_slave_to_port(slave_dev);
1133 struct device_node *phy_dn, *port_dn; 1049 struct dsa_slave_priv *p = netdev_priv(slave_dev);
1050 struct device_node *port_dn = dp->dn;
1051 struct dsa_switch *ds = dp->ds;
1052 struct device_node *phy_dn;
1134 bool phy_is_fixed = false; 1053 bool phy_is_fixed = false;
1135 u32 phy_flags = 0; 1054 u32 phy_flags = 0;
1136 int mode, ret; 1055 int mode, ret;
1137 1056
1138 port_dn = p->dp->dn;
1139 mode = of_get_phy_mode(port_dn); 1057 mode = of_get_phy_mode(port_dn);
1140 if (mode < 0) 1058 if (mode < 0)
1141 mode = PHY_INTERFACE_MODE_NA; 1059 mode = PHY_INTERFACE_MODE_NA;
@@ -1156,52 +1074,35 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
1156 } 1074 }
1157 1075
1158 if (ds->ops->get_phy_flags) 1076 if (ds->ops->get_phy_flags)
1159 phy_flags = ds->ops->get_phy_flags(ds, p->dp->index); 1077 phy_flags = ds->ops->get_phy_flags(ds, dp->index);
1160 1078
1161 if (phy_dn) { 1079 if (phy_dn) {
1162 int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); 1080 slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
1163 1081 dsa_slave_adjust_link,
1164 /* If this PHY address is part of phys_mii_mask, which means 1082 phy_flags,
1165 * that we need to divert reads and writes to/from it, then we 1083 p->phy_interface);
1166 * want to bind this device using the slave MII bus created by
1167 * DSA to make that happen.
1168 */
1169 if (!phy_is_fixed && phy_id >= 0 &&
1170 (ds->phys_mii_mask & (1 << phy_id))) {
1171 ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
1172 if (ret) {
1173 netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
1174 of_node_put(phy_dn);
1175 return ret;
1176 }
1177 } else {
1178 p->phy = of_phy_connect(slave_dev, phy_dn,
1179 dsa_slave_adjust_link,
1180 phy_flags,
1181 p->phy_interface);
1182 }
1183
1184 of_node_put(phy_dn); 1084 of_node_put(phy_dn);
1185 } 1085 }
1186 1086
1187 if (p->phy && phy_is_fixed) 1087 if (slave_dev->phydev && phy_is_fixed)
1188 fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update); 1088 fixed_phy_set_link_update(slave_dev->phydev,
1089 dsa_slave_fixed_link_update);
1189 1090
1190 /* We could not connect to a designated PHY, so use the switch internal 1091 /* We could not connect to a designated PHY, so use the switch internal
1191 * MDIO bus instead 1092 * MDIO bus instead
1192 */ 1093 */
1193 if (!p->phy) { 1094 if (!slave_dev->phydev) {
1194 ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index); 1095 ret = dsa_slave_phy_connect(slave_dev, dp->index);
1195 if (ret) { 1096 if (ret) {
1196 netdev_err(slave_dev, "failed to connect to port %d: %d\n", 1097 netdev_err(slave_dev, "failed to connect to port %d: %d\n",
1197 p->dp->index, ret); 1098 dp->index, ret);
1198 if (phy_is_fixed) 1099 if (phy_is_fixed)
1199 of_phy_deregister_fixed_link(port_dn); 1100 of_phy_deregister_fixed_link(port_dn);
1200 return ret; 1101 return ret;
1201 } 1102 }
1202 } 1103 }
1203 1104
1204 phy_attached_info(p->phy); 1105 phy_attached_info(slave_dev->phydev);
1205 1106
1206 return 0; 1107 return 0;
1207} 1108}
@@ -1221,12 +1122,12 @@ int dsa_slave_suspend(struct net_device *slave_dev)
1221 1122
1222 netif_device_detach(slave_dev); 1123 netif_device_detach(slave_dev);
1223 1124
1224 if (p->phy) { 1125 if (slave_dev->phydev) {
1225 phy_stop(p->phy); 1126 phy_stop(slave_dev->phydev);
1226 p->old_pause = -1; 1127 p->old_pause = -1;
1227 p->old_link = -1; 1128 p->old_link = -1;
1228 p->old_duplex = -1; 1129 p->old_duplex = -1;
1229 phy_suspend(p->phy); 1130 phy_suspend(slave_dev->phydev);
1230 } 1131 }
1231 1132
1232 return 0; 1133 return 0;
@@ -1234,31 +1135,40 @@ int dsa_slave_suspend(struct net_device *slave_dev)
1234 1135
1235int dsa_slave_resume(struct net_device *slave_dev) 1136int dsa_slave_resume(struct net_device *slave_dev)
1236{ 1137{
1237 struct dsa_slave_priv *p = netdev_priv(slave_dev);
1238
1239 netif_device_attach(slave_dev); 1138 netif_device_attach(slave_dev);
1240 1139
1241 if (p->phy) { 1140 if (slave_dev->phydev) {
1242 phy_resume(p->phy); 1141 phy_resume(slave_dev->phydev);
1243 phy_start(p->phy); 1142 phy_start(slave_dev->phydev);
1244 } 1143 }
1245 1144
1246 return 0; 1145 return 0;
1247} 1146}
1248 1147
1249int dsa_slave_create(struct dsa_port *port, const char *name) 1148static void dsa_slave_notify(struct net_device *dev, unsigned long val)
1149{
1150 struct net_device *master = dsa_slave_to_master(dev);
1151 struct dsa_port *dp = dsa_slave_to_port(dev);
1152 struct dsa_notifier_register_info rinfo = {
1153 .switch_number = dp->ds->index,
1154 .port_number = dp->index,
1155 .master = master,
1156 .info.dev = dev,
1157 };
1158
1159 call_dsa_notifiers(val, dev, &rinfo.info);
1160}
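
dsa_slave_notify() publishes the slave netdev together with its switch/port coordinates on the DSA notifier chain, so master-side drivers can react when a slave port registers or unregisters (for instance to bind per-port transmit queues). A sketch of a consumer, assuming the usual register_dsa_notifier()/unregister_dsa_notifier() pair exists alongside call_dsa_notifiers(); the handler body is hypothetical:

static int mymac_dsa_notify(struct notifier_block *nb, unsigned long event,
			    void *ptr)
{
	struct dsa_notifier_register_info *info = ptr;

	if (event != DSA_PORT_REGISTER)
		return NOTIFY_DONE;

	/* e.g. associate info->port_number with a TX ring of info->master */
	return NOTIFY_OK;
}

static struct notifier_block mymac_dsa_nb = {
	.notifier_call = mymac_dsa_notify,
};

/* at probe time: register_dsa_notifier(&mymac_dsa_nb); */
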
1161
1162int dsa_slave_create(struct dsa_port *port)
1250{ 1163{
1164 const struct dsa_port *cpu_dp = port->cpu_dp;
1165 struct net_device *master = cpu_dp->master;
1251 struct dsa_switch *ds = port->ds; 1166 struct dsa_switch *ds = port->ds;
1252 struct dsa_switch_tree *dst = ds->dst; 1167 const char *name = port->name;
1253 struct net_device *master;
1254 struct net_device *slave_dev; 1168 struct net_device *slave_dev;
1255 struct dsa_slave_priv *p; 1169 struct dsa_slave_priv *p;
1256 struct dsa_port *cpu_dp;
1257 int ret; 1170 int ret;
1258 1171
1259 cpu_dp = ds->dst->cpu_dp;
1260 master = cpu_dp->netdev;
1261
1262 if (!ds->num_tx_queues) 1172 if (!ds->num_tx_queues)
1263 ds->num_tx_queues = 1; 1173 ds->num_tx_queues = 1;
1264 1174
@@ -1294,51 +1204,58 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
1294 } 1204 }
1295 p->dp = port; 1205 p->dp = port;
1296 INIT_LIST_HEAD(&p->mall_tc_list); 1206 INIT_LIST_HEAD(&p->mall_tc_list);
1297 p->xmit = dst->tag_ops->xmit; 1207 p->xmit = cpu_dp->tag_ops->xmit;
1298 1208
1299 p->old_pause = -1; 1209 p->old_pause = -1;
1300 p->old_link = -1; 1210 p->old_link = -1;
1301 p->old_duplex = -1; 1211 p->old_duplex = -1;
1302 1212
1303 port->netdev = slave_dev; 1213 port->slave = slave_dev;
1304 ret = register_netdev(slave_dev);
1305 if (ret) {
1306 netdev_err(master, "error %d registering interface %s\n",
1307 ret, slave_dev->name);
1308 port->netdev = NULL;
1309 free_percpu(p->stats64);
1310 free_netdev(slave_dev);
1311 return ret;
1312 }
1313 1214
1314 netif_carrier_off(slave_dev); 1215 netif_carrier_off(slave_dev);
1315 1216
1316 ret = dsa_slave_phy_setup(p, slave_dev); 1217 ret = dsa_slave_phy_setup(slave_dev);
1317 if (ret) { 1218 if (ret) {
1318 netdev_err(master, "error %d setting up slave phy\n", ret); 1219 netdev_err(master, "error %d setting up slave phy\n", ret);
1319 unregister_netdev(slave_dev); 1220 goto out_free;
1320 free_percpu(p->stats64); 1221 }
1321 free_netdev(slave_dev); 1222
1322 return ret; 1223 dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
1224
1225 ret = register_netdev(slave_dev);
1226 if (ret) {
1227 netdev_err(master, "error %d registering interface %s\n",
1228 ret, slave_dev->name);
1229 goto out_phy;
1323 } 1230 }
1324 1231
1325 return 0; 1232 return 0;
1233
1234out_phy:
1235 phy_disconnect(slave_dev->phydev);
1236 if (of_phy_is_fixed_link(port->dn))
1237 of_phy_deregister_fixed_link(port->dn);
1238out_free:
1239 free_percpu(p->stats64);
1240 free_netdev(slave_dev);
1241 port->slave = NULL;
1242 return ret;
1326} 1243}
1327 1244
1328void dsa_slave_destroy(struct net_device *slave_dev) 1245void dsa_slave_destroy(struct net_device *slave_dev)
1329{ 1246{
1247 struct dsa_port *dp = dsa_slave_to_port(slave_dev);
1330 struct dsa_slave_priv *p = netdev_priv(slave_dev); 1248 struct dsa_slave_priv *p = netdev_priv(slave_dev);
1331 struct device_node *port_dn; 1249 struct device_node *port_dn = dp->dn;
1332
1333 port_dn = p->dp->dn;
1334 1250
1335 netif_carrier_off(slave_dev); 1251 netif_carrier_off(slave_dev);
1336 if (p->phy) { 1252 if (slave_dev->phydev) {
1337 phy_disconnect(p->phy); 1253 phy_disconnect(slave_dev->phydev);
1338 1254
1339 if (of_phy_is_fixed_link(port_dn)) 1255 if (of_phy_is_fixed_link(port_dn))
1340 of_phy_deregister_fixed_link(port_dn); 1256 of_phy_deregister_fixed_link(port_dn);
1341 } 1257 }
1258 dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
1342 unregister_netdev(slave_dev); 1259 unregister_netdev(slave_dev);
1343 free_percpu(p->stats64); 1260 free_percpu(p->stats64);
1344 free_netdev(slave_dev); 1261 free_netdev(slave_dev);
@@ -1352,8 +1269,7 @@ static bool dsa_slave_dev_check(struct net_device *dev)
1352static int dsa_slave_changeupper(struct net_device *dev, 1269static int dsa_slave_changeupper(struct net_device *dev,
1353 struct netdev_notifier_changeupper_info *info) 1270 struct netdev_notifier_changeupper_info *info)
1354{ 1271{
1355 struct dsa_slave_priv *p = netdev_priv(dev); 1272 struct dsa_port *dp = dsa_slave_to_port(dev);
1356 struct dsa_port *dp = p->dp;
1357 int err = NOTIFY_DONE; 1273 int err = NOTIFY_DONE;
1358 1274
1359 if (netif_is_bridge_master(info->upper_dev)) { 1275 if (netif_is_bridge_master(info->upper_dev)) {
@@ -1374,7 +1290,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
1374{ 1290{
1375 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1291 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1376 1292
1377 if (dev->netdev_ops != &dsa_slave_netdev_ops) 1293 if (!dsa_slave_dev_check(dev))
1378 return NOTIFY_DONE; 1294 return NOTIFY_DONE;
1379 1295
1380 if (event == NETDEV_CHANGEUPPER) 1296 if (event == NETDEV_CHANGEUPPER)
@@ -1396,14 +1312,14 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
1396 container_of(work, struct dsa_switchdev_event_work, work); 1312 container_of(work, struct dsa_switchdev_event_work, work);
1397 struct net_device *dev = switchdev_work->dev; 1313 struct net_device *dev = switchdev_work->dev;
1398 struct switchdev_notifier_fdb_info *fdb_info; 1314 struct switchdev_notifier_fdb_info *fdb_info;
1399 struct dsa_slave_priv *p = netdev_priv(dev); 1315 struct dsa_port *dp = dsa_slave_to_port(dev);
1400 int err; 1316 int err;
1401 1317
1402 rtnl_lock(); 1318 rtnl_lock();
1403 switch (switchdev_work->event) { 1319 switch (switchdev_work->event) {
1404 case SWITCHDEV_FDB_ADD_TO_DEVICE: 1320 case SWITCHDEV_FDB_ADD_TO_DEVICE:
1405 fdb_info = &switchdev_work->fdb_info; 1321 fdb_info = &switchdev_work->fdb_info;
1406 err = dsa_port_fdb_add(p->dp, fdb_info->addr, fdb_info->vid); 1322 err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
1407 if (err) { 1323 if (err) {
1408 netdev_dbg(dev, "fdb add failed err=%d\n", err); 1324 netdev_dbg(dev, "fdb add failed err=%d\n", err);
1409 break; 1325 break;
@@ -1414,7 +1330,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
1414 1330
1415 case SWITCHDEV_FDB_DEL_TO_DEVICE: 1331 case SWITCHDEV_FDB_DEL_TO_DEVICE:
1416 fdb_info = &switchdev_work->fdb_info; 1332 fdb_info = &switchdev_work->fdb_info;
1417 err = dsa_port_fdb_del(p->dp, fdb_info->addr, fdb_info->vid); 1333 err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
1418 if (err) { 1334 if (err) {
1419 netdev_dbg(dev, "fdb del failed err=%d\n", err); 1335 netdev_dbg(dev, "fdb del failed err=%d\n", err);
1420 dev_close(dev); 1336 dev_close(dev);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e6c06aa349a6..29608d087a7c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -121,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
121 if (ds->index == info->sw_index) 121 if (ds->index == info->sw_index)
122 set_bit(info->port, group); 122 set_bit(info->port, group);
123 for (port = 0; port < ds->num_ports; port++) 123 for (port = 0; port < ds->num_ports; port++)
124 if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) 124 if (dsa_is_dsa_port(ds, port))
125 set_bit(port, group); 125 set_bit(port, group);
126 126
127 if (switchdev_trans_ph_prepare(trans)) { 127 if (switchdev_trans_ph_prepare(trans)) {
@@ -133,6 +133,8 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
133 if (err) 133 if (err)
134 return err; 134 return err;
135 } 135 }
136
137 return 0;
136 } 138 }
137 139
138 for_each_set_bit(port, group, ds->num_ports) 140 for_each_set_bit(port, group, ds->num_ports)
@@ -180,6 +182,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
180 if (err) 182 if (err)
181 return err; 183 return err;
182 } 184 }
185
186 return 0;
183 } 187 }
184 188
185 for_each_set_bit(port, members, ds->num_ports) 189 for_each_set_bit(port, members, ds->num_ports)
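
Both net/dsa/switch.c hunks fix the same bug: during the switchdev prepare phase the function validated every port and then fell straight through into the commit loop below it, programming hardware in what should be a dry run. The added "return 0;" restores the intended two-phase shape, roughly (a condensed sketch; validate_one() and commit_one() are invented stand-ins for the port_*_prepare and port_*_add ops):

static int two_phase_add(struct dsa_switch *ds, unsigned long *group,
			 struct switchdev_trans *trans)
{
	int port, err;

	if (switchdev_trans_ph_prepare(trans)) {
		for_each_set_bit(port, group, ds->num_ports) {
			err = validate_one(ds, port);
			if (err)
				return err;
		}
		return 0;	/* the early return these hunks add */
	}

	for_each_set_bit(port, group, ds->num_ports)
		commit_one(ds, port);

	return 0;
}
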
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index dbb016434ace..e6e0b7b6025c 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -59,9 +59,11 @@
59#define BRCM_EG_TC_MASK 0x7 59#define BRCM_EG_TC_MASK 0x7
60#define BRCM_EG_PID_MASK 0x1f 60#define BRCM_EG_PID_MASK 0x1f
61 61
62static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev) 62static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
63 struct net_device *dev,
64 unsigned int offset)
63{ 65{
64 struct dsa_slave_priv *p = netdev_priv(dev); 66 struct dsa_port *dp = dsa_slave_to_port(dev);
65 u16 queue = skb_get_queue_mapping(skb); 67 u16 queue = skb_get_queue_mapping(skb);
66 u8 *brcm_tag; 68 u8 *brcm_tag;
67 69
@@ -70,10 +72,10 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
70 72
71 skb_push(skb, BRCM_TAG_LEN); 73 skb_push(skb, BRCM_TAG_LEN);
72 74
73 memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN); 75 if (offset)
76 memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
74 77
75 /* Build the tag after the MAC Source Address */ 78 brcm_tag = skb->data + offset;
76 brcm_tag = skb->data + 2 * ETH_ALEN;
77 79
78 /* Set the ingress opcode, traffic class, tag enforcement is 80 /* Set the ingress opcode, traffic class, tag enforcement is
79 * deprecated 81 * deprecated
@@ -82,27 +84,30 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
82 ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT); 84 ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT);
83 brcm_tag[1] = 0; 85 brcm_tag[1] = 0;
84 brcm_tag[2] = 0; 86 brcm_tag[2] = 0;
85 if (p->dp->index == 8) 87 if (dp->index == 8)
86 brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; 88 brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
87 brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK; 89 brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK;
90
91 /* Now tell the master network device about the desired output queue
92 * as well
93 */
94 skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue));
88 95
89 return skb; 96 return skb;
90} 97}
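
Besides moving the tag to a caller-supplied offset, the xmit path now records the targeted switch port and queue in skb->queue_mapping, so a master MAC with per-port TX rings can steer the frame. BRCM_TAG_SET_PORT_QUEUE() presumably packs the pair into the 16-bit mapping along these lines (assumed encoding, not quoted from this patch):

#define BRCM_TAG_SET_PORT_QUEUE(p, q)	((p) << 8 | (q))
/* and the inverse on the master side */
#define BRCM_TAG_GET_PORT(v)		((v) >> 8)
#define BRCM_TAG_GET_QUEUE(v)		((v) & 0xff)
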
91 98
92static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, 99static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
93 struct packet_type *pt) 100 struct net_device *dev,
101 struct packet_type *pt,
102 unsigned int offset)
94{ 103{
95 struct dsa_switch_tree *dst = dev->dsa_ptr;
96 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
97 struct dsa_switch *ds = cpu_dp->ds;
98 int source_port; 104 int source_port;
99 u8 *brcm_tag; 105 u8 *brcm_tag;
100 106
101 if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) 107 if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
102 return NULL; 108 return NULL;
103 109
104 /* skb->data points to the EtherType, the tag is right before it */ 110 brcm_tag = skb->data - offset;
105 brcm_tag = skb->data - 2;
106 111
107 /* The opcode should never be different than 0b000 */ 112 /* The opcode should never be different than 0b000 */
108 if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK)) 113 if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
@@ -117,24 +122,67 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
117 /* Locate which port this is coming from */ 122 /* Locate which port this is coming from */
118 source_port = brcm_tag[3] & BRCM_EG_PID_MASK; 123 source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
119 124
120 /* Validate port against switch setup, either the port is totally */ 125 skb->dev = dsa_master_find_slave(dev, 0, source_port);
121 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) 126 if (!skb->dev)
122 return NULL; 127 return NULL;
123 128
124 /* Remove Broadcom tag and update checksum */ 129 /* Remove Broadcom tag and update checksum */
125 skb_pull_rcsum(skb, BRCM_TAG_LEN); 130 skb_pull_rcsum(skb, BRCM_TAG_LEN);
126 131
132 return skb;
133}
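
Every rcv path in this patch replaces its open-coded "validate the switch and port, then pick ds->ports[port].netdev" block with dsa_master_find_slave(). A sketch of what that shared helper plausibly does, assuming dev->dsa_ptr now points at the CPU port (bounds checks and field names inferred from the old code being deleted):

static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
						       int device, int port)
{
	struct dsa_port *cpu_dp = dev->dsa_ptr;
	struct dsa_switch_tree *dst = cpu_dp->ds->dst;
	struct dsa_switch *ds;

	if (device < 0 || device >= DSA_MAX_SWITCHES)
		return NULL;

	ds = dst->ds[device];
	if (!ds)
		return NULL;

	if (port < 0 || port >= ds->num_ports)
		return NULL;

	/* a NULL return makes the tagger drop the frame */
	return ds->ports[port].slave;
}
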
134
135#ifdef CONFIG_NET_DSA_TAG_BRCM
136static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
137 struct net_device *dev)
138{
139 /* Build the tag after the MAC Source Address */
140 return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN);
141}
142
143
144static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
145 struct packet_type *pt)
146{
147 struct sk_buff *nskb;
148
149 /* skb->data points to the EtherType, the tag is right before it */
150 nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
151 if (!nskb)
152 return nskb;
153
127 /* Move the Ethernet DA and SA */ 154 /* Move the Ethernet DA and SA */
128 memmove(skb->data - ETH_HLEN, 155 memmove(nskb->data - ETH_HLEN,
129 skb->data - ETH_HLEN - BRCM_TAG_LEN, 156 nskb->data - ETH_HLEN - BRCM_TAG_LEN,
130 2 * ETH_ALEN); 157 2 * ETH_ALEN);
131 158
132 skb->dev = ds->ports[source_port].netdev; 159 return nskb;
133
134 return skb;
135} 160}
136 161
137const struct dsa_device_ops brcm_netdev_ops = { 162const struct dsa_device_ops brcm_netdev_ops = {
138 .xmit = brcm_tag_xmit, 163 .xmit = brcm_tag_xmit,
139 .rcv = brcm_tag_rcv, 164 .rcv = brcm_tag_rcv,
140}; 165};
166#endif
167
168#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
169static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
170 struct net_device *dev)
171{
172 /* tag is prepended to the packet */
173 return brcm_tag_xmit_ll(skb, dev, 0);
174}
175
176static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
177 struct net_device *dev,
178 struct packet_type *pt)
179{
180 /* tag is prepended to the packet */
181 return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
182}
183
184const struct dsa_device_ops brcm_prepend_netdev_ops = {
185 .xmit = brcm_tag_xmit_prepend,
186 .rcv = brcm_tag_rcv_prepend,
187};
188#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index fbf9ca954773..cd13cfc542ce 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -18,7 +18,7 @@
18 18
19static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) 19static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
20{ 20{
21 struct dsa_slave_priv *p = netdev_priv(dev); 21 struct dsa_port *dp = dsa_slave_to_port(dev);
22 u8 *dsa_header; 22 u8 *dsa_header;
23 23
24 /* 24 /*
@@ -34,8 +34,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
34 * Construct tagged FROM_CPU DSA tag from 802.1q tag. 34 * Construct tagged FROM_CPU DSA tag from 802.1q tag.
35 */ 35 */
36 dsa_header = skb->data + 2 * ETH_ALEN; 36 dsa_header = skb->data + 2 * ETH_ALEN;
37 dsa_header[0] = 0x60 | p->dp->ds->index; 37 dsa_header[0] = 0x60 | dp->ds->index;
38 dsa_header[1] = p->dp->index << 3; 38 dsa_header[1] = dp->index << 3;
39 39
40 /* 40 /*
41 * Move CFI field from byte 2 to byte 1. 41 * Move CFI field from byte 2 to byte 1.
@@ -55,8 +55,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
55 * Construct untagged FROM_CPU DSA tag. 55 * Construct untagged FROM_CPU DSA tag.
56 */ 56 */
57 dsa_header = skb->data + 2 * ETH_ALEN; 57 dsa_header = skb->data + 2 * ETH_ALEN;
58 dsa_header[0] = 0x40 | p->dp->ds->index; 58 dsa_header[0] = 0x40 | dp->ds->index;
59 dsa_header[1] = p->dp->index << 3; 59 dsa_header[1] = dp->index << 3;
60 dsa_header[2] = 0x00; 60 dsa_header[2] = 0x00;
61 dsa_header[3] = 0x00; 61 dsa_header[3] = 0x00;
62 } 62 }
@@ -67,8 +67,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
67static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, 67static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
68 struct packet_type *pt) 68 struct packet_type *pt)
69{ 69{
70 struct dsa_switch_tree *dst = dev->dsa_ptr;
71 struct dsa_switch *ds;
72 u8 *dsa_header; 70 u8 *dsa_header;
73 int source_device; 71 int source_device;
74 int source_port; 72 int source_port;
@@ -93,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
93 source_device = dsa_header[0] & 0x1f; 91 source_device = dsa_header[0] & 0x1f;
94 source_port = (dsa_header[1] >> 3) & 0x1f; 92 source_port = (dsa_header[1] >> 3) & 0x1f;
95 93
96 /* 94 skb->dev = dsa_master_find_slave(dev, source_device, source_port);
97 * Check that the source device exists and that the source 95 if (!skb->dev)
98 * port is a registered DSA port.
99 */
100 if (source_device >= DSA_MAX_SWITCHES)
101 return NULL;
102
103 ds = dst->ds[source_device];
104 if (!ds)
105 return NULL;
106
107 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
108 return NULL; 96 return NULL;
109 97
110 /* 98 /*
@@ -153,7 +141,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
153 2 * ETH_ALEN); 141 2 * ETH_ALEN);
154 } 142 }
155 143
156 skb->dev = ds->ports[source_port].netdev; 144 skb->offload_fwd_mark = 1;
157 145
158 return skb; 146 return skb;
159} 147}
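
A concrete reading of the tag bytes built in dsa_xmit() above, for the untagged FROM_CPU case with switch index 1 and port 5 (values follow directly from the code):

/*
 *   dsa_header[0] = 0x40 | 1 = 0x41    FROM_CPU, source device 1
 *   dsa_header[1] = 5 << 3   = 0x28    source port 5
 *
 * and dsa_rcv() recovers both fields:
 *   source_device = 0x41 & 0x1f        = 1
 *   source_port   = (0x28 >> 3) & 0x1f = 5
 */
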
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 76367ba1b2e2..4083326b806e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -19,7 +19,7 @@
19 19
20static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) 20static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
21{ 21{
22 struct dsa_slave_priv *p = netdev_priv(dev); 22 struct dsa_port *dp = dsa_slave_to_port(dev);
23 u8 *edsa_header; 23 u8 *edsa_header;
24 24
25 /* 25 /*
@@ -43,8 +43,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
43 edsa_header[1] = ETH_P_EDSA & 0xff; 43 edsa_header[1] = ETH_P_EDSA & 0xff;
44 edsa_header[2] = 0x00; 44 edsa_header[2] = 0x00;
45 edsa_header[3] = 0x00; 45 edsa_header[3] = 0x00;
46 edsa_header[4] = 0x60 | p->dp->ds->index; 46 edsa_header[4] = 0x60 | dp->ds->index;
47 edsa_header[5] = p->dp->index << 3; 47 edsa_header[5] = dp->index << 3;
48 48
49 /* 49 /*
50 * Move CFI field from byte 6 to byte 5. 50 * Move CFI field from byte 6 to byte 5.
@@ -68,8 +68,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
68 edsa_header[1] = ETH_P_EDSA & 0xff; 68 edsa_header[1] = ETH_P_EDSA & 0xff;
69 edsa_header[2] = 0x00; 69 edsa_header[2] = 0x00;
70 edsa_header[3] = 0x00; 70 edsa_header[3] = 0x00;
71 edsa_header[4] = 0x40 | p->dp->ds->index; 71 edsa_header[4] = 0x40 | dp->ds->index;
72 edsa_header[5] = p->dp->index << 3; 72 edsa_header[5] = dp->index << 3;
73 edsa_header[6] = 0x00; 73 edsa_header[6] = 0x00;
74 edsa_header[7] = 0x00; 74 edsa_header[7] = 0x00;
75 } 75 }
@@ -80,8 +80,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
80static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, 80static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
81 struct packet_type *pt) 81 struct packet_type *pt)
82{ 82{
83 struct dsa_switch_tree *dst = dev->dsa_ptr;
84 struct dsa_switch *ds;
85 u8 *edsa_header; 83 u8 *edsa_header;
86 int source_device; 84 int source_device;
87 int source_port; 85 int source_port;
@@ -106,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
106 source_device = edsa_header[0] & 0x1f; 104 source_device = edsa_header[0] & 0x1f;
107 source_port = (edsa_header[1] >> 3) & 0x1f; 105 source_port = (edsa_header[1] >> 3) & 0x1f;
108 106
109 /* 107 skb->dev = dsa_master_find_slave(dev, source_device, source_port);
110 * Check that the source device exists and that the source 108 if (!skb->dev)
111 * port is a registered DSA port.
112 */
113 if (source_device >= DSA_MAX_SWITCHES)
114 return NULL;
115
116 ds = dst->ds[source_device];
117 if (!ds)
118 return NULL;
119
120 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
121 return NULL; 109 return NULL;
122 110
123 /* 111 /*
@@ -172,7 +160,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
172 2 * ETH_ALEN); 160 2 * ETH_ALEN);
173 } 161 }
174 162
175 skb->dev = ds->ports[source_port].netdev; 163 skb->offload_fwd_mark = 1;
176 164
177 return skb; 165 return skb;
178} 166}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 010ca0a336c4..0f62effad88f 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -34,7 +34,7 @@
34 34
35static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) 35static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
36{ 36{
37 struct dsa_slave_priv *p = netdev_priv(dev); 37 struct dsa_port *dp = dsa_slave_to_port(dev);
38 struct sk_buff *nskb; 38 struct sk_buff *nskb;
39 int padlen; 39 int padlen;
40 u8 *tag; 40 u8 *tag;
@@ -72,7 +72,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
72 72
73 tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); 73 tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
74 tag[0] = 0; 74 tag[0] = 0;
75 tag[1] = 1 << p->dp->index; /* destination port */ 75 tag[1] = 1 << dp->index; /* destination port */
76 76
77 return nskb; 77 return nskb;
78} 78}
@@ -80,22 +80,19 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
80static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev, 80static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
81 struct packet_type *pt) 81 struct packet_type *pt)
82{ 82{
83 struct dsa_switch_tree *dst = dev->dsa_ptr;
84 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
85 struct dsa_switch *ds = cpu_dp->ds;
86 u8 *tag; 83 u8 *tag;
87 int source_port; 84 int source_port;
88 85
89 tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; 86 tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
90 87
91 source_port = tag[0] & 7; 88 source_port = tag[0] & 7;
92 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) 89
90 skb->dev = dsa_master_find_slave(dev, 0, source_port);
91 if (!skb->dev)
93 return NULL; 92 return NULL;
94 93
95 pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN); 94 pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
96 95
97 skb->dev = ds->ports[source_port].netdev;
98
99 return skb; 96 return skb;
100} 97}
101 98
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 0b9826105e42..548c00254c07 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -11,6 +11,7 @@
11 * GNU General Public License for more details. 11 * GNU General Public License for more details.
12 * 12 *
13 */ 13 */
14#include <linux/dsa/lan9303.h>
14#include <linux/etherdevice.h> 15#include <linux/etherdevice.h>
15#include <linux/list.h> 16#include <linux/list.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
@@ -39,10 +40,30 @@
39 */ 40 */
40 41
41#define LAN9303_TAG_LEN 4 42#define LAN9303_TAG_LEN 4
43# define LAN9303_TAG_TX_USE_ALR BIT(3)
44# define LAN9303_TAG_TX_STP_OVERRIDE BIT(4)
45# define LAN9303_TAG_RX_IGMP BIT(3)
46# define LAN9303_TAG_RX_STP BIT(4)
47# define LAN9303_TAG_RX_TRAPPED_TO_CPU (LAN9303_TAG_RX_IGMP | \
48 LAN9303_TAG_RX_STP)
49
50/* Decide whether to transmit using ALR lookup, or transmit directly to
51 * port using tag. ALR learning is performed only when using ALR lookup.
52 * If the two external ports are bridged and the frame is unicast,
53 * then use ALR lookup to allow ALR learning on CPU port.
54 * Otherwise transmit directly to port with STP state override.
55 * See also: lan9303_separate_ports() and lan9303.pdf 6.4.10.1
56 */
57static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr)
58{
59 struct lan9303 *chip = dp->ds->priv;
60
61 return chip->is_bridged && !is_multicast_ether_addr(dest_addr);
62}
42 63
43static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) 64static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
44{ 65{
45 struct dsa_slave_priv *p = netdev_priv(dev); 66 struct dsa_port *dp = dsa_slave_to_port(dev);
46 u16 *lan9303_tag; 67 u16 *lan9303_tag;
47 68
48 /* insert a special VLAN tag between the MAC addresses 69 /* insert a special VLAN tag between the MAC addresses
@@ -62,26 +83,21 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
62 83
63 lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN); 84 lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
64 lan9303_tag[0] = htons(ETH_P_8021Q); 85 lan9303_tag[0] = htons(ETH_P_8021Q);
65 lan9303_tag[1] = htons(p->dp->index | BIT(4)); 86 lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ?
87 LAN9303_TAG_TX_USE_ALR :
88 dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
89 lan9303_tag[1] = htons(lan9303_tag[1]);
66 90
67 return skb; 91 return skb;
68} 92}
69 93
70static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, 94static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
71 struct packet_type *pt) 95 struct packet_type *pt)
72{ 96{
73 u16 *lan9303_tag; 97 u16 *lan9303_tag;
74 struct dsa_switch_tree *dst = dev->dsa_ptr; 98 u16 lan9303_tag1;
75 struct dsa_switch *ds;
76 unsigned int source_port; 99 unsigned int source_port;
77 100
78 ds = dst->ds[0];
79
80 if (unlikely(!ds)) {
81 dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n");
82 return NULL;
83 }
84
85 if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) { 101 if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
86 dev_warn_ratelimited(&dev->dev, 102 dev_warn_ratelimited(&dev->dev,
87 "Dropping packet, cannot pull\n"); 103 "Dropping packet, cannot pull\n");
@@ -101,27 +117,22 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
101 return NULL; 117 return NULL;
102 } 118 }
103 119
104 source_port = ntohs(lan9303_tag[1]) & 0x3; 120 lan9303_tag1 = ntohs(lan9303_tag[1]);
121 source_port = lan9303_tag1 & 0x3;
105 122
106 if (source_port >= ds->num_ports) { 123 skb->dev = dsa_master_find_slave(dev, 0, source_port);
124 if (!skb->dev) {
107 dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); 125 dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
108 return NULL; 126 return NULL;
109 } 127 }
110 128
111 if (!ds->ports[source_port].netdev) {
112 dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n");
113 return NULL;
114 }
115
116 /* remove the special VLAN tag between the MAC addresses 129 /* remove the special VLAN tag between the MAC addresses
117 * and the current ethertype field. 130 * and the current ethertype field.
118 */ 131 */
119 skb_pull_rcsum(skb, 2 + 2); 132 skb_pull_rcsum(skb, 2 + 2);
120 memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), 133 memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
121 2 * ETH_ALEN); 134 2 * ETH_ALEN);
122 135 skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
123 /* forward the packet to the dedicated interface */
124 skb->dev = ds->ports[source_port].netdev;
125 136
126 return skb; 137 return skb;
127} 138}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index ec8ee5f43255..8475434af7d5 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -23,7 +23,7 @@
23static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, 23static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
24 struct net_device *dev) 24 struct net_device *dev)
25{ 25{
26 struct dsa_slave_priv *p = netdev_priv(dev); 26 struct dsa_port *dp = dsa_slave_to_port(dev);
27 u8 *mtk_tag; 27 u8 *mtk_tag;
28 28
29 if (skb_cow_head(skb, MTK_HDR_LEN) < 0) 29 if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
@@ -36,7 +36,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
36 /* Build the tag after the MAC Source Address */ 36 /* Build the tag after the MAC Source Address */
37 mtk_tag = skb->data + 2 * ETH_ALEN; 37 mtk_tag = skb->data + 2 * ETH_ALEN;
38 mtk_tag[0] = 0; 38 mtk_tag[0] = 0;
39 mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; 39 mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
40 mtk_tag[2] = 0; 40 mtk_tag[2] = 0;
41 mtk_tag[3] = 0; 41 mtk_tag[3] = 0;
42 42
@@ -46,8 +46,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
46static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, 46static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
47 struct packet_type *pt) 47 struct packet_type *pt)
48{ 48{
49 struct dsa_switch_tree *dst = dev->dsa_ptr;
50 struct dsa_switch *ds;
51 int port; 49 int port;
52 __be16 *phdr, hdr; 50 __be16 *phdr, hdr;
53 51
@@ -68,20 +66,12 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
68 skb->data - ETH_HLEN - MTK_HDR_LEN, 66 skb->data - ETH_HLEN - MTK_HDR_LEN,
69 2 * ETH_ALEN); 67 2 * ETH_ALEN);
70 68
71 /* This protocol doesn't support cascading multiple
72 * switches so it's safe to assume the switch is first
73 * in the tree.
74 */
75 ds = dst->ds[0];
76 if (!ds)
77 return NULL;
78
79 /* Get source port information */ 69 /* Get source port information */
80 port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); 70 port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
81 if (!ds->ports[port].netdev)
82 return NULL;
83 71
84 skb->dev = ds->ports[port].netdev; 72 skb->dev = dsa_master_find_slave(dev, 0, port);
73 if (!skb->dev)
74 return NULL;
85 75
86 return skb; 76 return skb;
87} 77}
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1d4c70711c0f..613f4ee97771 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -38,7 +38,7 @@
38 38
39static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) 39static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
40{ 40{
41 struct dsa_slave_priv *p = netdev_priv(dev); 41 struct dsa_port *dp = dsa_slave_to_port(dev);
42 u16 *phdr, hdr; 42 u16 *phdr, hdr;
43 43
44 dev->stats.tx_packets++; 44 dev->stats.tx_packets++;
@@ -54,8 +54,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
54 54
55 /* Set the version field, and set destination port information */ 55 /* Set the version field, and set destination port information */
56 hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | 56 hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
57 QCA_HDR_XMIT_FROM_CPU | 57 QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
58 BIT(p->dp->index);
59 58
60 *phdr = htons(hdr); 59 *phdr = htons(hdr);
61 60
@@ -65,9 +64,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
65static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, 64static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
66 struct packet_type *pt) 65 struct packet_type *pt)
67{ 66{
68 struct dsa_switch_tree *dst = dev->dsa_ptr;
69 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
70 struct dsa_switch *ds;
71 u8 ver; 67 u8 ver;
72 int port; 68 int port;
73 __be16 *phdr, hdr; 69 __be16 *phdr, hdr;
@@ -92,20 +88,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
92 memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, 88 memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
93 ETH_HLEN - QCA_HDR_LEN); 89 ETH_HLEN - QCA_HDR_LEN);
94 90
95 /* This protocol doesn't support cascading multiple switches so it's
96 * safe to assume the switch is first in the tree
97 */
98 ds = cpu_dp->ds;
99 if (!ds)
100 return NULL;
101
102 /* Get source port information */ 91 /* Get source port information */
103 port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); 92 port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
104 if (!ds->ports[port].netdev)
105 return NULL;
106 93
107 /* Update skb & forward the frame accordingly */ 94 skb->dev = dsa_master_find_slave(dev, 0, port);
108 skb->dev = ds->ports[port].netdev; 95 if (!skb->dev)
96 return NULL;
109 97
110 return skb; 98 return skb;
111} 99}
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index d2fd4923aa3e..7d20e1f3de28 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -16,7 +16,7 @@
16 16
17static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) 17static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
18{ 18{
19 struct dsa_slave_priv *p = netdev_priv(dev); 19 struct dsa_port *dp = dsa_slave_to_port(dev);
20 struct sk_buff *nskb; 20 struct sk_buff *nskb;
21 int padlen; 21 int padlen;
22 u8 *trailer; 22 u8 *trailer;
@@ -48,7 +48,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
48 48
49 trailer = skb_put(nskb, 4); 49 trailer = skb_put(nskb, 4);
50 trailer[0] = 0x80; 50 trailer[0] = 0x80;
51 trailer[1] = 1 << p->dp->index; 51 trailer[1] = 1 << dp->index;
52 trailer[2] = 0x10; 52 trailer[2] = 0x10;
53 trailer[3] = 0x00; 53 trailer[3] = 0x00;
54 54
@@ -58,9 +58,6 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
58static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, 58static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
59 struct packet_type *pt) 59 struct packet_type *pt)
60{ 60{
61 struct dsa_switch_tree *dst = dev->dsa_ptr;
62 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
63 struct dsa_switch *ds = cpu_dp->ds;
64 u8 *trailer; 61 u8 *trailer;
65 int source_port; 62 int source_port;
66 63
@@ -73,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
73 return NULL; 70 return NULL;
74 71
75 source_port = trailer[1] & 7; 72 source_port = trailer[1] & 7;
76 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) 73
74 skb->dev = dsa_master_find_slave(dev, 0, source_port);
75 if (!skb->dev)
77 return NULL; 76 return NULL;
78 77
79 pskb_trim_rcsum(skb, skb->len - 4); 78 pskb_trim_rcsum(skb, skb->len - 4);
80 79
81 skb->dev = ds->ports[source_port].netdev;
82
83 return skb; 80 return skb;
84} 81}
85 82
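
All four taggers in this series (lan9303 above, mtk, qca and trailer) drop their open-coded dst->ds[0] / ds->ports[port].netdev walks in favor of dsa_master_find_slave(), which centralizes the NULL and bounds checks each driver used to duplicate. A hypothetical stand-in showing the lookup the taggers now delegate, built only from the data structures visible in the removed lines (the field layout here is an assumption):

	static struct net_device *demo_master_find_slave(struct net_device *master,
							 int device, int port)
	{
		struct dsa_switch_tree *dst = master->dsa_ptr;	/* assumed layout */
		struct dsa_switch *ds;

		if (!dst || device < 0 || device >= DSA_MAX_SWITCHES)
			return NULL;

		ds = dst->ds[device];
		if (!ds || port < 0 || port >= ds->num_ports)
			return NULL;

		return ds->ports[port].netdev;	/* NULL when no slave is bound */
	}
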
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 172d8309f89e..b8cd43c9ed5b 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -328,12 +328,12 @@ out:
328 328
329/* Announce (supervision frame) timer function 329/* Announce (supervision frame) timer function
330 */ 330 */
331static void hsr_announce(unsigned long data) 331static void hsr_announce(struct timer_list *t)
332{ 332{
333 struct hsr_priv *hsr; 333 struct hsr_priv *hsr;
334 struct hsr_port *master; 334 struct hsr_port *master;
335 335
336 hsr = (struct hsr_priv *) data; 336 hsr = from_timer(hsr, t, announce_timer);
337 337
338 rcu_read_lock(); 338 rcu_read_lock();
339 master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); 339 master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
@@ -463,9 +463,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
463 hsr->sequence_nr = HSR_SEQNR_START; 463 hsr->sequence_nr = HSR_SEQNR_START;
464 hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; 464 hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
465 465
466 setup_timer(&hsr->announce_timer, hsr_announce, (unsigned long)hsr); 466 timer_setup(&hsr->announce_timer, hsr_announce, 0);
467 467 timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
468 setup_timer(&hsr->prune_timer, hsr_prune_nodes, (unsigned long)hsr);
469 468
470 ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); 469 ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
471 hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; 470 hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 284a9b820df8..286ceb41ac0c 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -365,16 +365,14 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
365/* Remove stale sequence_nr records. Called by timer every 365/* Remove stale sequence_nr records. Called by timer every
366 * HSR_LIFE_CHECK_INTERVAL (two seconds or so). 366 * HSR_LIFE_CHECK_INTERVAL (two seconds or so).
367 */ 367 */
368void hsr_prune_nodes(unsigned long data) 368void hsr_prune_nodes(struct timer_list *t)
369{ 369{
370 struct hsr_priv *hsr; 370 struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
371 struct hsr_node *node; 371 struct hsr_node *node;
372 struct hsr_port *port; 372 struct hsr_port *port;
373 unsigned long timestamp; 373 unsigned long timestamp;
374 unsigned long time_a, time_b; 374 unsigned long time_a, time_b;
375 375
376 hsr = (struct hsr_priv *) data;
377
378 rcu_read_lock(); 376 rcu_read_lock();
379 list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { 377 list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
380 /* Shorthand */ 378 /* Shorthand */
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 4e04f0e868e9..370b45998121 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -33,7 +33,7 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
33int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, 33int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
34 u16 sequence_nr); 34 u16 sequence_nr);
35 35
36void hsr_prune_nodes(unsigned long data); 36void hsr_prune_nodes(struct timer_list *t);
37 37
38int hsr_create_self_node(struct list_head *self_node_db, 38int hsr_create_self_node(struct list_head *self_node_db,
39 unsigned char addr_a[ETH_ALEN], 39 unsigned char addr_a[ETH_ALEN],
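
The hsr changes above are one instance of the tree-wide timer API conversion in this series: callbacks now take a struct timer_list * instead of an opaque unsigned long, and recover their containing object with from_timer(), a type-safe wrapper around container_of(), rather than a cast. The same pattern recurs below in 6lowpan reassembly, igmp and inet_connection_sock. A self-contained sketch of the mechanics (userspace stand-ins; only the two macros mirror the kernel definitions):

	#include <stddef.h>
	#include <stdio.h>

	/* Userspace stand-ins; in the kernel these come from <linux/timer.h>. */
	struct timer_list { int pending; };

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))
	#define from_timer(var, timer, field) \
		container_of(timer, typeof(*var), field)

	struct demo_priv {
		int announce_count;
		struct timer_list announce_timer;
	};

	/* New-style callback: gets the timer, derives the owner. */
	static void demo_announce(struct timer_list *t)
	{
		struct demo_priv *priv = from_timer(priv, t, announce_timer);

		priv->announce_count++;
		printf("announce fired, count=%d\n", priv->announce_count);
	}

	int main(void)
	{
		struct demo_priv priv = { 0 };

		/* The kernel arms this with timer_setup() + mod_timer();
		 * here the callback is invoked directly for illustration. */
		demo_announce(&priv.announce_timer);
		return 0;
	}
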
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index ac7c96b73ad5..d8de3bcfb103 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __IEEE802154_6LOWPAN_I_H__ 2#ifndef __IEEE802154_6LOWPAN_I_H__
2#define __IEEE802154_6LOWPAN_I_H__ 3#define __IEEE802154_6LOWPAN_I_H__
3 4
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f85b08baff16..85bf86ad6b18 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
80 fq->daddr = *arg->dst; 80 fq->daddr = *arg->dst;
81} 81}
82 82
83static void lowpan_frag_expire(unsigned long data) 83static void lowpan_frag_expire(struct timer_list *t)
84{ 84{
85 struct inet_frag_queue *frag = from_timer(frag, t, timer);
85 struct frag_queue *fq; 86 struct frag_queue *fq;
86 struct net *net; 87 struct net *net;
87 88
88 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); 89 fq = container_of(frag, struct frag_queue, q);
89 net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); 90 net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
90 91
91 spin_lock(&fq->q.lock); 92 spin_lock(&fq->q.lock);
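
lowpan_frag_expire() shows the two-step variant of the same conversion: the timer is embedded in the inner inet_frag_queue, so the callback first recovers the queue with from_timer() and then the enclosing frag_queue with an ordinary container_of():

	static void demo_frag_expire(struct timer_list *t)
	{
		struct inet_frag_queue *frag = from_timer(frag, t, timer);
		struct frag_queue *fq = container_of(frag, struct frag_queue, q);

		/* ... take fq->q.lock and expire the queue ... */
	}
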
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 9b92ade687a3..f05b7bdae2aa 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-$(CONFIG_IEEE802154) += ieee802154.o 2obj-$(CONFIG_IEEE802154) += ieee802154.o
2obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o 3obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o
3obj-y += 6lowpan/ 4obj-y += 6lowpan/
diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h
index 81141f58d079..1c19f575d574 100644
--- a/net/ieee802154/core.h
+++ b/net/ieee802154/core.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __IEEE802154_CORE_H 2#ifndef __IEEE802154_CORE_H
2#define __IEEE802154_CORE_H 3#define __IEEE802154_CORE_H
3 4
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 6bde9e5a5503..96636e3b7aa9 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -89,7 +89,7 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
89 return genlmsg_reply(msg, info); 89 return genlmsg_reply(msg, info);
90} 90}
91 91
92static const struct genl_ops ieee8021154_ops[] = { 92static const struct genl_ops ieee802154_ops[] = {
93 /* see nl-phy.c */ 93 /* see nl-phy.c */
94 IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy, 94 IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy,
95 ieee802154_dump_phy), 95 ieee802154_dump_phy),
@@ -137,8 +137,8 @@ struct genl_family nl802154_family __ro_after_init = {
137 .version = 1, 137 .version = 1,
138 .maxattr = IEEE802154_ATTR_MAX, 138 .maxattr = IEEE802154_ATTR_MAX,
139 .module = THIS_MODULE, 139 .module = THIS_MODULE,
140 .ops = ieee8021154_ops, 140 .ops = ieee802154_ops,
141 .n_ops = ARRAY_SIZE(ieee8021154_ops), 141 .n_ops = ARRAY_SIZE(ieee802154_ops),
142 .mcgrps = ieee802154_mcgrps, 142 .mcgrps = ieee802154_mcgrps,
143 .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), 143 .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
144}; 144};
diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h
index 3846a89d0958..8c4b6d08954c 100644
--- a/net/ieee802154/nl802154.h
+++ b/net/ieee802154/nl802154.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __IEEE802154_NL802154_H 2#ifndef __IEEE802154_NL802154_H
2#define __IEEE802154_NL802154_H 3#define __IEEE802154_NL802154_H
3 4
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 4441c63b3ea6..598f5af49775 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __CFG802154_RDEV_OPS 2#ifndef __CFG802154_RDEV_OPS
2#define __CFG802154_RDEV_OPS 3#define __CFG802154_RDEV_OPS
3 4
diff --git a/net/ieee802154/sysfs.h b/net/ieee802154/sysfs.h
index aa42e39ecbec..337545b639e9 100644
--- a/net/ieee802154/sysfs.h
+++ b/net/ieee802154/sysfs.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __IEEE802154_SYSFS_H 2#ifndef __IEEE802154_SYSFS_H
2#define __IEEE802154_SYSFS_H 3#define __IEEE802154_SYSFS_H
3 4
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 9a471e41ec73..19c2e5d60e76 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Based on net/wireless/trace.h */ 2/* Based on net/wireless/trace.h */
2 3
3#undef TRACE_SYSTEM 4#undef TRACE_SYSTEM
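
The one-line additions across net/ieee802154 (and in the fib files further down) are the tree-wide SPDX annotation work. The comment style tracks the file type, as the hunks themselves show:

	/* SPDX-License-Identifier: GPL-2.0 */		headers (.h), kept as a C comment
	// SPDX-License-Identifier: GPL-2.0		C sources (.c)
	# SPDX-License-Identifier: GPL-2.0		Makefiles, Kconfig, scripts
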
diff --git a/net/ife/ife.c b/net/ife/ife.c
index f360341c72eb..7d1ec76e7f43 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -137,6 +137,6 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
137EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); 137EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
138 138
139MODULE_AUTHOR("Jamal Hadi Salim <jhs@mojatatu.com>"); 139MODULE_AUTHOR("Jamal Hadi Salim <jhs@mojatatu.com>");
140MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); 140MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
141MODULE_DESCRIPTION("Inter-FE LFB action"); 141MODULE_DESCRIPTION("Inter-FE LFB action");
142MODULE_LICENSE("GPL"); 142MODULE_LICENSE("GPL");
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 91a2557942fa..f48fe6fc7e8c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -70,11 +70,9 @@ config IP_MULTIPLE_TABLES
70 address into account. Furthermore, the TOS (Type-Of-Service) field 70 address into account. Furthermore, the TOS (Type-Of-Service) field
71 of the packet can be used for routing decisions as well. 71 of the packet can be used for routing decisions as well.
72 72
73 If you are interested in this, please see the preliminary 73 If you need more information, see the Linux Advanced
74 documentation at <http://www.compendium.com.ar/policy-routing.txt> 74 Routing and Traffic Control documentation at
75 and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>. 75 <http://lartc.org/howto/lartc.rpdb.html>
76 You will need supporting software from
77 <ftp://ftp.tux.org/pub/net/ip-routing/>.
78 76
79 If unsure, say N. 77 If unsure, say N.
80 78
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index afcb435adfbe..c6c8ad1d4b6d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux TCP/IP (INET) layer. 3# Makefile for the Linux TCP/IP (INET) layer.
3# 4#
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e5ef79..f00499a46927 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
195{ 195{
196 struct sock *sk = sock->sk; 196 struct sock *sk = sock->sk;
197 unsigned char old_state; 197 unsigned char old_state;
198 int err; 198 int err, tcp_fastopen;
199 199
200 lock_sock(sk); 200 lock_sock(sk);
201 201
@@ -217,11 +217,12 @@ int inet_listen(struct socket *sock, int backlog)
217 * because the socket was in TCP_LISTEN state previously but 217 * because the socket was in TCP_LISTEN state previously but
218 * was shutdown() rather than close(). 218 * was shutdown() rather than close().
219 */ 219 */
220 if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && 220 tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
221 (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && 221 if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
222 (tcp_fastopen & TFO_SERVER_ENABLE) &&
222 !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { 223 !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
223 fastopen_queue_tune(sk, backlog); 224 fastopen_queue_tune(sk, backlog);
224 tcp_fastopen_init_key_once(true); 225 tcp_fastopen_init_key_once(sock_net(sk));
225 } 226 }
226 227
227 err = inet_csk_listen_start(sk, backlog); 228 err = inet_csk_listen_start(sk, backlog);
@@ -826,6 +827,7 @@ int inet_shutdown(struct socket *sock, int how)
826 err = -ENOTCONN; 827 err = -ENOTCONN;
827 /* Hack to wake up other listeners, who can poll for 828 /* Hack to wake up other listeners, who can poll for
828 POLLHUP, even on eg. unconnected UDP sockets -- RR */ 829 POLLHUP, even on eg. unconnected UDP sockets -- RR */
830 /* fall through */
829 default: 831 default:
830 sk->sk_shutdown |= how; 832 sk->sk_shutdown |= how;
831 if (sk->sk_prot->shutdown) 833 if (sk->sk_prot->shutdown)
@@ -839,7 +841,7 @@ int inet_shutdown(struct socket *sock, int how)
839 case TCP_LISTEN: 841 case TCP_LISTEN:
840 if (!(how & RCV_SHUTDOWN)) 842 if (!(how & RCV_SHUTDOWN))
841 break; 843 break;
842 /* Fall through */ 844 /* fall through */
843 case TCP_SYN_SENT: 845 case TCP_SYN_SENT:
844 err = sk->sk_prot->disconnect(sk, O_NONBLOCK); 846 err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
845 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 847 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
@@ -1221,9 +1223,10 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
1221struct sk_buff *inet_gso_segment(struct sk_buff *skb, 1223struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1222 netdev_features_t features) 1224 netdev_features_t features)
1223{ 1225{
1224 bool fixedid = false, gso_partial, encap; 1226 bool udpfrag = false, fixedid = false, gso_partial, encap;
1225 struct sk_buff *segs = ERR_PTR(-EINVAL); 1227 struct sk_buff *segs = ERR_PTR(-EINVAL);
1226 const struct net_offload *ops; 1228 const struct net_offload *ops;
1229 unsigned int offset = 0;
1227 struct iphdr *iph; 1230 struct iphdr *iph;
1228 int proto, tot_len; 1231 int proto, tot_len;
1229 int nhoff; 1232 int nhoff;
@@ -1258,6 +1261,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1258 segs = ERR_PTR(-EPROTONOSUPPORT); 1261 segs = ERR_PTR(-EPROTONOSUPPORT);
1259 1262
1260 if (!skb->encapsulation || encap) { 1263 if (!skb->encapsulation || encap) {
1264 udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
1261 fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); 1265 fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
1262 1266
1263 /* fixed ID is invalid if DF bit is not set */ 1267 /* fixed ID is invalid if DF bit is not set */
@@ -1277,7 +1281,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1277 skb = segs; 1281 skb = segs;
1278 do { 1282 do {
1279 iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); 1283 iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
1280 if (skb_is_gso(skb)) { 1284 if (udpfrag) {
1285 iph->frag_off = htons(offset >> 3);
1286 if (skb->next)
1287 iph->frag_off |= htons(IP_MF);
1288 offset += skb->len - nhoff - ihl;
1289 tot_len = skb->len - nhoff;
1290 } else if (skb_is_gso(skb)) {
1281 if (!fixedid) { 1291 if (!fixedid) {
1282 iph->id = htons(id); 1292 iph->id = htons(id);
1283 id += skb_shinfo(skb)->gso_segs; 1293 id += skb_shinfo(skb)->gso_segs;
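
Besides moving the TCP Fast Open sysctl into struct net, the af_inet.c hunks teach inet_gso_segment() to finish IPv4 fragmentation itself for SKB_GSO_UDP segments: each resulting skb gets a fragment offset in 8-byte units and IP_MF on every fragment but the last. A sketch of that bookkeeping, using the same names as the hunk (skb, segs, nhoff and ihl come from the surrounding function):

	/* offset accumulates payload bytes already emitted; frag_off is
	 * expressed in 8-byte units per the IPv4 header format. */
	unsigned int offset = 0;

	for (skb = segs; skb; skb = skb->next) {
		struct iphdr *iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);

		iph->frag_off = htons(offset >> 3);
		if (skb->next)
			iph->frag_off |= htons(IP_MF);	/* more fragments follow */
		offset += skb->len - nhoff - ihl;	/* payload in this segment */
	}
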
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 37db44f60718..4dd95cdd8070 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -240,7 +240,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
240 if (err == -EINPROGRESS) 240 if (err == -EINPROGRESS)
241 goto out; 241 goto out;
242 242
243 if (err == -EBUSY) 243 if (err == -ENOSPC)
244 err = NET_XMIT_DROP; 244 err = NET_XMIT_DROP;
245 goto out_free; 245 goto out_free;
246 } 246 }
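
The -EBUSY to -ENOSPC switch here, and the identical one in esp4.c below, track a crypto-layer change: a full request queue is now reported as -ENOSPC, leaving -EBUSY to mean the request was accepted onto the backlog. Only the queue-full case should count as a drop. Assumed caller-side contract:

	/* Sketch of the assumed return-code contract after the change:
	 *   -EINPROGRESS : request in flight, completion callback will run
	 *   -EBUSY       : accepted onto the backlog (CRYPTO_TFM_REQ_MAY_BACKLOG)
	 *   -ENOSPC      : queue full, request not taken -> treat as a drop
	 */
	err = crypto_aead_encrypt(req);		/* as in esp_output_tail() below */
	if (err == -EINPROGRESS)
		goto out;
	if (err == -ENOSPC)
		err = NET_XMIT_DROP;
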
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7c45b8896709..a8d7c5a9fb05 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1180,6 +1180,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1180 case SIOCSARP: 1180 case SIOCSARP:
1181 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1181 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1182 return -EPERM; 1182 return -EPERM;
1183 /* fall through */
1183 case SIOCGARP: 1184 case SIOCGARP:
1184 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1185 err = copy_from_user(&r, arg, sizeof(struct arpreq));
1185 if (err) 1186 if (err)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ae8f54cb321..82178cc69c96 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
1951 buf = NULL; 1951 buf = NULL;
1952 1952
1953 req_inet = inet_rsk(req); 1953 req_inet = inet_rsk(req);
1954 opt = xchg(&req_inet->opt, opt); 1954 opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
1955 if (opt) 1955 if (opt)
1956 kfree_rcu(opt, rcu); 1956 kfree_rcu(opt, rcu);
1957 1957
@@ -1973,11 +1973,13 @@ req_setattr_failure:
1973 * values on failure. 1973 * values on failure.
1974 * 1974 *
1975 */ 1975 */
1976static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) 1976static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
1977{ 1977{
1978 struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
1978 int hdr_delta = 0; 1979 int hdr_delta = 0;
1979 struct ip_options_rcu *opt = *opt_ptr;
1980 1980
1981 if (!opt || opt->opt.cipso == 0)
1982 return 0;
1981 if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { 1983 if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
1982 u8 cipso_len; 1984 u8 cipso_len;
1983 u8 cipso_off; 1985 u8 cipso_off;
@@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
2039 */ 2041 */
2040void cipso_v4_sock_delattr(struct sock *sk) 2042void cipso_v4_sock_delattr(struct sock *sk)
2041{ 2043{
2042 int hdr_delta;
2043 struct ip_options_rcu *opt;
2044 struct inet_sock *sk_inet; 2044 struct inet_sock *sk_inet;
2045 int hdr_delta;
2045 2046
2046 sk_inet = inet_sk(sk); 2047 sk_inet = inet_sk(sk);
2047 opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
2048 if (!opt || opt->opt.cipso == 0)
2049 return;
2050 2048
2051 hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); 2049 hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
2052 if (sk_inet->is_icsk && hdr_delta > 0) { 2050 if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
2066 */ 2064 */
2067void cipso_v4_req_delattr(struct request_sock *req) 2065void cipso_v4_req_delattr(struct request_sock *req)
2068{ 2066{
2069 struct ip_options_rcu *opt; 2067 cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
2070 struct inet_request_sock *req_inet;
2071
2072 req_inet = inet_rsk(req);
2073 opt = req_inet->opt;
2074 if (!opt || opt->opt.cipso == 0)
2075 return;
2076
2077 cipso_v4_delopt(&req_inet->opt);
2078} 2068}
2079 2069
2080/** 2070/**
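
cipso_v4_delopt() now takes an __rcu-annotated double pointer and performs the has-CIPSO check itself, so both the socket and request-socket callers shrink. The rcu_dereference_protected(ptr, 1) form documents that the caller guarantees update-side exclusion instead of naming a lock; abridged sketch:

	static int demo_delopt(struct ip_options_rcu __rcu **opt_ptr)
	{
		struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
		int hdr_delta = 0;

		if (!opt || opt->opt.cipso == 0)
			return 0;		/* nothing to strip */

		/* ... remove the CIPSO option, update *opt_ptr ... */
		return hdr_delta;		/* bytes removed from the header */
	}
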
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d7adc0616599..a4573bccd6da 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
137 */ 137 */
138struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 138struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139{ 139{
140 u32 hash = inet_addr_hash(net, addr);
141 struct net_device *result = NULL; 140 struct net_device *result = NULL;
142 struct in_ifaddr *ifa; 141 struct in_ifaddr *ifa;
143 142
144 rcu_read_lock(); 143 rcu_read_lock();
145 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { 144 ifa = inet_lookup_ifaddr_rcu(net, addr);
146 if (ifa->ifa_local == addr) { 145 if (!ifa) {
147 struct net_device *dev = ifa->ifa_dev->dev;
148
149 if (!net_eq(dev_net(dev), net))
150 continue;
151 result = dev;
152 break;
153 }
154 }
155 if (!result) {
156 struct flowi4 fl4 = { .daddr = addr }; 146 struct flowi4 fl4 = { .daddr = addr };
157 struct fib_result res = { 0 }; 147 struct fib_result res = { 0 };
158 struct fib_table *local; 148 struct fib_table *local;
@@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && 155 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 res.type == RTN_LOCAL) 156 res.type == RTN_LOCAL)
167 result = FIB_RES_DEV(res); 157 result = FIB_RES_DEV(res);
158 } else {
159 result = ifa->ifa_dev->dev;
168 } 160 }
169 if (result && devref) 161 if (result && devref)
170 dev_hold(result); 162 dev_hold(result);
@@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
173} 165}
174EXPORT_SYMBOL(__ip_dev_find); 166EXPORT_SYMBOL(__ip_dev_find);
175 167
168/* called under RCU lock */
169struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
170{
171 u32 hash = inet_addr_hash(net, addr);
172 struct in_ifaddr *ifa;
173
174 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
175 if (ifa->ifa_local == addr &&
176 net_eq(dev_net(ifa->ifa_dev->dev), net))
177 return ifa;
178
179 return NULL;
180}
181
176static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 182static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 183
178static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 184static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -438,7 +444,7 @@ static void check_lifetime(struct work_struct *work);
438static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); 444static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
439 445
440static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 446static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
441 u32 portid) 447 u32 portid, struct netlink_ext_ack *extack)
442{ 448{
443 struct in_device *in_dev = ifa->ifa_dev; 449 struct in_device *in_dev = ifa->ifa_dev;
444 struct in_ifaddr *ifa1, **ifap, **last_primary; 450 struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -483,6 +489,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
483 */ 489 */
484 ivi.ivi_addr = ifa->ifa_address; 490 ivi.ivi_addr = ifa->ifa_address;
485 ivi.ivi_dev = ifa->ifa_dev; 491 ivi.ivi_dev = ifa->ifa_dev;
492 ivi.extack = extack;
486 ret = blocking_notifier_call_chain(&inetaddr_validator_chain, 493 ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
487 NETDEV_UP, &ivi); 494 NETDEV_UP, &ivi);
488 ret = notifier_to_errno(ret); 495 ret = notifier_to_errno(ret);
@@ -515,7 +522,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
515 522
516static int inet_insert_ifa(struct in_ifaddr *ifa) 523static int inet_insert_ifa(struct in_ifaddr *ifa)
517{ 524{
518 return __inet_insert_ifa(ifa, NULL, 0); 525 return __inet_insert_ifa(ifa, NULL, 0, NULL);
519} 526}
520 527
521static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 528static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
@@ -896,7 +903,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
896 return ret; 903 return ret;
897 } 904 }
898 } 905 }
899 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); 906 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
907 extack);
900 } else { 908 } else {
901 inet_free_ifa(ifa); 909 inet_free_ifa(ifa);
902 910
@@ -1516,6 +1524,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1516 if (inetdev_valid_mtu(dev->mtu)) 1524 if (inetdev_valid_mtu(dev->mtu))
1517 break; 1525 break;
1518 /* disable IP when MTU is not enough */ 1526 /* disable IP when MTU is not enough */
1527 /* fall through */
1519 case NETDEV_UNREGISTER: 1528 case NETDEV_UNREGISTER:
1520 inetdev_destroy(in_dev); 1529 inetdev_destroy(in_dev);
1521 break; 1530 break;
@@ -1751,7 +1760,7 @@ static int inet_validate_link_af(const struct net_device *dev,
1751 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1760 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1752 int err, rem; 1761 int err, rem;
1753 1762
1754 if (dev && !__in_dev_get_rtnl(dev)) 1763 if (dev && !__in_dev_get_rcu(dev))
1755 return -EAFNOSUPPORT; 1764 return -EAFNOSUPPORT;
1756 1765
1757 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); 1766 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
@@ -1775,7 +1784,7 @@ static int inet_validate_link_af(const struct net_device *dev,
1775 1784
1776static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1785static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1777{ 1786{
1778 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1787 struct in_device *in_dev = __in_dev_get_rcu(dev);
1779 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1788 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1780 int rem; 1789 int rem;
1781 1790
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b00e4a43b4dc..d57aa64fa7c7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -432,7 +432,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
432 case -EINPROGRESS: 432 case -EINPROGRESS:
433 goto error; 433 goto error;
434 434
435 case -EBUSY: 435 case -ENOSPC:
436 err = NET_XMIT_DROP; 436 err = NET_XMIT_DROP;
437 break; 437 break;
438 438
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 37819ab4cc74..f52d27a422c3 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -73,6 +73,11 @@ fail:
73 fib_free_table(main_table); 73 fib_free_table(main_table);
74 return -ENOMEM; 74 return -ENOMEM;
75} 75}
76
77static bool fib4_has_custom_rules(struct net *net)
78{
79 return false;
80}
76#else 81#else
77 82
78struct fib_table *fib_new_table(struct net *net, u32 id) 83struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -128,6 +133,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
128 } 133 }
129 return NULL; 134 return NULL;
130} 135}
136
137static bool fib4_has_custom_rules(struct net *net)
138{
139 return net->ipv4.fib_has_custom_rules;
140}
131#endif /* CONFIG_IP_MULTIPLE_TABLES */ 141#endif /* CONFIG_IP_MULTIPLE_TABLES */
132 142
133static void fib_replace_table(struct net *net, struct fib_table *old, 143static void fib_replace_table(struct net *net, struct fib_table *old,
@@ -345,9 +355,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
345 if (res.type != RTN_UNICAST && 355 if (res.type != RTN_UNICAST &&
346 (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) 356 (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
347 goto e_inval; 357 goto e_inval;
348 if (!rpf && !fib_num_tclassid_users(net) &&
349 (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
350 goto last_resort;
351 fib_combine_itag(itag, &res); 358 fib_combine_itag(itag, &res);
352 dev_match = false; 359 dev_match = false;
353 360
@@ -402,13 +409,28 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
402 struct in_device *idev, u32 *itag) 409 struct in_device *idev, u32 *itag)
403{ 410{
404 int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); 411 int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
412 struct net *net = dev_net(dev);
405 413
406 if (!r && !fib_num_tclassid_users(dev_net(dev)) && 414 if (!r && !fib_num_tclassid_users(net) &&
407 IN_DEV_ACCEPT_LOCAL(idev) &&
408 (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { 415 (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
416 if (IN_DEV_ACCEPT_LOCAL(idev))
417 goto ok;
418 /* with custom local routes in place, checking local addresses
419 * only will be too optimistic, with custom rules, checking
420 * local addresses only can be too strict, e.g. due to vrf
421 */
422 if (net->ipv4.fib_has_custom_local_routes ||
423 fib4_has_custom_rules(net))
424 goto full_check;
425 if (inet_lookup_ifaddr_rcu(net, src))
426 return -EINVAL;
427
428ok:
409 *itag = 0; 429 *itag = 0;
410 return 0; 430 return 0;
411 } 431 }
432
433full_check:
412 return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); 434 return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
413} 435}
414 436
@@ -759,6 +781,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
759 } 781 }
760 782
761 err = fib_table_insert(net, tb, &cfg, extack); 783 err = fib_table_insert(net, tb, &cfg, extack);
784 if (!err && cfg.fc_type == RTN_LOCAL)
785 net->ipv4.fib_has_custom_local_routes = true;
762errout: 786errout:
763 return err; 787 return err;
764} 788}
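
The fib_frontend.c change widens the fib_validate_source() fast path. With rp_filter off and no tclassid users, custom local routes or custom rules force the full lookup, because an address-table check alone could be wrong in either direction (too optimistic with custom local routes, too strict with rules such as VRF). The decision flow added above, summarized:

	/* Fast-path decision sketch (rp_filter disabled, no tclassid users,
	 * oif/redirect precondition already met):
	 *
	 *   IN_DEV_ACCEPT_LOCAL(idev)            -> accept, *itag = 0
	 *   custom local routes or custom rules  -> full __fib_validate_source()
	 *   inet_lookup_ifaddr_rcu(net, src)     -> -EINVAL (src is one of ours)
	 *   otherwise                            -> accept, *itag = 0
	 */
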
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 5b2af19cfb5b..e6ff282bb7f4 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _FIB_LOOKUP_H 2#ifndef _FIB_LOOKUP_H
2#define _FIB_LOOKUP_H 3#define _FIB_LOOKUP_H
3 4
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index cfd420b0572c..b804ccbdb241 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/rtnetlink.h> 2#include <linux/rtnetlink.h>
2#include <linux/notifier.h> 3#include <linux/notifier.h>
3#include <linux/socket.h> 4#include <linux/socket.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 57a5d48acee8..f04d944f8abe 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -601,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi)
601 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); 601 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
602 } endfor_nexthops(fi); 602 } endfor_nexthops(fi);
603} 603}
604
605static inline void fib_add_weight(struct fib_info *fi,
606 const struct fib_nh *nh)
607{
608 fi->fib_weight += nh->nh_weight;
609}
610
611#else /* CONFIG_IP_ROUTE_MULTIPATH */ 604#else /* CONFIG_IP_ROUTE_MULTIPATH */
612 605
613#define fib_rebalance(fi) do { } while (0) 606#define fib_rebalance(fi) do { } while (0)
614#define fib_add_weight(fi, nh) do { } while (0)
615 607
616#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 608#endif /* CONFIG_IP_ROUTE_MULTIPATH */
617 609
@@ -718,7 +710,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
718 bool ecn_ca = false; 710 bool ecn_ca = false;
719 711
720 nla_strlcpy(tmp, nla, sizeof(tmp)); 712 nla_strlcpy(tmp, nla, sizeof(tmp));
721 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 713 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
722 } else { 714 } else {
723 val = nla_get_u32(nla); 715 val = nla_get_u32(nla);
724 } 716 }
@@ -774,8 +766,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
774 * | 766 * |
775 * |-> {local prefix} (terminal node) 767 * |-> {local prefix} (terminal node)
776 */ 768 */
777static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 769static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
778 struct fib_nh *nh, struct netlink_ext_ack *extack) 770 struct netlink_ext_ack *extack)
779{ 771{
780 int err = 0; 772 int err = 0;
781 struct net *net; 773 struct net *net;
@@ -1038,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
1038 char tmp[TCP_CA_NAME_MAX]; 1030 char tmp[TCP_CA_NAME_MAX];
1039 1031
1040 nla_strlcpy(tmp, nla, sizeof(tmp)); 1032 nla_strlcpy(tmp, nla, sizeof(tmp));
1041 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 1033 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
1042 if (val == TCP_CA_UNSPEC) 1034 if (val == TCP_CA_UNSPEC)
1043 return -EINVAL; 1035 return -EINVAL;
1044 } else { 1036 } else {
@@ -1258,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
1258 int linkdown = 0; 1250 int linkdown = 0;
1259 1251
1260 change_nexthops(fi) { 1252 change_nexthops(fi) {
1261 err = fib_check_nh(cfg, fi, nexthop_nh, extack); 1253 err = fib_check_nh(cfg, nexthop_nh, extack);
1262 if (err != 0) 1254 if (err != 0)
1263 goto failure; 1255 goto failure;
1264 if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) 1256 if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1275,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
1275 1267
1276 change_nexthops(fi) { 1268 change_nexthops(fi) {
1277 fib_info_update_nh_saddr(net, nexthop_nh); 1269 fib_info_update_nh_saddr(net, nexthop_nh);
1278 fib_add_weight(fi, nexthop_nh);
1279 } endfor_nexthops(fi) 1270 } endfor_nexthops(fi)
1280 1271
1281 fib_rebalance(fi); 1272 fib_rebalance(fi);
@@ -1365,8 +1356,6 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1365 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) 1356 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
1366 goto nla_put_failure; 1357 goto nla_put_failure;
1367 if (fi->fib_nhs == 1) { 1358 if (fi->fib_nhs == 1) {
1368 struct in_device *in_dev;
1369
1370 if (fi->fib_nh->nh_gw && 1359 if (fi->fib_nh->nh_gw &&
1371 nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) 1360 nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
1372 goto nla_put_failure; 1361 goto nla_put_failure;
@@ -1374,10 +1363,14 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1374 nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) 1363 nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
1375 goto nla_put_failure; 1364 goto nla_put_failure;
1376 if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { 1365 if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
1377 in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev); 1366 struct in_device *in_dev;
1367
1368 rcu_read_lock();
1369 in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
1378 if (in_dev && 1370 if (in_dev &&
1379 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) 1371 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1380 rtm->rtm_flags |= RTNH_F_DEAD; 1372 rtm->rtm_flags |= RTNH_F_DEAD;
1373 rcu_read_unlock();
1381 } 1374 }
1382 if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD) 1375 if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
1383 rtm->rtm_flags |= RTNH_F_OFFLOAD; 1376 rtm->rtm_flags |= RTNH_F_OFFLOAD;
@@ -1400,18 +1393,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1400 goto nla_put_failure; 1393 goto nla_put_failure;
1401 1394
1402 for_nexthops(fi) { 1395 for_nexthops(fi) {
1403 struct in_device *in_dev;
1404
1405 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1396 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1406 if (!rtnh) 1397 if (!rtnh)
1407 goto nla_put_failure; 1398 goto nla_put_failure;
1408 1399
1409 rtnh->rtnh_flags = nh->nh_flags & 0xFF; 1400 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1410 if (nh->nh_flags & RTNH_F_LINKDOWN) { 1401 if (nh->nh_flags & RTNH_F_LINKDOWN) {
1411 in_dev = __in_dev_get_rtnl(nh->nh_dev); 1402 struct in_device *in_dev;
1403
1404 rcu_read_lock();
1405 in_dev = __in_dev_get_rcu(nh->nh_dev);
1412 if (in_dev && 1406 if (in_dev &&
1413 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) 1407 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
1414 rtnh->rtnh_flags |= RTNH_F_DEAD; 1408 rtnh->rtnh_flags |= RTNH_F_DEAD;
1409 rcu_read_unlock();
1415 } 1410 }
1416 rtnh->rtnh_hops = nh->nh_weight - 1; 1411 rtnh->rtnh_hops = nh->nh_weight - 1;
1417 rtnh->rtnh_ifindex = nh->nh_oif; 1412 rtnh->rtnh_ifindex = nh->nh_oif;
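
Two independent cleanups in fib_semantics.c: the fib_weight accounting, which had no readers, is deleted outright, and fib_dump_info() switches from __in_dev_get_rtnl() to __in_dev_get_rcu() inside an explicit rcu_read_lock(), presumably so the dump path no longer depends on holding RTNL. The narrow RCU section:

	if (nh->nh_flags & RTNH_F_LINKDOWN) {
		struct in_device *in_dev;

		rcu_read_lock();
		in_dev = __in_dev_get_rcu(nh->nh_dev);
		if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
			rtnh->rtnh_flags |= RTNH_F_DEAD;
		rcu_read_unlock();
	}
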
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c636650a6a70..5ddc4aefff12 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -87,32 +87,32 @@
87 87
88static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, 88static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
89 enum fib_event_type event_type, u32 dst, 89 enum fib_event_type event_type, u32 dst,
90 int dst_len, struct fib_info *fi, 90 int dst_len, struct fib_alias *fa)
91 u8 tos, u8 type, u32 tb_id)
92{ 91{
93 struct fib_entry_notifier_info info = { 92 struct fib_entry_notifier_info info = {
94 .dst = dst, 93 .dst = dst,
95 .dst_len = dst_len, 94 .dst_len = dst_len,
96 .fi = fi, 95 .fi = fa->fa_info,
97 .tos = tos, 96 .tos = fa->fa_tos,
98 .type = type, 97 .type = fa->fa_type,
99 .tb_id = tb_id, 98 .tb_id = fa->tb_id,
100 }; 99 };
101 return call_fib4_notifier(nb, net, event_type, &info.info); 100 return call_fib4_notifier(nb, net, event_type, &info.info);
102} 101}
103 102
104static int call_fib_entry_notifiers(struct net *net, 103static int call_fib_entry_notifiers(struct net *net,
105 enum fib_event_type event_type, u32 dst, 104 enum fib_event_type event_type, u32 dst,
106 int dst_len, struct fib_info *fi, 105 int dst_len, struct fib_alias *fa,
107 u8 tos, u8 type, u32 tb_id) 106 struct netlink_ext_ack *extack)
108{ 107{
109 struct fib_entry_notifier_info info = { 108 struct fib_entry_notifier_info info = {
109 .info.extack = extack,
110 .dst = dst, 110 .dst = dst,
111 .dst_len = dst_len, 111 .dst_len = dst_len,
112 .fi = fi, 112 .fi = fa->fa_info,
113 .tos = tos, 113 .tos = fa->fa_tos,
114 .type = type, 114 .type = fa->fa_type,
115 .tb_id = tb_id, 115 .tb_id = fa->tb_id,
116 }; 116 };
117 return call_fib4_notifiers(net, event_type, &info.info); 117 return call_fib4_notifiers(net, event_type, &info.info);
118} 118}
@@ -1216,9 +1216,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
1216 new_fa->fa_default = -1; 1216 new_fa->fa_default = -1;
1217 1217
1218 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1218 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1219 key, plen, fi, 1219 key, plen, new_fa, extack);
1220 new_fa->fa_tos, cfg->fc_type,
1221 tb->tb_id);
1222 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1220 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1223 tb->tb_id, &cfg->fc_nlinfo, nlflags); 1221 tb->tb_id, &cfg->fc_nlinfo, nlflags);
1224 1222
@@ -1273,8 +1271,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
1273 tb->tb_num_default++; 1271 tb->tb_num_default++;
1274 1272
1275 rt_cache_flush(cfg->fc_nlinfo.nl_net); 1273 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1276 call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type, 1274 call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
1277 tb->tb_id);
1278 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, 1275 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1279 &cfg->fc_nlinfo, nlflags); 1276 &cfg->fc_nlinfo, nlflags);
1280succeeded: 1277succeeded:
@@ -1574,8 +1571,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
1574 return -ESRCH; 1571 return -ESRCH;
1575 1572
1576 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, 1573 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
1577 fa_to_delete->fa_info, tos, 1574 fa_to_delete, extack);
1578 fa_to_delete->fa_type, tb->tb_id);
1579 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, 1575 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1580 &cfg->fc_nlinfo, 0); 1576 &cfg->fc_nlinfo, 0);
1581 1577
@@ -1892,9 +1888,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
1892 1888
1893 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, 1889 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
1894 n->key, 1890 n->key,
1895 KEYLENGTH - fa->fa_slen, 1891 KEYLENGTH - fa->fa_slen, fa,
1896 fi, fa->fa_tos, fa->fa_type, 1892 NULL);
1897 tb->tb_id);
1898 hlist_del_rcu(&fa->fa_list); 1893 hlist_del_rcu(&fa->fa_list);
1899 fib_release_info(fa->fa_info); 1894 fib_release_info(fa->fa_info);
1900 alias_free_mem_rcu(fa); 1895 alias_free_mem_rcu(fa);
@@ -1932,8 +1927,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
1932 continue; 1927 continue;
1933 1928
1934 call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, 1929 call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
1935 KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, 1930 KEYLENGTH - fa->fa_slen, fa);
1936 fa->fa_type, fa->tb_id);
1937 } 1931 }
1938} 1932}
1939 1933
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 416bb304a281..1859c473b21a 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -86,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
86 greh = (struct gre_base_hdr *)skb_transport_header(skb); 86 greh = (struct gre_base_hdr *)skb_transport_header(skb);
87 pcsum = (__sum16 *)(greh + 1); 87 pcsum = (__sum16 *)(greh + 1);
88 88
89 if (gso_partial) { 89 if (gso_partial && skb_is_gso(skb)) {
90 unsigned int partial_adj; 90 unsigned int partial_adj;
91 91
92 /* Adjust checksum to account for the fact that 92 /* Adjust checksum to account for the fact that
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 681e33998e03..1617604c9284 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -782,7 +782,7 @@ static bool icmp_tag_validation(int proto)
782} 782}
783 783
784/* 784/*
785 * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and 785 * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
786 * ICMP_PARAMETERPROB. 786 * ICMP_PARAMETERPROB.
787 */ 787 */
788 788
@@ -810,7 +810,8 @@ static bool icmp_unreach(struct sk_buff *skb)
810 if (iph->ihl < 5) /* Mangled header, drop. */ 810 if (iph->ihl < 5) /* Mangled header, drop. */
811 goto out_err; 811 goto out_err;
812 812
813 if (icmph->type == ICMP_DEST_UNREACH) { 813 switch (icmph->type) {
814 case ICMP_DEST_UNREACH:
814 switch (icmph->code & 15) { 815 switch (icmph->code & 15) {
815 case ICMP_NET_UNREACH: 816 case ICMP_NET_UNREACH:
816 case ICMP_HOST_UNREACH: 817 case ICMP_HOST_UNREACH:
@@ -846,8 +847,16 @@ static bool icmp_unreach(struct sk_buff *skb)
846 } 847 }
847 if (icmph->code > NR_ICMP_UNREACH) 848 if (icmph->code > NR_ICMP_UNREACH)
848 goto out; 849 goto out;
849 } else if (icmph->type == ICMP_PARAMETERPROB) 850 break;
851 case ICMP_PARAMETERPROB:
850 info = ntohl(icmph->un.gateway) >> 24; 852 info = ntohl(icmph->un.gateway) >> 24;
853 break;
854 case ICMP_TIME_EXCEEDED:
855 __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
856 if (icmph->code == ICMP_EXC_FRAGTIME)
857 goto out;
858 break;
859 }
851 860
852 /* 861 /*
853 * Throw it at our lower layers 862 * Throw it at our lower layers
@@ -959,8 +968,9 @@ static bool icmp_timestamp(struct sk_buff *skb)
959 */ 968 */
960 icmp_param.data.times[1] = inet_current_timestamp(); 969 icmp_param.data.times[1] = inet_current_timestamp();
961 icmp_param.data.times[2] = icmp_param.data.times[1]; 970 icmp_param.data.times[2] = icmp_param.data.times[1];
962 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) 971
963 BUG(); 972 BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4));
973
964 icmp_param.data.icmph = *icmp_hdr(skb); 974 icmp_param.data.icmph = *icmp_hdr(skb);
965 icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY; 975 icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
966 icmp_param.data.icmph.code = 0; 976 icmp_param.data.icmph.code = 0;
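
Two small cleanups in icmp.c: the if/else chain becomes a switch, letting ICMP_TIME_EXCEEDED bump the ICMP_MIB_INTIMEEXCDS counter and swallow fragment-reassembly timeouts (ICMP_EXC_FRAGTIME) before the error reaches upper-layer protocols, and the open-coded assertion collapses into BUG_ON():

	if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
		BUG();
	/* becomes */
	BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4));
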
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index ab183af0b5b6..d1f8f302dbf3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -752,18 +752,18 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
752 return ip_local_out(net, skb->sk, skb); 752 return ip_local_out(net, skb->sk, skb);
753} 753}
754 754
755static void igmp_gq_timer_expire(unsigned long data) 755static void igmp_gq_timer_expire(struct timer_list *t)
756{ 756{
757 struct in_device *in_dev = (struct in_device *)data; 757 struct in_device *in_dev = from_timer(in_dev, t, mr_gq_timer);
758 758
759 in_dev->mr_gq_running = 0; 759 in_dev->mr_gq_running = 0;
760 igmpv3_send_report(in_dev, NULL); 760 igmpv3_send_report(in_dev, NULL);
761 in_dev_put(in_dev); 761 in_dev_put(in_dev);
762} 762}
763 763
764static void igmp_ifc_timer_expire(unsigned long data) 764static void igmp_ifc_timer_expire(struct timer_list *t)
765{ 765{
766 struct in_device *in_dev = (struct in_device *)data; 766 struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer);
767 767
768 igmpv3_send_cr(in_dev); 768 igmpv3_send_cr(in_dev);
769 if (in_dev->mr_ifc_count) { 769 if (in_dev->mr_ifc_count) {
@@ -784,9 +784,9 @@ static void igmp_ifc_event(struct in_device *in_dev)
784} 784}
785 785
786 786
787static void igmp_timer_expire(unsigned long data) 787static void igmp_timer_expire(struct timer_list *t)
788{ 788{
789 struct ip_mc_list *im = (struct ip_mc_list *)data; 789 struct ip_mc_list *im = from_timer(im, t, timer);
790 struct in_device *in_dev = im->interface; 790 struct in_device *in_dev = im->interface;
791 791
792 spin_lock(&im->lock); 792 spin_lock(&im->lock);
@@ -1385,7 +1385,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1385 refcount_set(&im->refcnt, 1); 1385 refcount_set(&im->refcnt, 1);
1386 spin_lock_init(&im->lock); 1386 spin_lock_init(&im->lock);
1387#ifdef CONFIG_IP_MULTICAST 1387#ifdef CONFIG_IP_MULTICAST
1388 setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); 1388 timer_setup(&im->timer, igmp_timer_expire, 0);
1389 im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; 1389 im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
1390#endif 1390#endif
1391 1391
@@ -1695,10 +1695,8 @@ void ip_mc_init_dev(struct in_device *in_dev)
1695 ASSERT_RTNL(); 1695 ASSERT_RTNL();
1696 1696
1697#ifdef CONFIG_IP_MULTICAST 1697#ifdef CONFIG_IP_MULTICAST
1698 setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 1698 timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
1699 (unsigned long)in_dev); 1699 timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
1700 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
1701 (unsigned long)in_dev);
1702 in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; 1700 in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
1703#endif 1701#endif
1704 1702
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b9c64b40a83a..4ca46dc08e63 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
39 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, 39 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
40 * and 0.0.0.0 equals to 0.0.0.0 only 40 * and 0.0.0.0 equals to 0.0.0.0 only
41 */ 41 */
42static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, 42static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
43 const struct in6_addr *sk2_rcv_saddr6, 43 const struct in6_addr *sk2_rcv_saddr6,
44 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 44 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
45 bool sk1_ipv6only, bool sk2_ipv6only, 45 bool sk1_ipv6only, bool sk2_ipv6only,
46 bool match_wildcard) 46 bool match_wildcard)
47{ 47{
48 int addr_type = ipv6_addr_type(sk1_rcv_saddr6); 48 int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
49 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; 49 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
52 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { 52 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
53 if (!sk2_ipv6only) { 53 if (!sk2_ipv6only) {
54 if (sk1_rcv_saddr == sk2_rcv_saddr) 54 if (sk1_rcv_saddr == sk2_rcv_saddr)
55 return 1; 55 return true;
56 if (!sk1_rcv_saddr || !sk2_rcv_saddr) 56 if (!sk1_rcv_saddr || !sk2_rcv_saddr)
57 return match_wildcard; 57 return match_wildcard;
58 } 58 }
59 return 0; 59 return false;
60 } 60 }
61 61
62 if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) 62 if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
63 return 1; 63 return true;
64 64
65 if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && 65 if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
66 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 66 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
67 return 1; 67 return true;
68 68
69 if (addr_type == IPV6_ADDR_ANY && match_wildcard && 69 if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
70 !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) 70 !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
71 return 1; 71 return true;
72 72
73 if (sk2_rcv_saddr6 && 73 if (sk2_rcv_saddr6 &&
74 ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) 74 ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
75 return 1; 75 return true;
76 76
77 return 0; 77 return false;
78} 78}
79#endif 79#endif
80 80
@@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
82 * match_wildcard == false: addresses must be exactly the same, i.e. 82 * match_wildcard == false: addresses must be exactly the same, i.e.
83 * 0.0.0.0 only equals to 0.0.0.0 83 * 0.0.0.0 only equals to 0.0.0.0
84 */ 84 */
85static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 85static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
86 bool sk2_ipv6only, bool match_wildcard) 86 bool sk2_ipv6only, bool match_wildcard)
87{ 87{
88 if (!sk2_ipv6only) { 88 if (!sk2_ipv6only) {
89 if (sk1_rcv_saddr == sk2_rcv_saddr) 89 if (sk1_rcv_saddr == sk2_rcv_saddr)
90 return 1; 90 return true;
91 if (!sk1_rcv_saddr || !sk2_rcv_saddr) 91 if (!sk1_rcv_saddr || !sk2_rcv_saddr)
92 return match_wildcard; 92 return match_wildcard;
93 } 93 }
94 return 0; 94 return false;
95} 95}
96 96
97int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, 97bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
98 bool match_wildcard) 98 bool match_wildcard)
99{ 99{
100#if IS_ENABLED(CONFIG_IPV6) 100#if IS_ENABLED(CONFIG_IPV6)
101 if (sk->sk_family == AF_INET6) 101 if (sk->sk_family == AF_INET6)
@@ -266,7 +266,7 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
266#if IS_ENABLED(CONFIG_IPV6) 266#if IS_ENABLED(CONFIG_IPV6)
267 if (tb->fast_sk_family == AF_INET6) 267 if (tb->fast_sk_family == AF_INET6)
268 return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr, 268 return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
269 &sk->sk_v6_rcv_saddr, 269 inet6_rcv_saddr(sk),
270 tb->fast_rcv_saddr, 270 tb->fast_rcv_saddr,
271 sk->sk_rcv_saddr, 271 sk->sk_rcv_saddr,
272 tb->fast_ipv6_only, 272 tb->fast_ipv6_only,
@@ -321,13 +321,14 @@ tb_found:
321 goto fail_unlock; 321 goto fail_unlock;
322 } 322 }
323success: 323success:
324 if (!hlist_empty(&tb->owners)) { 324 if (hlist_empty(&tb->owners)) {
325 tb->fastreuse = reuse; 325 tb->fastreuse = reuse;
326 if (sk->sk_reuseport) { 326 if (sk->sk_reuseport) {
327 tb->fastreuseport = FASTREUSEPORT_ANY; 327 tb->fastreuseport = FASTREUSEPORT_ANY;
328 tb->fastuid = uid; 328 tb->fastuid = uid;
329 tb->fast_rcv_saddr = sk->sk_rcv_saddr; 329 tb->fast_rcv_saddr = sk->sk_rcv_saddr;
330 tb->fast_ipv6_only = ipv6_only_sock(sk); 330 tb->fast_ipv6_only = ipv6_only_sock(sk);
331 tb->fast_sk_family = sk->sk_family;
331#if IS_ENABLED(CONFIG_IPV6) 332#if IS_ENABLED(CONFIG_IPV6)
332 tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 333 tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
333#endif 334#endif
@@ -354,6 +355,7 @@ success:
354 tb->fastuid = uid; 355 tb->fastuid = uid;
355 tb->fast_rcv_saddr = sk->sk_rcv_saddr; 356 tb->fast_rcv_saddr = sk->sk_rcv_saddr;
356 tb->fast_ipv6_only = ipv6_only_sock(sk); 357 tb->fast_ipv6_only = ipv6_only_sock(sk);
358 tb->fast_sk_family = sk->sk_family;
357#if IS_ENABLED(CONFIG_IPV6) 359#if IS_ENABLED(CONFIG_IPV6)
358 tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 360 tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
359#endif 361#endif
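
The inet_connection_sock.c hunks fix the bind-bucket fast-reuse cache twice over: the cached parameters are (re)initialized when the bucket is empty (the old !hlist_empty() test was inverted), and tb->fast_sk_family is now recorded, since sk_reuseport_match() consults it before comparing the cached v4/v6 addresses. The corrected initialization, abridged:

	/* Only an empty bucket takes the new socket's parameters wholesale. */
	if (hlist_empty(&tb->owners)) {
		tb->fastreuse = reuse;
		if (sk->sk_reuseport) {
			tb->fastreuseport  = FASTREUSEPORT_ANY;
			tb->fastuid        = uid;
			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
			tb->fast_ipv6_only = ipv6_only_sock(sk);
			tb->fast_sk_family = sk->sk_family;	/* newly cached */
		}
	}
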
@@ -473,6 +475,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
473 } 475 }
474 spin_unlock_bh(&queue->fastopenq.lock); 476 spin_unlock_bh(&queue->fastopenq.lock);
475 } 477 }
478 mem_cgroup_sk_alloc(newsk);
476out: 479out:
477 release_sock(sk); 480 release_sock(sk);
478 if (req) 481 if (req)
@@ -492,17 +495,15 @@ EXPORT_SYMBOL(inet_csk_accept);
492 * to optimize. 495 * to optimize.
493 */ 496 */
494void inet_csk_init_xmit_timers(struct sock *sk, 497void inet_csk_init_xmit_timers(struct sock *sk,
495 void (*retransmit_handler)(unsigned long), 498 void (*retransmit_handler)(struct timer_list *t),
496 void (*delack_handler)(unsigned long), 499 void (*delack_handler)(struct timer_list *t),
497 void (*keepalive_handler)(unsigned long)) 500 void (*keepalive_handler)(struct timer_list *t))
498{ 501{
499 struct inet_connection_sock *icsk = inet_csk(sk); 502 struct inet_connection_sock *icsk = inet_csk(sk);
500 503
501 setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, 504 timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
502 (unsigned long)sk); 505 timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
503 setup_timer(&icsk->icsk_delack_timer, delack_handler, 506 timer_setup(&sk->sk_timer, keepalive_handler, 0);
504 (unsigned long)sk);
505 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
506 icsk->icsk_pending = icsk->icsk_ack.pending = 0; 507 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
507} 508}
508EXPORT_SYMBOL(inet_csk_init_xmit_timers); 509EXPORT_SYMBOL(inet_csk_init_xmit_timers);
@@ -537,9 +538,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
537{ 538{
538 const struct inet_request_sock *ireq = inet_rsk(req); 539 const struct inet_request_sock *ireq = inet_rsk(req);
539 struct net *net = read_pnet(&ireq->ireq_net); 540 struct net *net = read_pnet(&ireq->ireq_net);
540 struct ip_options_rcu *opt = ireq->opt; 541 struct ip_options_rcu *opt;
541 struct rtable *rt; 542 struct rtable *rt;
542 543
544 opt = ireq_opt_deref(ireq);
545
543 flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 546 flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
544 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 547 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
545 sk->sk_protocol, inet_sk_flowi_flags(sk), 548 sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -573,10 +576,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
573 struct flowi4 *fl4; 576 struct flowi4 *fl4;
574 struct rtable *rt; 577 struct rtable *rt;
575 578
579 opt = rcu_dereference(ireq->ireq_opt);
576 fl4 = &newinet->cork.fl.u.ip4; 580 fl4 = &newinet->cork.fl.u.ip4;
577 581
578 rcu_read_lock();
579 opt = rcu_dereference(newinet->inet_opt);
580 flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 582 flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
581 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 583 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
582 sk->sk_protocol, inet_sk_flowi_flags(sk), 584 sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -589,13 +591,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
589 goto no_route; 591 goto no_route;
590 if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 592 if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
591 goto route_err; 593 goto route_err;
592 rcu_read_unlock();
593 return &rt->dst; 594 return &rt->dst;
594 595
595route_err: 596route_err:
596 ip_rt_put(rt); 597 ip_rt_put(rt);
597no_route: 598no_route:
598 rcu_read_unlock();
599 __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 599 __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
600 return NULL; 600 return NULL;
601} 601}
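
The two hunks above shift where the request socket's IP options come from: inet_csk_route_req() now goes through a helper instead of a raw ireq->opt load, and inet_csk_route_child_sock() reads ireq->ireq_opt under the caller's RCU section, which is why its private rcu_read_lock()/rcu_read_unlock() pair can go away. A plausible sketch of ireq_opt_deref(); the exact liveness condition used in-tree is an assumption:

	static inline struct ip_options_rcu *
	ireq_opt_deref(const struct inet_request_sock *ireq)
	{
		/* Assumption: the real helper pairs rcu_dereference()
		 * with a liveness condition (e.g. a refcount check on
		 * the request sock) so lockdep can verify the access.
		 */
		return rcu_dereference(ireq->ireq_opt);
	}
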
@@ -674,9 +674,9 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req
674} 674}
675EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); 675EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
676 676
677static void reqsk_timer_handler(unsigned long data) 677static void reqsk_timer_handler(struct timer_list *t)
678{ 678{
679 struct request_sock *req = (struct request_sock *)data; 679 struct request_sock *req = from_timer(req, t, rsk_timer);
680 struct sock *sk_listener = req->rsk_listener; 680 struct sock *sk_listener = req->rsk_listener;
681 struct net *net = sock_net(sk_listener); 681 struct net *net = sock_net(sk_listener);
682 struct inet_connection_sock *icsk = inet_csk(sk_listener); 682 struct inet_connection_sock *icsk = inet_csk(sk_listener);
@@ -747,8 +747,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
747 req->num_timeout = 0; 747 req->num_timeout = 0;
748 req->sk = NULL; 748 req->sk = NULL;
749 749
750 setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler, 750 timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
751 (unsigned long)req);
752 mod_timer(&req->rsk_timer, jiffies + timeout); 751 mod_timer(&req->rsk_timer, jiffies + timeout);
753 752
754 inet_ehash_insert(req_to_sk(req), NULL); 753 inet_ehash_insert(req_to_sk(req), NULL);
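
This file's timer hunks are instances of the tree-wide timer API conversion: setup_timer() with an (unsigned long) cast becomes timer_setup(), handlers take a struct timer_list *, and from_timer() (a container_of() wrapper keyed on the member name) recovers the enclosing object; setup_pinned_timer() is subsumed by passing TIMER_PINNED as the flags argument. A self-contained sketch of the pattern with a toy structure:

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	struct conn {
		struct timer_list retransmit_timer;
		int retries;
	};

	static void retransmit_handler(struct timer_list *t)
	{
		/* from_timer() == container_of(t, struct conn, retransmit_timer) */
		struct conn *c = from_timer(c, t, retransmit_timer);

		c->retries++;
	}

	static void conn_arm(struct conn *c)
	{
		timer_setup(&c->retransmit_timer, retransmit_handler, 0);
		mod_timer(&c->retransmit_timer, jiffies + HZ);
	}
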
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index af74d0433453..26a3d0315728 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
147 spin_unlock(&hb->chain_lock); 147 spin_unlock(&hb->chain_lock);
148 148
149 hlist_for_each_entry_safe(fq, n, &expired, list_evictor) 149 hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
150 f->frag_expire((unsigned long) fq); 150 f->frag_expire(&fq->timer);
151 151
152 return evicted; 152 return evicted;
153} 153}
@@ -164,7 +164,7 @@ static void inet_frag_worker(struct work_struct *work)
164 164
165 local_bh_disable(); 165 local_bh_disable();
166 166
167 for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { 167 for (i = READ_ONCE(f->next_bucket); budget; --budget) {
168 evicted += inet_evict_bucket(f, &f->hash[i]); 168 evicted += inet_evict_bucket(f, &f->hash[i]);
169 i = (i + 1) & (INETFRAGS_HASHSZ - 1); 169 i = (i + 1) & (INETFRAGS_HASHSZ - 1);
170 if (evicted > INETFRAGS_EVICT_MAX) 170 if (evicted > INETFRAGS_EVICT_MAX)
@@ -366,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
366 f->constructor(q, arg); 366 f->constructor(q, arg);
367 add_frag_mem_limit(nf, f->qsize); 367 add_frag_mem_limit(nf, f->qsize);
368 368
369 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 369 timer_setup(&q->timer, f->frag_expire, 0);
370 spin_lock_init(&q->lock); 370 spin_lock_init(&q->lock);
371 refcount_set(&q->refcnt, 1); 371 refcount_set(&q->refcnt, 1);
372 372
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 597bb4cfe805..e7d15fb0d94d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -456,10 +456,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
456 return reuseport_add_sock(sk, sk2); 456 return reuseport_add_sock(sk, sk2);
457 } 457 }
458 458
459 /* Initial allocation may have already happened via setsockopt */ 459 return reuseport_alloc(sk);
460 if (!rcu_access_pointer(sk->sk_reuseport_cb))
461 return reuseport_alloc(sk);
462 return 0;
463} 460}
464 461
465int __inet_hash(struct sock *sk, struct sock *osk) 462int __inet_hash(struct sock *sk, struct sock *osk)
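
Deleting the "initial allocation may have already happened" check only stays correct if the equivalent guard moved into reuseport_alloc() itself, where it can be redone under the group lock and therefore cannot race with a concurrent setsockopt(). A hedged sketch of that double-checked shape; the lock name and allocation details are stand-ins:

	static DEFINE_SPINLOCK(reuseport_lock);  /* stand-in for the real lock */

	int reuseport_alloc_sketch(struct sock *sk)
	{
		struct sock_reuseport *reuse;

		spin_lock_bh(&reuseport_lock);
		/* Re-check under the lock: another binder may have won the race. */
		reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
						  lockdep_is_held(&reuseport_lock));
		if (!reuse) {
			reuse = kzalloc(sizeof(*reuse), GFP_ATOMIC);
			if (!reuse) {
				spin_unlock_bh(&reuseport_lock);
				return -ENOMEM;
			}
			rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
		}
		spin_unlock_bh(&reuseport_lock);
		return 0;
	}
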
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 5b039159e67a..c690cd0d9b3f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -9,7 +9,6 @@
9 */ 9 */
10 10
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/kmemcheck.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <net/inet_hashtables.h> 14#include <net/inet_hashtables.h>
@@ -142,9 +141,9 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
142} 141}
143EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 142EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
144 143
145static void tw_timer_handler(unsigned long data) 144static void tw_timer_handler(struct timer_list *t)
146{ 145{
147 struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; 146 struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
148 147
149 if (tw->tw_kill) 148 if (tw->tw_kill)
150 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); 149 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
@@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
167 if (tw) { 166 if (tw) {
168 const struct inet_sock *inet = inet_sk(sk); 167 const struct inet_sock *inet = inet_sk(sk);
169 168
170 kmemcheck_annotate_bitfield(tw, flags);
171
172 tw->tw_dr = dr; 169 tw->tw_dr = dr;
173 /* Give us an identity. */ 170 /* Give us an identity. */
174 tw->tw_daddr = inet->inet_daddr; 171 tw->tw_daddr = inet->inet_daddr;
@@ -188,8 +185,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
188 tw->tw_prot = sk->sk_prot_creator; 185 tw->tw_prot = sk->sk_prot_creator;
189 atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); 186 atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
190 twsk_net_set(tw, sock_net(sk)); 187 twsk_net_set(tw, sock_net(sk));
191 setup_pinned_timer(&tw->tw_timer, tw_timer_handler, 188 timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
192 (unsigned long)tw);
193 /* 189 /*
194 * Because we use RCU lookups, we should not set tw_refcnt 190 * Because we use RCU lookups, we should not set tw_refcnt
195 * to a non null value before everything is setup for this 191 * to a non null value before everything is setup for this
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e7eb590c86ce..914d56928578 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -128,9 +128,9 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
128 break; 128 break;
129 } 129 }
130 if (cmp == -1) 130 if (cmp == -1)
131 pp = &(*pp)->rb_left; 131 pp = &next->rb_left;
132 else 132 else
133 pp = &(*pp)->rb_right; 133 pp = &next->rb_right;
134 } 134 }
135 *parent_p = parent; 135 *parent_p = parent;
136 *pp_p = pp; 136 *pp_p = pp;
@@ -284,14 +284,17 @@ EXPORT_SYMBOL(inet_peer_xrlim_allow);
284 284
285void inetpeer_invalidate_tree(struct inet_peer_base *base) 285void inetpeer_invalidate_tree(struct inet_peer_base *base)
286{ 286{
287 struct inet_peer *p, *n; 287 struct rb_node *p = rb_first(&base->rb_root);
288 288
289 rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) { 289 while (p) {
290 inet_putpeer(p); 290 struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);
291
292 p = rb_next(p);
293 rb_erase(&peer->rb_node, &base->rb_root);
294 inet_putpeer(peer);
291 cond_resched(); 295 cond_resched();
292 } 296 }
293 297
294 base->rb_root = RB_ROOT;
295 base->total = 0; 298 base->total = 0;
296} 299}
297EXPORT_SYMBOL(inetpeer_invalidate_tree); 300EXPORT_SYMBOL(inetpeer_invalidate_tree);
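
rbtree_postorder_for_each_entry_safe() is only legal when nobody can look at the tree mid-walk, because it frees nodes without rebalancing and leaves the tree in an undefined state. This loop calls cond_resched(), so lookups can run between iterations; the rewrite keeps the tree valid at every step by erasing one node at a time. The idiom in isolation:

	struct rb_node *p = rb_first(root);

	while (p) {
		struct item *it = rb_entry(p, struct item, node);

		p = rb_next(p);            /* advance before erasing */
		rb_erase(&it->node, root);
		put_item(it);              /* stand-in for inet_putpeer() */
		cond_resched();
	}

This also makes the old trailing "base->rb_root = RB_ROOT;" reset unnecessary, since the tree is already empty when the loop exits.
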
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 9f0a7b96646f..2dd21c3281a1 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 4 * operating system. INET is implemented using the BSD Socket
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 46408c220d9d..bbf1b94942c0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 4 * operating system. INET is implemented using the BSD Socket
@@ -190,12 +191,13 @@ static bool frag_expire_skip_icmp(u32 user)
190/* 191/*
191 * Oops, a fragment queue timed out. Kill it and send an ICMP reply. 192 * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
192 */ 193 */
193static void ip_expire(unsigned long arg) 194static void ip_expire(struct timer_list *t)
194{ 195{
196 struct inet_frag_queue *frag = from_timer(frag, t, timer);
195 struct ipq *qp; 197 struct ipq *qp;
196 struct net *net; 198 struct net *net;
197 199
198 qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); 200 qp = container_of(frag, struct ipq, q);
199 net = container_of(qp->q.net, struct net, ipv4.frags); 201 net = container_of(qp->q.net, struct net, ipv4.frags);
200 202
201 rcu_read_lock(); 203 rcu_read_lock();
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0162fb955b33..bb6239169b1a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
259 struct ip_tunnel *tunnel; 259 struct ip_tunnel *tunnel;
260 struct erspanhdr *ershdr; 260 struct erspanhdr *ershdr;
261 const struct iphdr *iph; 261 const struct iphdr *iph;
262 __be32 session_id;
263 __be32 index; 262 __be32 index;
264 int len; 263 int len;
265 264
@@ -275,8 +274,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
275 /* The original GRE header does not have key field, 274 /* The original GRE header does not have key field,
276 * Use ERSPAN 10-bit session ID as key. 275 * Use ERSPAN 10-bit session ID as key.
277 */ 276 */
278 session_id = cpu_to_be32(ntohs(ershdr->session_id)); 277 tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
279 tpi->key = session_id;
280 index = ershdr->md.index; 278 index = ershdr->md.index;
281 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, 279 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
282 tpi->flags | TUNNEL_KEY, 280 tpi->flags | TUNNEL_KEY,
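
The ERSPAN header carries the session ID in a 16-bit field, but a type II session ID is only 10 bits wide, so the value is masked before being widened into the tunnel key; without the mask, reserved high bits would leak into the key and break the ip_tunnel_lookup() match. Assuming ID_MASK is the usual 10-bit mask:

	#define ID_MASK 0x3ff                     /* assumed: low 10 bits */

	__be16 raw = ershdr->session_id;          /* 16-bit wire field */
	__be32 key = cpu_to_be32(ntohs(raw) & ID_MASK);  /* 0..1023 */
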
@@ -581,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
581 if (gre_handle_offloads(skb, false)) 579 if (gre_handle_offloads(skb, false))
582 goto err_free_rt; 580 goto err_free_rt;
583 581
584 if (skb->len > dev->mtu) { 582 if (skb->len > dev->mtu + dev->hard_header_len) {
585 pskb_trim(skb, dev->mtu); 583 pskb_trim(skb, dev->mtu + dev->hard_header_len);
586 truncate = true; 584 truncate = true;
587 } 585 }
588 586
@@ -733,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
733 if (skb_cow_head(skb, dev->needed_headroom)) 731 if (skb_cow_head(skb, dev->needed_headroom))
734 goto free_skb; 732 goto free_skb;
735 733
736 if (skb->len > dev->mtu) { 734 if (skb->len > dev->mtu + dev->hard_header_len) {
737 pskb_trim(skb, dev->mtu); 735 pskb_trim(skb, dev->mtu + dev->hard_header_len);
738 truncate = true; 736 truncate = true;
739 } 737 }
740 738
@@ -775,20 +773,46 @@ free_skb:
775 return NETDEV_TX_OK; 773 return NETDEV_TX_OK;
776} 774}
777 775
776static void ipgre_link_update(struct net_device *dev, bool set_mtu)
777{
778 struct ip_tunnel *tunnel = netdev_priv(dev);
779 int len;
780
781 len = tunnel->tun_hlen;
782 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
783 len = tunnel->tun_hlen - len;
784 tunnel->hlen = tunnel->hlen + len;
785
786 dev->needed_headroom = dev->needed_headroom + len;
787 if (set_mtu)
788 dev->mtu = max_t(int, dev->mtu - len, 68);
789
790 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
791 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
792 tunnel->encap.type == TUNNEL_ENCAP_NONE) {
793 dev->features |= NETIF_F_GSO_SOFTWARE;
794 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
795 }
796 dev->features |= NETIF_F_LLTX;
797 }
798}
799
778static int ipgre_tunnel_ioctl(struct net_device *dev, 800static int ipgre_tunnel_ioctl(struct net_device *dev,
779 struct ifreq *ifr, int cmd) 801 struct ifreq *ifr, int cmd)
780{ 802{
781 int err;
782 struct ip_tunnel_parm p; 803 struct ip_tunnel_parm p;
804 int err;
783 805
784 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 806 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
785 return -EFAULT; 807 return -EFAULT;
808
786 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { 809 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
787 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 810 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
788 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 811 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
789 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 812 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
790 return -EINVAL; 813 return -EINVAL;
791 } 814 }
815
792 p.i_flags = gre_flags_to_tnl_flags(p.i_flags); 816 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
793 p.o_flags = gre_flags_to_tnl_flags(p.o_flags); 817 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
794 818
@@ -796,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
796 if (err) 820 if (err)
797 return err; 821 return err;
798 822
823 if (cmd == SIOCCHGTUNNEL) {
824 struct ip_tunnel *t = netdev_priv(dev);
825
826 t->parms.i_flags = p.i_flags;
827 t->parms.o_flags = p.o_flags;
828
829 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
830 ipgre_link_update(dev, true);
831 }
832
799 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); 833 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
800 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); 834 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
801 835
802 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 836 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
803 return -EFAULT; 837 return -EFAULT;
838
804 return 0; 839 return 0;
805} 840}
806 841
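
ipgre_link_update() re-derives the GRE header length after the ioctl (or netlink changelink, below) changes the output flags, then shifts needed_headroom and the MTU by the delta, clamping the MTU at 68, the historical IPv4 minimum. A sketch of the per-flag accounting gre_calc_hlen() presumably performs, each optional field being 4 bytes:

	static int gre_hlen_sketch(__be16 o_flags)
	{
		int hlen = 4;              /* base GRE header */

		if (o_flags & TUNNEL_CSUM)
			hlen += 4;         /* checksum + reserved */
		if (o_flags & TUNNEL_KEY)
			hlen += 4;         /* key */
		if (o_flags & TUNNEL_SEQ)
			hlen += 4;         /* sequence number */
		return hlen;
	}

The strcmp() against "erspan" skips the update for ERSPAN devices, whose header length is fixed by the ERSPAN encapsulation rather than by the GRE flag set.
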
@@ -1013,15 +1048,14 @@ static int __net_init ipgre_init_net(struct net *net)
1013 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); 1048 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1014} 1049}
1015 1050
1016static void __net_exit ipgre_exit_net(struct net *net) 1051static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1017{ 1052{
1018 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); 1053 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1019 ip_tunnel_delete_net(itn, &ipgre_link_ops);
1020} 1054}
1021 1055
1022static struct pernet_operations ipgre_net_ops = { 1056static struct pernet_operations ipgre_net_ops = {
1023 .init = ipgre_init_net, 1057 .init = ipgre_init_net,
1024 .exit = ipgre_exit_net, 1058 .exit_batch = ipgre_exit_batch_net,
1025 .id = &ipgre_net_id, 1059 .id = &ipgre_net_id,
1026 .size = sizeof(struct ip_tunnel_net), 1060 .size = sizeof(struct ip_tunnel_net),
1027}; 1061};
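
This is the first of several identical pernet conversions in this section (gretap, erspan, vti and ipip follow the same shape): instead of taking rtnl_lock once per dying namespace, the .exit_batch hook walks the whole exit list under a single lock and queues every tunnel device for one unregister_netdevice_many() call. The generic shape, with collect_tunnels() and tunnel_net_id as illustrative stand-ins:

	static void __net_exit tunnel_exit_batch_sketch(struct list_head *net_list)
	{
		struct net *net;
		LIST_HEAD(list);

		rtnl_lock();
		list_for_each_entry(net, net_list, exit_list)
			collect_tunnels(net_generic(net, tunnel_net_id), &list);
		unregister_netdevice_many(&list);  /* one pass, one lock */
		rtnl_unlock();
	}
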
@@ -1223,6 +1257,7 @@ static int gre_tap_init(struct net_device *dev)
1223{ 1257{
1224 __gre_tunnel_init(dev); 1258 __gre_tunnel_init(dev);
1225 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1259 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1260 netif_keep_dst(dev);
1226 1261
1227 return ip_tunnel_init(dev); 1262 return ip_tunnel_init(dev);
1228} 1263}
@@ -1246,13 +1281,16 @@ static int erspan_tunnel_init(struct net_device *dev)
1246 1281
1247 tunnel->tun_hlen = 8; 1282 tunnel->tun_hlen = 8;
1248 tunnel->parms.iph.protocol = IPPROTO_GRE; 1283 tunnel->parms.iph.protocol = IPPROTO_GRE;
1249 t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr); 1284 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1285 sizeof(struct erspanhdr);
1286 t_hlen = tunnel->hlen + sizeof(struct iphdr);
1250 1287
1251 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; 1288 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
1252 dev->mtu = ETH_DATA_LEN - t_hlen - 4; 1289 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
1253 dev->features |= GRE_FEATURES; 1290 dev->features |= GRE_FEATURES;
1254 dev->hw_features |= GRE_FEATURES; 1291 dev->hw_features |= GRE_FEATURES;
1255 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1292 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1293 netif_keep_dst(dev);
1256 1294
1257 return ip_tunnel_init(dev); 1295 return ip_tunnel_init(dev);
1258} 1296}
@@ -1306,9 +1344,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1306 struct netlink_ext_ack *extack) 1344 struct netlink_ext_ack *extack)
1307{ 1345{
1308 struct ip_tunnel *t = netdev_priv(dev); 1346 struct ip_tunnel *t = netdev_priv(dev);
1309 struct ip_tunnel_parm p;
1310 struct ip_tunnel_encap ipencap; 1347 struct ip_tunnel_encap ipencap;
1311 __u32 fwmark = t->fwmark; 1348 __u32 fwmark = t->fwmark;
1349 struct ip_tunnel_parm p;
1312 int err; 1350 int err;
1313 1351
1314 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1352 if (ipgre_netlink_encap_parms(data, &ipencap)) {
@@ -1321,7 +1359,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1321 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1359 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1322 if (err < 0) 1360 if (err < 0)
1323 return err; 1361 return err;
1324 return ip_tunnel_changelink(dev, tb, &p, fwmark); 1362
1363 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1364 if (err < 0)
1365 return err;
1366
1367 t->parms.i_flags = p.i_flags;
1368 t->parms.o_flags = p.o_flags;
1369
1370 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
1371 ipgre_link_update(dev, !tb[IFLA_MTU]);
1372
1373 return 0;
1325} 1374}
1326 1375
1327static size_t ipgre_get_size(const struct net_device *dev) 1376static size_t ipgre_get_size(const struct net_device *dev)
@@ -1540,15 +1589,14 @@ static int __net_init ipgre_tap_init_net(struct net *net)
1540 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); 1589 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1541} 1590}
1542 1591
1543static void __net_exit ipgre_tap_exit_net(struct net *net) 1592static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1544{ 1593{
1545 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); 1594 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1546 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1547} 1595}
1548 1596
1549static struct pernet_operations ipgre_tap_net_ops = { 1597static struct pernet_operations ipgre_tap_net_ops = {
1550 .init = ipgre_tap_init_net, 1598 .init = ipgre_tap_init_net,
1551 .exit = ipgre_tap_exit_net, 1599 .exit_batch = ipgre_tap_exit_batch_net,
1552 .id = &gre_tap_net_id, 1600 .id = &gre_tap_net_id,
1553 .size = sizeof(struct ip_tunnel_net), 1601 .size = sizeof(struct ip_tunnel_net),
1554}; 1602};
@@ -1559,16 +1607,14 @@ static int __net_init erspan_init_net(struct net *net)
1559 &erspan_link_ops, "erspan0"); 1607 &erspan_link_ops, "erspan0");
1560} 1608}
1561 1609
1562static void __net_exit erspan_exit_net(struct net *net) 1610static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1563{ 1611{
1564 struct ip_tunnel_net *itn = net_generic(net, erspan_net_id); 1612 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1565
1566 ip_tunnel_delete_net(itn, &erspan_link_ops);
1567} 1613}
1568 1614
1569static struct pernet_operations erspan_net_ops = { 1615static struct pernet_operations erspan_net_ops = {
1570 .init = erspan_init_net, 1616 .init = erspan_init_net,
1571 .exit = erspan_exit_net, 1617 .exit_batch = erspan_exit_batch_net,
1572 .id = &erspan_net_id, 1618 .id = &erspan_net_id,
1573 .size = sizeof(struct ip_tunnel_net), 1619 .size = sizeof(struct ip_tunnel_net),
1574}; 1620};
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fa2dc8f692c6..57fc13c6ab2b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -311,9 +311,10 @@ drop:
311static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 311static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
312{ 312{
313 const struct iphdr *iph = ip_hdr(skb); 313 const struct iphdr *iph = ip_hdr(skb);
314 struct rtable *rt; 314 int (*edemux)(struct sk_buff *skb);
315 struct net_device *dev = skb->dev; 315 struct net_device *dev = skb->dev;
316 void (*edemux)(struct sk_buff *skb); 316 struct rtable *rt;
317 int err;
317 318
318 /* if ingress device is enslaved to an L3 master device pass the 319 /* if ingress device is enslaved to an L3 master device pass the
319 * skb to its handler for processing 320 * skb to its handler for processing
@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
331 332
332 ipprot = rcu_dereference(inet_protos[protocol]); 333 ipprot = rcu_dereference(inet_protos[protocol]);
333 if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { 334 if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
334 edemux(skb); 335 err = edemux(skb);
336 if (unlikely(err))
337 goto drop_error;
335 /* must reload iph, skb->head might have changed */ 338 /* must reload iph, skb->head might have changed */
336 iph = ip_hdr(skb); 339 iph = ip_hdr(skb);
337 } 340 }
@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
342 * how the packet travels inside Linux networking. 345 * how the packet travels inside Linux networking.
343 */ 346 */
344 if (!skb_valid_dst(skb)) { 347 if (!skb_valid_dst(skb)) {
345 int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, 348 err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
346 iph->tos, dev); 349 iph->tos, dev);
347 if (unlikely(err)) { 350 if (unlikely(err))
348 if (err == -EXDEV) 351 goto drop_error;
349 __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
350 goto drop;
351 }
352 } 352 }
353 353
354#ifdef CONFIG_IP_ROUTE_CLASSID 354#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
399drop: 399drop:
400 kfree_skb(skb); 400 kfree_skb(skb);
401 return NET_RX_DROP; 401 return NET_RX_DROP;
402
403drop_error:
404 if (err == -EXDEV)
405 __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
406 goto drop;
402} 407}
403 408
404/* 409/*
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 525ae88d1e58..ed194d46c00e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 4 * operating system. INET is implemented using the BSD Socket
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a599aa83fdad..60fb1eb7d7d8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 4 * operating system. INET is implemented using the BSD Socket
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e9805ad664ac..fe6fee728ce4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1061 } 1061 }
1062} 1062}
1063 1063
1064void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) 1064void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1065 struct rtnl_link_ops *ops)
1065{ 1066{
1067 struct ip_tunnel_net *itn;
1068 struct net *net;
1066 LIST_HEAD(list); 1069 LIST_HEAD(list);
1067 1070
1068 rtnl_lock(); 1071 rtnl_lock();
1069 ip_tunnel_destroy(itn, &list, ops); 1072 list_for_each_entry(net, net_list, exit_list) {
1073 itn = net_generic(net, id);
1074 ip_tunnel_destroy(itn, &list, ops);
1075 }
1070 unregister_netdevice_many(&list); 1076 unregister_netdevice_many(&list);
1071 rtnl_unlock(); 1077 rtnl_unlock();
1072} 1078}
1073EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); 1079EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1074 1080
1075int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], 1081int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1076 struct ip_tunnel_parm *p, __u32 fwmark) 1082 struct ip_tunnel_parm *p, __u32 fwmark)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5ed63d250950..949f432a5f04 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
168 struct ip_tunnel_parm *parms = &tunnel->parms; 168 struct ip_tunnel_parm *parms = &tunnel->parms;
169 struct dst_entry *dst = skb_dst(skb); 169 struct dst_entry *dst = skb_dst(skb);
170 struct net_device *tdev; /* Device to other host */ 170 struct net_device *tdev; /* Device to other host */
171 int pkt_len = skb->len;
171 int err; 172 int err;
172 int mtu; 173 int mtu;
173 174
@@ -197,15 +198,6 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
197 goto tx_error; 198 goto tx_error;
198 } 199 }
199 200
200 if (tunnel->err_count > 0) {
201 if (time_before(jiffies,
202 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
203 tunnel->err_count--;
204 dst_link_failure(skb);
205 } else
206 tunnel->err_count = 0;
207 }
208
209 mtu = dst_mtu(dst); 201 mtu = dst_mtu(dst);
210 if (skb->len > mtu) { 202 if (skb->len > mtu) {
211 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 203 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
@@ -229,7 +221,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
229 221
230 err = dst_output(tunnel->net, skb->sk, skb); 222 err = dst_output(tunnel->net, skb->sk, skb);
231 if (net_xmit_eval(err) == 0) 223 if (net_xmit_eval(err) == 0)
232 err = skb->len; 224 err = pkt_len;
233 iptunnel_xmit_stats(dev, err); 225 iptunnel_xmit_stats(dev, err);
234 return NETDEV_TX_OK; 226 return NETDEV_TX_OK;
235 227
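
The accounting fix above is a capture-before-handoff pattern: xfrm transforms applied under dst_output() grow the packet, and the skb may not even survive the call, so the byte count reported to iptunnel_xmit_stats() must be the length sampled on entry:

	int pkt_len = skb->len;          /* sample before handing off */

	err = dst_output(tunnel->net, skb->sk, skb);
	if (net_xmit_eval(err) == 0)
		err = pkt_len;           /* skb may be resized or freed */
	iptunnel_xmit_stats(dev, err);
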
@@ -452,15 +444,14 @@ static int __net_init vti_init_net(struct net *net)
452 return 0; 444 return 0;
453} 445}
454 446
455static void __net_exit vti_exit_net(struct net *net) 447static void __net_exit vti_exit_batch_net(struct list_head *list_net)
456{ 448{
457 struct ip_tunnel_net *itn = net_generic(net, vti_net_id); 449 ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
458 ip_tunnel_delete_net(itn, &vti_link_ops);
459} 450}
460 451
461static struct pernet_operations vti_net_ops = { 452static struct pernet_operations vti_net_ops = {
462 .init = vti_init_net, 453 .init = vti_init_net,
463 .exit = vti_exit_net, 454 .exit_batch = vti_exit_batch_net,
464 .id = &vti_net_id, 455 .id = &vti_net_id,
465 .size = sizeof(struct ip_tunnel_net), 456 .size = sizeof(struct ip_tunnel_net),
466}; 457};
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4c5dfe6bd34d..abdebca848c9 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or 3 * Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
3 * user-supplied information to configure own IP address and routes. 4 * user-supplied information to configure own IP address and routes.
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index fb1ad22b5e29..c891235b4966 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
128 128
129static int ipip_err(struct sk_buff *skb, u32 info) 129static int ipip_err(struct sk_buff *skb, u32 info)
130{ 130{
131 131 /* All the routers (except for Linux) return only
132/* All the routers (except for Linux) return only 132 * 8 bytes of packet payload. It means, that precise relaying of
133 8 bytes of packet payload. It means, that precise relaying of 133 * ICMP in the real Internet is absolutely infeasible.
134 ICMP in the real Internet is absolutely infeasible. 134 */
135 */
136 struct net *net = dev_net(skb->dev); 135 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); 136 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
138 const struct iphdr *iph = (const struct iphdr *)skb->data; 137 const struct iphdr *iph = (const struct iphdr *)skb->data;
139 struct ip_tunnel *t;
140 int err;
141 const int type = icmp_hdr(skb)->type; 138 const int type = icmp_hdr(skb)->type;
142 const int code = icmp_hdr(skb)->code; 139 const int code = icmp_hdr(skb)->code;
140 struct ip_tunnel *t;
141 int err = 0;
142
143 switch (type) {
144 case ICMP_DEST_UNREACH:
145 switch (code) {
146 case ICMP_SR_FAILED:
147 /* Impossible event. */
148 goto out;
149 default:
150 /* All others are translated to HOST_UNREACH.
151 * rfc2003 contains "deep thoughts" about NET_UNREACH,
152 * I believe they are just ether pollution. --ANK
153 */
154 break;
155 }
156 break;
157
158 case ICMP_TIME_EXCEEDED:
159 if (code != ICMP_EXC_TTL)
160 goto out;
161 break;
162
163 case ICMP_REDIRECT:
164 break;
165
166 default:
167 goto out;
168 }
143 169
144 err = -ENOENT;
145 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, 170 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
146 iph->daddr, iph->saddr, 0); 171 iph->daddr, iph->saddr, 0);
147 if (!t) 172 if (!t) {
173 err = -ENOENT;
148 goto out; 174 goto out;
175 }
149 176
150 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 177 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
151 ipv4_update_pmtu(skb, dev_net(skb->dev), info, 178 ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
152 t->parms.link, 0, iph->protocol, 0); 179 iph->protocol, 0);
153 err = 0;
154 goto out; 180 goto out;
155 } 181 }
156 182
157 if (type == ICMP_REDIRECT) { 183 if (type == ICMP_REDIRECT) {
158 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, 184 ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
159 iph->protocol, 0);
160 err = 0;
161 goto out; 185 goto out;
162 } 186 }
163 187
164 if (t->parms.iph.daddr == 0) 188 if (t->parms.iph.daddr == 0) {
189 err = -ENOENT;
165 goto out; 190 goto out;
191 }
166 192
167 err = 0;
168 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 193 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
169 goto out; 194 goto out;
170 195
@@ -634,15 +659,14 @@ static int __net_init ipip_init_net(struct net *net)
634 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); 659 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
635} 660}
636 661
637static void __net_exit ipip_exit_net(struct net *net) 662static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
638{ 663{
639 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); 664 ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
640 ip_tunnel_delete_net(itn, &ipip_link_ops);
641} 665}
642 666
643static struct pernet_operations ipip_net_ops = { 667static struct pernet_operations ipip_net_ops = {
644 .init = ipip_init_net, 668 .init = ipip_init_net,
645 .exit = ipip_exit_net, 669 .exit_batch = ipip_exit_batch_net,
646 .id = &ipip_net_id, 670 .id = &ipip_net_id,
647 .size = sizeof(struct ip_tunnel_net), 671 .size = sizeof(struct ip_tunnel_net),
648}; 672};
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c9b3e6e069ae..fd5f19c988e4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,6 +67,7 @@
67#include <net/fib_rules.h> 67#include <net/fib_rules.h>
68#include <linux/netconf.h> 68#include <linux/netconf.h>
69#include <net/nexthop.h> 69#include <net/nexthop.h>
70#include <net/switchdev.h>
70 71
71struct ipmr_rule { 72struct ipmr_rule {
72 struct fib_rule common; 73 struct fib_rule common;
@@ -111,7 +112,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
111 int cmd); 112 int cmd);
112static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); 113static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
113static void mroute_clean_tables(struct mr_table *mrt, bool all); 114static void mroute_clean_tables(struct mr_table *mrt, bool all);
114static void ipmr_expire_process(unsigned long arg); 115static void ipmr_expire_process(struct timer_list *t);
115 116
116#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 117#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
117#define ipmr_for_each_table(mrt, net) \ 118#define ipmr_for_each_table(mrt, net) \
@@ -264,6 +265,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
264 fib_rules_unregister(net->ipv4.mr_rules_ops); 265 fib_rules_unregister(net->ipv4.mr_rules_ops);
265 rtnl_unlock(); 266 rtnl_unlock();
266} 267}
268
269static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
270{
271 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
272}
273
274static unsigned int ipmr_rules_seq_read(struct net *net)
275{
276 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
277}
278
279bool ipmr_rule_default(const struct fib_rule *rule)
280{
281 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
282}
283EXPORT_SYMBOL(ipmr_rule_default);
267#else 284#else
268#define ipmr_for_each_table(mrt, net) \ 285#define ipmr_for_each_table(mrt, net) \
269 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 286 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
@@ -298,6 +315,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
298 net->ipv4.mrt = NULL; 315 net->ipv4.mrt = NULL;
299 rtnl_unlock(); 316 rtnl_unlock();
300} 317}
318
319static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
320{
321 return 0;
322}
323
324static unsigned int ipmr_rules_seq_read(struct net *net)
325{
326 return 0;
327}
328
329bool ipmr_rule_default(const struct fib_rule *rule)
330{
331 return true;
332}
333EXPORT_SYMBOL(ipmr_rule_default);
301#endif 334#endif
302 335
303static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 336static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -342,8 +375,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
342 INIT_LIST_HEAD(&mrt->mfc_cache_list); 375 INIT_LIST_HEAD(&mrt->mfc_cache_list);
343 INIT_LIST_HEAD(&mrt->mfc_unres_queue); 376 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
344 377
345 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 378 timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
346 (unsigned long)mrt);
347 379
348 mrt->mroute_reg_vif_num = -1; 380 mrt->mroute_reg_vif_num = -1;
349#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 381#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -587,6 +619,82 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
587} 619}
588#endif 620#endif
589 621
622static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
623 struct net *net,
624 enum fib_event_type event_type,
625 struct vif_device *vif,
626 vifi_t vif_index, u32 tb_id)
627{
628 struct vif_entry_notifier_info info = {
629 .info = {
630 .family = RTNL_FAMILY_IPMR,
631 .net = net,
632 },
633 .dev = vif->dev,
634 .vif_index = vif_index,
635 .vif_flags = vif->flags,
636 .tb_id = tb_id,
637 };
638
639 return call_fib_notifier(nb, net, event_type, &info.info);
640}
641
642static int call_ipmr_vif_entry_notifiers(struct net *net,
643 enum fib_event_type event_type,
644 struct vif_device *vif,
645 vifi_t vif_index, u32 tb_id)
646{
647 struct vif_entry_notifier_info info = {
648 .info = {
649 .family = RTNL_FAMILY_IPMR,
650 .net = net,
651 },
652 .dev = vif->dev,
653 .vif_index = vif_index,
654 .vif_flags = vif->flags,
655 .tb_id = tb_id,
656 };
657
658 ASSERT_RTNL();
659 net->ipv4.ipmr_seq++;
660 return call_fib_notifiers(net, event_type, &info.info);
661}
662
663static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
664 struct net *net,
665 enum fib_event_type event_type,
666 struct mfc_cache *mfc, u32 tb_id)
667{
668 struct mfc_entry_notifier_info info = {
669 .info = {
670 .family = RTNL_FAMILY_IPMR,
671 .net = net,
672 },
673 .mfc = mfc,
674 .tb_id = tb_id
675 };
676
677 return call_fib_notifier(nb, net, event_type, &info.info);
678}
679
680static int call_ipmr_mfc_entry_notifiers(struct net *net,
681 enum fib_event_type event_type,
682 struct mfc_cache *mfc, u32 tb_id)
683{
684 struct mfc_entry_notifier_info info = {
685 .info = {
686 .family = RTNL_FAMILY_IPMR,
687 .net = net,
688 },
689 .mfc = mfc,
690 .tb_id = tb_id
691 };
692
693 ASSERT_RTNL();
694 net->ipv4.ipmr_seq++;
695 return call_fib_notifiers(net, event_type, &info.info);
696}
697
590/** 698/**
591 * vif_delete - Delete a VIF entry 699 * vif_delete - Delete a VIF entry
592 * @notify: Set to 1, if the caller is a notifier_call 700 * @notify: Set to 1, if the caller is a notifier_call
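
Each *_notifiers() helper above bumps net->ipv4.ipmr_seq under RTNL before invoking the chain. Consumers use the sequence to detect that a full dump raced with a table change; the retry loop lives in the FIB notifier core, but its shape against the ops registered later in this patch is roughly:

	/* Illustrative only: re-dump if the sequence moved underneath us. */
	do {
		seq = ops->fib_seq_read(net);
		err = ops->fib_dump(net, nb);
	} while (!err && seq != ops->fib_seq_read(net));
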
@@ -594,6 +702,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
594static int vif_delete(struct mr_table *mrt, int vifi, int notify, 702static int vif_delete(struct mr_table *mrt, int vifi, int notify,
595 struct list_head *head) 703 struct list_head *head)
596{ 704{
705 struct net *net = read_pnet(&mrt->net);
597 struct vif_device *v; 706 struct vif_device *v;
598 struct net_device *dev; 707 struct net_device *dev;
599 struct in_device *in_dev; 708 struct in_device *in_dev;
@@ -603,6 +712,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
603 712
604 v = &mrt->vif_table[vifi]; 713 v = &mrt->vif_table[vifi];
605 714
715 if (VIF_EXISTS(mrt, vifi))
716 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
717 mrt->id);
718
606 write_lock_bh(&mrt_lock); 719 write_lock_bh(&mrt_lock);
607 dev = v->dev; 720 dev = v->dev;
608 v->dev = NULL; 721 v->dev = NULL;
@@ -652,10 +765,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
652 kmem_cache_free(mrt_cachep, c); 765 kmem_cache_free(mrt_cachep, c);
653} 766}
654 767
655static inline void ipmr_cache_free(struct mfc_cache *c) 768void ipmr_cache_free(struct mfc_cache *c)
656{ 769{
657 call_rcu(&c->rcu, ipmr_cache_free_rcu); 770 call_rcu(&c->rcu, ipmr_cache_free_rcu);
658} 771}
772EXPORT_SYMBOL(ipmr_cache_free);
659 773
660/* Destroy an unresolved cache entry, killing queued skbs 774/* Destroy an unresolved cache entry, killing queued skbs
661 * and reporting error to netlink readers. 775 * and reporting error to netlink readers.
@@ -689,9 +803,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
689} 803}
690 804
691/* Timer process for the unresolved queue. */ 805/* Timer process for the unresolved queue. */
692static void ipmr_expire_process(unsigned long arg) 806static void ipmr_expire_process(struct timer_list *t)
693{ 807{
694 struct mr_table *mrt = (struct mr_table *)arg; 808 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
695 unsigned long now; 809 unsigned long now;
696 unsigned long expires; 810 unsigned long expires;
697 struct mfc_cache *c, *next; 811 struct mfc_cache *c, *next;
@@ -754,6 +868,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
754 struct vifctl *vifc, int mrtsock) 868 struct vifctl *vifc, int mrtsock)
755{ 869{
756 int vifi = vifc->vifc_vifi; 870 int vifi = vifc->vifc_vifi;
871 struct switchdev_attr attr = {
872 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
873 };
757 struct vif_device *v = &mrt->vif_table[vifi]; 874 struct vif_device *v = &mrt->vif_table[vifi];
758 struct net_device *dev; 875 struct net_device *dev;
759 struct in_device *in_dev; 876 struct in_device *in_dev;
@@ -828,6 +945,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
828 945
829 /* Fill in the VIF structures */ 946 /* Fill in the VIF structures */
830 947
948 attr.orig_dev = dev;
949 if (!switchdev_port_attr_get(dev, &attr)) {
950 memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
951 v->dev_parent_id.id_len = attr.u.ppid.id_len;
952 } else {
953 v->dev_parent_id.id_len = 0;
954 }
831 v->rate_limit = vifc->vifc_rate_limit; 955 v->rate_limit = vifc->vifc_rate_limit;
832 v->local = vifc->vifc_lcl_addr.s_addr; 956 v->local = vifc->vifc_lcl_addr.s_addr;
833 v->remote = vifc->vifc_rmt_addr.s_addr; 957 v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -851,6 +975,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
851 if (vifi+1 > mrt->maxvif) 975 if (vifi+1 > mrt->maxvif)
852 mrt->maxvif = vifi+1; 976 mrt->maxvif = vifi+1;
853 write_unlock_bh(&mrt_lock); 977 write_unlock_bh(&mrt_lock);
978 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
854 return 0; 979 return 0;
855} 980}
856 981
@@ -949,6 +1074,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
949 if (c) { 1074 if (c) {
950 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1075 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
951 c->mfc_un.res.minvif = MAXVIFS; 1076 c->mfc_un.res.minvif = MAXVIFS;
1077 refcount_set(&c->mfc_un.res.refcount, 1);
952 } 1078 }
953 return c; 1079 return c;
954} 1080}
@@ -1150,6 +1276,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
1150 1276
1151static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1277static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
1152{ 1278{
1279 struct net *net = read_pnet(&mrt->net);
1153 struct mfc_cache *c; 1280 struct mfc_cache *c;
1154 1281
1155 /* The entries are added/deleted only under RTNL */ 1282 /* The entries are added/deleted only under RTNL */
@@ -1161,8 +1288,9 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
1161 return -ENOENT; 1288 return -ENOENT;
1162 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1289 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
1163 list_del_rcu(&c->list); 1290 list_del_rcu(&c->list);
1291 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
1164 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1292 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1165 ipmr_cache_free(c); 1293 ipmr_cache_put(c);
1166 1294
1167 return 0; 1295 return 0;
1168} 1296}
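
ipmr_cache_free() becomes the refcounted slow path: entries now start at refcount 1 (see the ipmr_cache_alloc() hunk above), deletion paths call ipmr_cache_put(), and the RCU free is only reached when the last reference drops, which is presumably why the free helper is now exported. The put side plausibly looks like:

	void ipmr_cache_put(struct mfc_cache *c)
	{
		/* Assumption: a last-reference check guards the RCU free. */
		if (refcount_dec_and_test(&c->mfc_un.res.refcount))
			ipmr_cache_free(c);
	}
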
@@ -1189,6 +1317,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1189 if (!mrtsock) 1317 if (!mrtsock)
1190 c->mfc_flags |= MFC_STATIC; 1318 c->mfc_flags |= MFC_STATIC;
1191 write_unlock_bh(&mrt_lock); 1319 write_unlock_bh(&mrt_lock);
1320 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
1321 mrt->id);
1192 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1322 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1193 return 0; 1323 return 0;
1194 } 1324 }
@@ -1238,6 +1368,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1238 ipmr_cache_resolve(net, mrt, uc, c); 1368 ipmr_cache_resolve(net, mrt, uc, c);
1239 ipmr_cache_free(uc); 1369 ipmr_cache_free(uc);
1240 } 1370 }
1371 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
1241 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1372 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1242 return 0; 1373 return 0;
1243} 1374}
@@ -1245,6 +1376,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1245/* Close the multicast socket, and clear the vif tables etc */ 1376/* Close the multicast socket, and clear the vif tables etc */
1246static void mroute_clean_tables(struct mr_table *mrt, bool all) 1377static void mroute_clean_tables(struct mr_table *mrt, bool all)
1247{ 1378{
1379 struct net *net = read_pnet(&mrt->net);
1248 struct mfc_cache *c, *tmp; 1380 struct mfc_cache *c, *tmp;
1249 LIST_HEAD(list); 1381 LIST_HEAD(list);
1250 int i; 1382 int i;
@@ -1263,8 +1395,10 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
1263 continue; 1395 continue;
1264 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1396 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
1265 list_del_rcu(&c->list); 1397 list_del_rcu(&c->list);
1398 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
1399 mrt->id);
1266 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1400 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1267 ipmr_cache_free(c); 1401 ipmr_cache_put(c);
1268 } 1402 }
1269 1403
1270 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1404 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
@@ -1393,6 +1527,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
1393 case MRT_ADD_MFC: 1527 case MRT_ADD_MFC:
1394 case MRT_DEL_MFC: 1528 case MRT_DEL_MFC:
1395 parent = -1; 1529 parent = -1;
1530 /* fall through */
1396 case MRT_ADD_MFC_PROXY: 1531 case MRT_ADD_MFC_PROXY:
1397 case MRT_DEL_MFC_PROXY: 1532 case MRT_DEL_MFC_PROXY:
1398 if (optlen != sizeof(mfc)) { 1533 if (optlen != sizeof(mfc)) {
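
The new comment is not cosmetic: MRT_ADD_MFC and MRT_DEL_MFC intentionally fall into the proxy cases after forcing parent = -1, and builds with -Wimplicit-fallthrough treat a /* fall through */ comment (or the fallthrough attribute) as the explicit opt-out that silences the warning:

	switch (optname) {
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		parent = -1;
		/* fall through */       /* deliberate: shared handling below */
	case MRT_ADD_MFC_PROXY:
	case MRT_DEL_MFC_PROXY:
		/* shared mfc add/delete handling */
		break;
	}
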
@@ -1724,10 +1859,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
1724 return dst_output(net, sk, skb); 1859 return dst_output(net, sk, skb);
1725} 1860}
1726 1861
1862#ifdef CONFIG_NET_SWITCHDEV
1863static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1864 int in_vifi, int out_vifi)
1865{
1866 struct vif_device *out_vif = &mrt->vif_table[out_vifi];
1867 struct vif_device *in_vif = &mrt->vif_table[in_vifi];
1868
1869 if (!skb->offload_mr_fwd_mark)
1870 return false;
1871 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
1872 return false;
1873 return netdev_phys_item_id_same(&out_vif->dev_parent_id,
1874 &in_vif->dev_parent_id);
1875}
1876#else
1877static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1878 int in_vifi, int out_vifi)
1879{
1880 return false;
1881}
1882#endif
1883
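
A packet may only skip software forwarding when the hardware has already replicated it and both VIFs sit on the same physical switch; comparing the two ports' parent IDs is how that co-location is established. netdev_phys_item_id_same() is presumably a length-checked memcmp:

	static bool phys_item_id_same_sketch(const struct netdev_phys_item_id *a,
					     const struct netdev_phys_item_id *b)
	{
		return a->id_len == b->id_len &&
		       !memcmp(a->id, b->id, a->id_len);
	}

The zero id_len stored by vif_add() above doubles as the "no switchdev parent" marker, which is why both sides must have a nonzero length before the comparison means anything.
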
1727/* Processing handlers for ipmr_forward */ 1884/* Processing handlers for ipmr_forward */
1728 1885
1729static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1886static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1730 struct sk_buff *skb, struct mfc_cache *c, int vifi) 1887 int in_vifi, struct sk_buff *skb,
1888 struct mfc_cache *c, int vifi)
1731{ 1889{
1732 const struct iphdr *iph = ip_hdr(skb); 1890 const struct iphdr *iph = ip_hdr(skb);
1733 struct vif_device *vif = &mrt->vif_table[vifi]; 1891 struct vif_device *vif = &mrt->vif_table[vifi];
@@ -1748,6 +1906,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1748 goto out_free; 1906 goto out_free;
1749 } 1907 }
1750 1908
1909 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
1910 goto out_free;
1911
1751 if (vif->flags & VIFF_TUNNEL) { 1912 if (vif->flags & VIFF_TUNNEL) {
1752 rt = ip_route_output_ports(net, &fl4, NULL, 1913 rt = ip_route_output_ports(net, &fl4, NULL,
1753 vif->remote, vif->local, 1914 vif->remote, vif->local,
@@ -1925,8 +2086,8 @@ forward:
1925 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2086 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1926 2087
1927 if (skb2) 2088 if (skb2)
1928 ipmr_queue_xmit(net, mrt, skb2, cache, 2089 ipmr_queue_xmit(net, mrt, true_vifi,
1929 psend); 2090 skb2, cache, psend);
1930 } 2091 }
1931 psend = ct; 2092 psend = ct;
1932 } 2093 }
@@ -1937,9 +2098,10 @@ last_forward:
1937 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2098 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1938 2099
1939 if (skb2) 2100 if (skb2)
1940 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 2101 ipmr_queue_xmit(net, mrt, true_vifi, skb2,
2102 cache, psend);
1941 } else { 2103 } else {
1942 ipmr_queue_xmit(net, mrt, skb, cache, psend); 2104 ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
1943 return; 2105 return;
1944 } 2106 }
1945 } 2107 }
@@ -2156,6 +2318,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2156 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) 2318 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
2157 return -EMSGSIZE; 2319 return -EMSGSIZE;
2158 2320
2321 if (c->mfc_flags & MFC_OFFLOAD)
2322 rtm->rtm_flags |= RTNH_F_OFFLOAD;
2323
2159 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) 2324 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
2160 return -EMSGSIZE; 2325 return -EMSGSIZE;
2161 2326
@@ -3048,14 +3213,87 @@ static const struct net_protocol pim_protocol = {
3048}; 3213};
3049#endif 3214#endif
3050 3215
3216static unsigned int ipmr_seq_read(struct net *net)
3217{
3218 ASSERT_RTNL();
3219
3220 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
3221}
3222
3223static int ipmr_dump(struct net *net, struct notifier_block *nb)
3224{
3225 struct mr_table *mrt;
3226 int err;
3227
3228 err = ipmr_rules_dump(net, nb);
3229 if (err)
3230 return err;
3231
3232 ipmr_for_each_table(mrt, net) {
3233 struct vif_device *v = &mrt->vif_table[0];
3234 struct mfc_cache *mfc;
3235 int vifi;
3236
 3237		/* Notify on table VIF entries */
3238 read_lock(&mrt_lock);
3239 for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
3240 if (!v->dev)
3241 continue;
3242
3243 call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
3244 v, vifi, mrt->id);
3245 }
3246 read_unlock(&mrt_lock);
3247
3248 /* Notify on table MFC entries */
3249 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
3250 call_ipmr_mfc_entry_notifier(nb, net,
3251 FIB_EVENT_ENTRY_ADD, mfc,
3252 mrt->id);
3253 }
3254
3255 return 0;
3256}
3257
3258static const struct fib_notifier_ops ipmr_notifier_ops_template = {
3259 .family = RTNL_FAMILY_IPMR,
3260 .fib_seq_read = ipmr_seq_read,
3261 .fib_dump = ipmr_dump,
3262 .owner = THIS_MODULE,
3263};
3264
3265static int __net_init ipmr_notifier_init(struct net *net)
3266{
3267 struct fib_notifier_ops *ops;
3268
3269 net->ipv4.ipmr_seq = 0;
3270
3271 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
3272 if (IS_ERR(ops))
3273 return PTR_ERR(ops);
3274 net->ipv4.ipmr_notifier_ops = ops;
3275
3276 return 0;
3277}
3278
3279static void __net_exit ipmr_notifier_exit(struct net *net)
3280{
3281 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
3282 net->ipv4.ipmr_notifier_ops = NULL;
3283}
3284
3051/* Setup for IP multicast routing */ 3285/* Setup for IP multicast routing */
3052static int __net_init ipmr_net_init(struct net *net) 3286static int __net_init ipmr_net_init(struct net *net)
3053{ 3287{
3054 int err; 3288 int err;
3055 3289
3290 err = ipmr_notifier_init(net);
3291 if (err)
3292 goto ipmr_notifier_fail;
3293
3056 err = ipmr_rules_init(net); 3294 err = ipmr_rules_init(net);
3057 if (err < 0) 3295 if (err < 0)
3058 goto fail; 3296 goto ipmr_rules_fail;
3059 3297
3060#ifdef CONFIG_PROC_FS 3298#ifdef CONFIG_PROC_FS
3061 err = -ENOMEM; 3299 err = -ENOMEM;
@@ -3072,7 +3310,9 @@ proc_cache_fail:
3072proc_vif_fail: 3310proc_vif_fail:
3073 ipmr_rules_exit(net); 3311 ipmr_rules_exit(net);
3074#endif 3312#endif
3075fail: 3313ipmr_rules_fail:
3314 ipmr_notifier_exit(net);
3315ipmr_notifier_fail:
3076 return err; 3316 return err;
3077} 3317}
3078 3318
@@ -3082,6 +3322,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
3082 remove_proc_entry("ip_mr_cache", net->proc_net); 3322 remove_proc_entry("ip_mr_cache", net->proc_net);
3083 remove_proc_entry("ip_mr_vif", net->proc_net); 3323 remove_proc_entry("ip_mr_vif", net->proc_net);
3084#endif 3324#endif
3325 ipmr_notifier_exit(net);
3085 ipmr_rules_exit(net); 3326 ipmr_rules_exit(net);
3086} 3327}
3087 3328
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index f462fee66ac8..adcdae358365 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the netfilter modules on top of IPv4. 3# Makefile for the netfilter modules on top of IPv4.
3# 4#
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 9e2770fd00be..f88221aebc9d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -634,6 +634,25 @@ static void get_counters(const struct xt_table_info *t,
634 } 634 }
635} 635}
636 636
637static void get_old_counters(const struct xt_table_info *t,
638 struct xt_counters counters[])
639{
640 struct arpt_entry *iter;
641 unsigned int cpu, i;
642
643 for_each_possible_cpu(cpu) {
644 i = 0;
645 xt_entry_foreach(iter, t->entries, t->size) {
646 struct xt_counters *tmp;
647
648 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
649 ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
650 ++i;
651 }
652 cond_resched();
653 }
654}
655
637static struct xt_counters *alloc_counters(const struct xt_table *table) 656static struct xt_counters *alloc_counters(const struct xt_table *table)
638{ 657{
639 unsigned int countersize; 658 unsigned int countersize;
@@ -910,8 +929,7 @@ static int __do_replace(struct net *net, const char *name,
910 (newinfo->number <= oldinfo->initial_entries)) 929 (newinfo->number <= oldinfo->initial_entries))
911 module_put(t->me); 930 module_put(t->me);
912 931
913 /* Get the old counters, and synchronize with replace */ 932 get_old_counters(oldinfo, counters);
914 get_counters(oldinfo, counters);
915 933
916 /* Decrease module usage counts and free resource */ 934 /* Decrease module usage counts and free resource */
917 loc_cpu_old_entry = oldinfo->entries; 935 loc_cpu_old_entry = oldinfo->entries;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 39286e543ee6..4cbe5e80f3bf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -781,6 +781,26 @@ get_counters(const struct xt_table_info *t,
781 } 781 }
782} 782}
783 783
784static void get_old_counters(const struct xt_table_info *t,
785 struct xt_counters counters[])
786{
787 struct ipt_entry *iter;
788 unsigned int cpu, i;
789
790 for_each_possible_cpu(cpu) {
791 i = 0;
792 xt_entry_foreach(iter, t->entries, t->size) {
793 const struct xt_counters *tmp;
794
795 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
796 ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
797 ++i; /* macro does multi eval of i */
798 }
799
800 cond_resched();
801 }
802}
803
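
Both get_old_counters() variants (this one and the arp_tables copy above) fold per-CPU packet/byte counters into a single array. The old table is already unhooked when they run, so unlike get_counters() no seqcount synchronization is needed, and the cond_resched() between CPUs keeps huge rulesets from monopolizing the CPU. The "++i" sits on its own line because ADD_COUNTER() expands its first argument more than once; it is essentially:

	#define ADD_COUNTER(c, b, p)        \
	do {                                \
		(c).bcnt += (b);            \
		(c).pcnt += (p);            \
	} while (0)

so ADD_COUNTER(counters[i++], ...) would evaluate i++ twice.
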
784static struct xt_counters *alloc_counters(const struct xt_table *table) 804static struct xt_counters *alloc_counters(const struct xt_table *table)
785{ 805{
786 unsigned int countersize; 806 unsigned int countersize;
@@ -1070,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1070 (newinfo->number <= oldinfo->initial_entries)) 1090 (newinfo->number <= oldinfo->initial_entries))
1071 module_put(t->me); 1091 module_put(t->me);
1072 1092
1073 /* Get the old counters, and synchronize with replace */ 1093 get_old_counters(oldinfo, counters);
1074 get_counters(oldinfo, counters);
1075 1094
1076 /* Decrease module usage counts and free resource */ 1095 /* Decrease module usage counts and free resource */
1077 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) 1096 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 811689e523c3..f75fc6b53115 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
 	if (synproxy == NULL)
 		return NF_ACCEPT;
 
-	if (nf_is_loopback_packet(skb))
+	if (nf_is_loopback_packet(skb) ||
+	    ip_hdr(skb)->protocol != IPPROTO_TCP)
 		return NF_ACCEPT;
 
 	thoff = ip_hdrlen(skb);
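
The SYNPROXY fix above bails out before any TCP parsing when the IPv4 header says the payload is not TCP. A userspace sketch of the same guard against a raw header (layout per RFC 791; the struct and names are illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	#define PROTO_TCP 6	/* IANA protocol number for TCP */

	struct ipv4_hdr {
		uint8_t  ver_ihl;
		uint8_t  tos;
		uint16_t tot_len;
		uint16_t id;
		uint16_t frag_off;
		uint8_t  ttl;
		uint8_t  protocol;
		uint16_t check;
		uint32_t saddr, daddr;
	};

	static int is_tcp(const struct ipv4_hdr *ip)
	{
		return ip->protocol == PROTO_TCP;
	}

	int main(void)
	{
		struct ipv4_hdr udp_pkt = { .protocol = 17 };	/* UDP */

		puts(is_tcp(&udp_pkt) ? "parse TCP header" : "accept, skip TCP parsing");
		return 0;
	}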
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fe374da4bc13..89af9d88ca21 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -344,7 +344,7 @@ static void ipv4_hooks_unregister(struct net *net)
 	mutex_unlock(&register_ipv4_hooks);
 }
 
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.l3proto	 = PF_INET,
 	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
 	.invert_tuple	 = ipv4_invert_tuple,
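
The l3proto change swaps `__read_mostly` for `const`: once nothing writes the ops table after initialization, making it const lets the toolchain place it in read-only memory. A tiny sketch of the idiom (types invented for illustration):

	#include <stdio.h>

	struct proto_ops {
		int (*handle)(int pkt);
	};

	static int handle_v4(int pkt) { return pkt + 1; }

	/* const: lands in .rodata, stray writes fault instead of corrupting */
	static const struct proto_ops ipv4_ops = {
		.handle = handle_v4,
	};

	int main(void)
	{
		printf("%d\n", ipv4_ops.handle(41));
		return 0;
	}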
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index a046c298413a..1849fedd9b81 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -81,7 +81,6 @@ static int icmp_packet(struct nf_conn *ct,
 			 const struct sk_buff *skb,
 			 unsigned int dataoff,
 			 enum ip_conntrack_info ctinfo,
-			 u_int8_t pf,
 			 unsigned int *timeout)
 {
 	/* Do not immediately delete the connection after the first
@@ -165,6 +164,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+			   u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
 /* Small and modified version of icmp_rcv */
 static int
 icmp_error(struct net *net, struct nf_conn *tmpl,
@@ -177,18 +182,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
 	/* Not enough header? */
 	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
 	if (icmph == NULL) {
-		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
-				      NULL, "nf_ct_icmp: short packet ");
+		icmp_error_log(skb, net, pf, "short packet");
 		return -NF_ACCEPT;
 	}
 
 	/* See ip_conntrack_proto_tcp.c */
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
-		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_icmp: bad HW ICMP checksum ");
+		icmp_error_log(skb, net, pf, "bad hw icmp checksum");
 		return -NF_ACCEPT;
 	}
 
@@ -199,9 +200,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
 	 * discarded.
 	 */
 	if (icmph->type > NR_ICMP_TYPES) {
-		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_icmp: invalid ICMP type ");
+		icmp_error_log(skb, net, pf, "invalid icmp type");
 		return -NF_ACCEPT;
 	}
 
@@ -259,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
 	return 0;
 }
 
-static int icmp_nlattr_tuple_size(void)
+static unsigned int icmp_nlattr_tuple_size(void)
 {
-	return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+	static unsigned int size __read_mostly;
+
+	if (!size)
+		size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
 }
 #endif
 
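
The icmp_nlattr_tuple_size() change caches a value that can never change after boot, so the policy length is computed at most once per caller generation rather than on every netlink dump. A hedged userspace sketch of the same compute-once pattern (expensive_len() is a stand-in for nla_policy_len()):

	#include <stdio.h>

	static unsigned int expensive_len(void)
	{
		puts("computed once");
		return 42;
	}

	static unsigned int tuple_size(void)
	{
		static unsigned int size;	/* 0 means "not yet computed" */

		if (!size)
			size = expensive_len();

		return size;
	}

	int main(void)
	{
		printf("%u %u\n", tuple_size(), tuple_size());
		return 0;
	}

Two CPUs racing on the first call may both compute the value, but since both arrive at the same constant the race is benign, which is why no locking is needed.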
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index a0f37b208268..0443ca4120b0 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -276,7 +276,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 		else
 			return NF_ACCEPT;
 	}
-	/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+	/* Only ICMPs can be IP_CT_IS_REPLY: */
+	/* fall through */
 	case IP_CT_NEW:
 		/* Seen it before?  This can happen for loopback, retrans,
 		 * or local packets.
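
The comment split above is not cosmetic: compilers warning on implicit switch fall-through look for a bare fall-through annotation as the last thing before the next case label, so the explanatory text is moved onto its own line. A sketch of the shape the checker expects:

	#include <stdio.h>

	static int classify(int state)
	{
		int score = 0;

		switch (state) {
		case 1:
			score += 10;
			/* fall through */
		case 2:
			score += 1;
			break;
		default:
			break;
		}
		return score;
	}

	int main(void)
	{
		printf("%d %d\n", classify(1), classify(2));	/* 11 1 */
		return 0;
	}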
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index eeacbdaf7cdf..5cd06ba3535d 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -132,6 +132,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
 	if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
 		goto free_nskb;
 
+	niph = ip_hdr(nskb);
+
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(skb_dst(nskb)))
 		goto free_nskb;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 127153f1ed8a..9f37c4727861 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -212,7 +212,6 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
 	SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
 	SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
-	SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER),
 	SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER),
 	SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER),
 	SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 94d4cd2d5ea4..43b69af242e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -495,7 +495,7 @@ u32 ip_idents_reserve(u32 hash, int segs)
 {
 	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
 	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
-	u32 old = ACCESS_ONCE(*p_tstamp);
+	u32 old = READ_ONCE(*p_tstamp);
 	u32 now = (u32)jiffies;
 	u32 new, delta = 0;
 
@@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
 	struct rtable *rt;
+	u32 genid, hval;
 	unsigned int i;
 	int depth;
-	u32 hval = fnhe_hashfun(daddr);
+
+	genid = fnhe_genid(dev_net(nh->nh_dev));
+	hval = fnhe_hashfun(daddr);
 
 	spin_lock_bh(&fnhe_lock);
 
@@ -676,12 +679,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	}
 
 	if (fnhe) {
+		if (fnhe->fnhe_genid != genid)
+			fnhe->fnhe_genid = genid;
 		if (gw)
 			fnhe->fnhe_gw = gw;
-		if (pmtu) {
+		if (pmtu)
 			fnhe->fnhe_pmtu = pmtu;
-			fnhe->fnhe_expires = max(1UL, expires);
-		}
+		fnhe->fnhe_expires = max(1UL, expires);
 		/* Update all cached dsts too */
 		rt = rcu_dereference(fnhe->fnhe_rth_input);
 		if (rt)
@@ -700,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		fnhe->fnhe_next = hash->chain;
 		rcu_assign_pointer(hash->chain, fnhe);
 	}
-	fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+	fnhe->fnhe_genid = genid;
 	fnhe->fnhe_daddr = daddr;
 	fnhe->fnhe_gw = gw;
 	fnhe->fnhe_pmtu = pmtu;
@@ -1250,7 +1254,7 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 {
 	unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
-	unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+	unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
 				    ip_rt_min_advmss);
 
 	return min(advmss, IPV4_MAX_PMTU - header_size);
@@ -1520,43 +1524,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 EXPORT_SYMBOL(rt_dst_alloc);
 
 /* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-			     u8 tos, struct net_device *dev, int our)
+int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			  u8 tos, struct net_device *dev,
+			  struct in_device *in_dev, u32 *itag)
 {
-	struct rtable *rth;
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	unsigned int flags = RTCF_MULTICAST;
-	u32 itag = 0;
 	int err;
 
 	/* Primary sanity checks. */
-
 	if (!in_dev)
 		return -EINVAL;
 
 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
 	    skb->protocol != htons(ETH_P_IP))
-		goto e_inval;
+		return -EINVAL;
 
 	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
-		goto e_inval;
+		return -EINVAL;
 
 	if (ipv4_is_zeronet(saddr)) {
 		if (!ipv4_is_local_multicast(daddr))
-			goto e_inval;
+			return -EINVAL;
 	} else {
 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
-					  in_dev, &itag);
+					  in_dev, itag);
 		if (err < 0)
-			goto e_err;
+			return err;
 	}
+	return 0;
+}
+
+/* called in rcu_read_lock() section */
+static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			     u8 tos, struct net_device *dev, int our)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	unsigned int flags = RTCF_MULTICAST;
+	struct rtable *rth;
+	u32 itag = 0;
+	int err;
+
+	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
+	if (err)
+		return err;
+
 	if (our)
 		flags |= RTCF_LOCAL;
 
 	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
 	if (!rth)
-		goto e_nobufs;
+		return -ENOBUFS;
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
@@ -1572,13 +1589,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	skb_dst_set(skb, &rth->dst);
 	return 0;
-
-e_nobufs:
-	return -ENOBUFS;
-e_inval:
-	return -EINVAL;
-e_err:
-	return err;
 }
 
 
@@ -2507,7 +2517,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 	struct rtable *ort = (struct rtable *) dst_orig;
 	struct rtable *rt;
 
-	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
+	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
 	if (rt) {
 		struct dst_entry *new = &rt->dst;
 
@@ -3032,7 +3042,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
 
 int __init ip_rt_init(void)
 {
-	int rc = 0;
 	int cpu;
 
 	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
@@ -3089,7 +3098,7 @@ int __init ip_rt_init(void)
 #endif
 	register_pernet_subsys(&rt_genid_ops);
 	register_pernet_subsys(&ipv4_inetpeer_ops);
-	return rc;
+	return 0;
 }
 
 #ifdef CONFIG_SYSCTL
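
The ip_route_input_mc() rework above is a classic extraction: the validation half becomes a helper that returns error codes directly, and the goto-label exits (e_inval, e_err, e_nobufs) disappear because each failure now returns at the point it is detected. A hedged sketch of the same restructuring with invented checks:

	#include <errno.h>
	#include <stdio.h>

	/* validation extracted into its own helper; errors return immediately */
	static int validate_source(int addr, int *tag)
	{
		if (addr < 0)
			return -EINVAL;	/* was: goto e_inval */
		*tag = addr & 0xff;
		return 0;
	}

	static int route_input(int addr)
	{
		int tag, err;

		err = validate_source(addr, &tag);
		if (err)
			return err;

		printf("routed, tag=%d\n", tag);
		return 0;
	}

	int main(void)
	{
		printf("bad addr -> %d\n", route_input(-1));	/* prints -22 */
		return route_input(0x1ff) ? 1 : 0;
	}

Besides readability, the split lets other callers reuse the validation step on its own, which is why the helper loses its `static`.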
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b1bb1b3a1082..fda37f2862c9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
 	 */
-	ireq->opt = tcp_v4_save_options(sock_net(sk), skb);
+	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
@@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	/* Try to redo what tcp_v4_send_synack did. */
 	req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
 
-	tcp_select_initial_window(tcp_full_space(sk), req->mss,
+	tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
 				  &req->rsk_rcv_wnd, &req->rsk_window_clamp,
 				  ireq->wscale_ok, &rcv_wscale,
 				  dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0d3c038d7b04..93e172118a94 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
  *
@@ -25,6 +26,7 @@
 #include <net/inet_frag.h>
 #include <net/ping.h>
 #include <net/protocol.h>
+#include <net/netevent.h>
 
 static int zero;
 static int one = 1;
@@ -199,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 				       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct net *net = container_of(ctl->data, struct net,
+				       ipv4.tcp_congestion_control);
 	char val[TCP_CA_NAME_MAX];
 	struct ctl_table tbl = {
 		.data = val,
@@ -206,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 	};
 	int ret;
 
-	tcp_get_default_congestion_control(val);
+	tcp_get_default_congestion_control(net, val);
 
 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 	if (write && ret == 0)
-		ret = tcp_set_default_congestion_control(val);
+		ret = tcp_set_default_congestion_control(net, val);
 	return ret;
 }
 
@@ -251,10 +255,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
 	return ret;
 }
 
-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos)
 {
+	struct net *net = container_of(table->data, struct net,
+				       ipv4.sysctl_tcp_fastopen);
 	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
 	struct tcp_fastopen_context *ctxt;
 	int ret;
@@ -265,7 +271,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 		return -ENOMEM;
 
 	rcu_read_lock();
-	ctxt = rcu_dereference(tcp_fastopen_ctx);
+	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
 	if (ctxt)
 		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
 	else
@@ -282,12 +288,8 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 			ret = -EINVAL;
 			goto bad_key;
 		}
-		/* Generate a dummy secret but don't publish it. This
-		 * is needed so we don't regenerate a new key on the
-		 * first invocation of tcp_fastopen_cookie_gen
-		 */
-		tcp_fastopen_init_key_once(false);
-		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+		tcp_fastopen_reset_cipher(net, NULL, user_key,
+					  TCP_FASTOPEN_KEY_LENGTH);
 	}
 
 bad_key:
@@ -358,11 +360,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
 					     void __user *buffer,
 					     size_t *lenp, loff_t *ppos)
 {
+	struct net *net = container_of(table->data, struct net,
+				       ipv4.sysctl_tcp_fastopen_blackhole_timeout);
 	int ret;
 
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (write && ret == 0)
-		tcp_fastopen_active_timeout_reset();
+		atomic_set(&net->ipv4.tfo_active_disable_times, 0);
 
 	return ret;
 }
@@ -385,15 +389,25 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
 	return ret;
 }
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	struct net *net = container_of(table->data, struct net,
+				       ipv4.sysctl_fib_multipath_hash_policy);
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+
+	return ret;
+}
+#endif
+
 static struct ctl_table ipv4_table[] = {
 	{
-		.procname = "tcp_retrans_collapse",
-		.data = &sysctl_tcp_retrans_collapse,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
 		.procname = "tcp_max_orphans",
 		.data = &sysctl_tcp_max_orphans,
 		.maxlen = sizeof(int),
@@ -401,48 +415,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler = proc_dointvec
 	},
 	{
-		.procname = "tcp_fastopen",
-		.data = &sysctl_tcp_fastopen,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec,
-	},
-	{
-		.procname = "tcp_fastopen_key",
-		.mode = 0600,
-		.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
-		.proc_handler = proc_tcp_fastopen_key,
-	},
-	{
-		.procname = "tcp_fastopen_blackhole_timeout_sec",
-		.data = &sysctl_tcp_fastopen_blackhole_timeout,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_tfo_blackhole_detect_timeout,
-		.extra1 = &zero,
-	},
-	{
-		.procname = "tcp_abort_on_overflow",
-		.data = &sysctl_tcp_abort_on_overflow,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_stdurg",
-		.data = &sysctl_tcp_stdurg,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_rfc1337",
-		.data = &sysctl_tcp_rfc1337,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
 		.procname = "inet_peer_threshold",
 		.data = &inet_peer_threshold,
 		.maxlen = sizeof(int),
@@ -464,34 +436,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler = proc_dointvec_jiffies,
 	},
 	{
-		.procname = "tcp_fack",
-		.data = &sysctl_tcp_fack,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_recovery",
-		.data = &sysctl_tcp_recovery,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec,
-	},
-	{
-		.procname = "tcp_max_reordering",
-		.data = &sysctl_tcp_max_reordering,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_dsack",
-		.data = &sysctl_tcp_dsack,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
 		.procname = "tcp_mem",
 		.maxlen = sizeof(sysctl_tcp_mem),
 		.data = &sysctl_tcp_mem,
@@ -499,113 +443,12 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler = proc_doulongvec_minmax,
 	},
 	{
-		.procname = "tcp_wmem",
-		.data = &sysctl_tcp_wmem,
-		.maxlen = sizeof(sysctl_tcp_wmem),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &one,
-	},
-	{
-		.procname = "tcp_rmem",
-		.data = &sysctl_tcp_rmem,
-		.maxlen = sizeof(sysctl_tcp_rmem),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &one,
-	},
-	{
-		.procname = "tcp_app_win",
-		.data = &sysctl_tcp_app_win,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_adv_win_scale",
-		.data = &sysctl_tcp_adv_win_scale,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &tcp_adv_win_scale_min,
-		.extra2 = &tcp_adv_win_scale_max,
-	},
-	{
-		.procname = "tcp_frto",
-		.data = &sysctl_tcp_frto,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_min_rtt_wlen",
-		.data = &sysctl_tcp_min_rtt_wlen,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
 		.procname = "tcp_low_latency",
 		.data = &sysctl_tcp_low_latency,
 		.maxlen = sizeof(int),
 		.mode = 0644,
 		.proc_handler = proc_dointvec
 	},
-	{
-		.procname = "tcp_no_metrics_save",
-		.data = &sysctl_tcp_nometrics_save,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec,
-	},
-	{
-		.procname = "tcp_moderate_rcvbuf",
-		.data = &sysctl_tcp_moderate_rcvbuf,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec,
-	},
-	{
-		.procname = "tcp_tso_win_divisor",
-		.data = &sysctl_tcp_tso_win_divisor,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec,
-	},
-	{
-		.procname = "tcp_congestion_control",
-		.mode = 0644,
-		.maxlen = TCP_CA_NAME_MAX,
-		.proc_handler = proc_tcp_congestion_control,
-	},
-	{
-		.procname = "tcp_workaround_signed_windows",
-		.data = &sysctl_tcp_workaround_signed_windows,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_limit_output_bytes",
-		.data = &sysctl_tcp_limit_output_bytes,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_challenge_ack_limit",
-		.data = &sysctl_tcp_challenge_ack_limit,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_slow_start_after_idle",
-		.data = &sysctl_tcp_slow_start_after_idle,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
 #ifdef CONFIG_NETLABEL
 	{
 		.procname = "cipso_cache_enable",
@@ -649,65 +492,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler = proc_allowed_congestion_control,
 	},
 	{
-		.procname = "tcp_thin_linear_timeouts",
-		.data = &sysctl_tcp_thin_linear_timeouts,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec
-	},
-	{
-		.procname = "tcp_early_retrans",
-		.data = &sysctl_tcp_early_retrans,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &zero,
-		.extra2 = &four,
-	},
-	{
-		.procname = "tcp_min_tso_segs",
-		.data = &sysctl_tcp_min_tso_segs,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &one,
-		.extra2 = &gso_max_segs,
-	},
-	{
-		.procname = "tcp_pacing_ss_ratio",
-		.data = &sysctl_tcp_pacing_ss_ratio,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &zero,
-		.extra2 = &thousand,
-	},
-	{
-		.procname = "tcp_pacing_ca_ratio",
-		.data = &sysctl_tcp_pacing_ca_ratio,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &zero,
-		.extra2 = &thousand,
-	},
-	{
-		.procname = "tcp_autocorking",
-		.data = &sysctl_tcp_autocorking,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &zero,
-		.extra2 = &one,
-	},
-	{
-		.procname = "tcp_invalid_ratelimit",
-		.data = &sysctl_tcp_invalid_ratelimit,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = proc_dointvec_ms_jiffies,
-	},
-	{
 		.procname = "tcp_available_ulp",
 		.maxlen = TCP_ULP_BUF_MAX,
 		.mode = 0444,
@@ -976,6 +760,13 @@ static struct ctl_table ipv4_net_table[] = {
 	},
 #endif
 	{
+		.procname = "tcp_congestion_control",
+		.data = &init_net.ipv4.tcp_congestion_control,
+		.mode = 0644,
+		.maxlen = TCP_CA_NAME_MAX,
+		.proc_handler = proc_tcp_congestion_control,
+	},
+	{
 		.procname = "tcp_keepalive_time",
 		.data = &init_net.ipv4.sysctl_tcp_keepalive_time,
 		.maxlen = sizeof(int),
@@ -1085,6 +876,28 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode = 0644,
 		.proc_handler = proc_dointvec
 	},
+	{
+		.procname = "tcp_fastopen",
+		.data = &init_net.ipv4.sysctl_tcp_fastopen,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "tcp_fastopen_key",
+		.mode = 0600,
+		.data = &init_net.ipv4.sysctl_tcp_fastopen,
+		.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+		.proc_handler = proc_tcp_fastopen_key,
+	},
+	{
+		.procname = "tcp_fastopen_blackhole_timeout_sec",
+		.data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_tfo_blackhole_detect_timeout,
+		.extra1 = &zero,
+	},
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	{
 		.procname = "fib_multipath_use_neigh",
@@ -1100,7 +913,7 @@ static struct ctl_table ipv4_net_table[] = {
 		.data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
 		.maxlen = sizeof(int),
 		.mode = 0644,
-		.proc_handler = proc_dointvec_minmax,
+		.proc_handler = proc_fib_multipath_hash_policy,
 		.extra1 = &zero,
 		.extra2 = &one,
 	},
@@ -1144,6 +957,216 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode = 0644,
 		.proc_handler = proc_dointvec
 	},
+	{
+		.procname = "tcp_early_retrans",
+		.data = &init_net.ipv4.sysctl_tcp_early_retrans,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &zero,
+		.extra2 = &four,
+	},
+	{
+		.procname = "tcp_recovery",
+		.data = &init_net.ipv4.sysctl_tcp_recovery,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "tcp_thin_linear_timeouts",
+		.data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_slow_start_after_idle",
+		.data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_retrans_collapse",
+		.data = &init_net.ipv4.sysctl_tcp_retrans_collapse,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_stdurg",
+		.data = &init_net.ipv4.sysctl_tcp_stdurg,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_rfc1337",
+		.data = &init_net.ipv4.sysctl_tcp_rfc1337,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_abort_on_overflow",
+		.data = &init_net.ipv4.sysctl_tcp_abort_on_overflow,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_fack",
+		.data = &init_net.ipv4.sysctl_tcp_fack,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_max_reordering",
+		.data = &init_net.ipv4.sysctl_tcp_max_reordering,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_dsack",
+		.data = &init_net.ipv4.sysctl_tcp_dsack,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_app_win",
+		.data = &init_net.ipv4.sysctl_tcp_app_win,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_adv_win_scale",
+		.data = &init_net.ipv4.sysctl_tcp_adv_win_scale,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &tcp_adv_win_scale_min,
+		.extra2 = &tcp_adv_win_scale_max,
+	},
+	{
+		.procname = "tcp_frto",
+		.data = &init_net.ipv4.sysctl_tcp_frto,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_no_metrics_save",
+		.data = &init_net.ipv4.sysctl_tcp_nometrics_save,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "tcp_moderate_rcvbuf",
+		.data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "tcp_tso_win_divisor",
+		.data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "tcp_workaround_signed_windows",
+		.data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_limit_output_bytes",
+		.data = &init_net.ipv4.sysctl_tcp_limit_output_bytes,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_challenge_ack_limit",
+		.data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_min_tso_segs",
+		.data = &init_net.ipv4.sysctl_tcp_min_tso_segs,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &one,
+		.extra2 = &gso_max_segs,
+	},
+	{
+		.procname = "tcp_min_rtt_wlen",
+		.data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "tcp_autocorking",
+		.data = &init_net.ipv4.sysctl_tcp_autocorking,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &zero,
+		.extra2 = &one,
+	},
+	{
+		.procname = "tcp_invalid_ratelimit",
+		.data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_ms_jiffies,
+	},
+	{
+		.procname = "tcp_pacing_ss_ratio",
+		.data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &zero,
+		.extra2 = &thousand,
+	},
+	{
+		.procname = "tcp_pacing_ca_ratio",
+		.data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &zero,
+		.extra2 = &thousand,
+	},
+	{
+		.procname = "tcp_wmem",
+		.data = &init_net.ipv4.sysctl_tcp_wmem,
+		.maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &one,
+	},
+	{
+		.procname = "tcp_rmem",
+		.data = &init_net.ipv4.sysctl_tcp_rmem,
+		.maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &one,
+	},
 	{ }
 };
 
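
The handlers moved to the per-netns table above all recover their struct net the same way: the ctl_table .data field points at a member inside struct net, so container_of() can walk back to the containing object. A self-contained sketch of that pointer arithmetic (the struct layout here is invented; only the container_of() definition mirrors the kernel's):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct net {
		int id;
		int sysctl_tcp_fastopen;
	};

	int main(void)
	{
		struct net init_net = { .id = 7, .sysctl_tcp_fastopen = 1 };
		/* a ctl_table entry would store this pointer in its .data field */
		int *data = &init_net.sysctl_tcp_fastopen;
		struct net *net = container_of(data, struct net, sysctl_tcp_fastopen);

		printf("net id via container_of: %d\n", net->id);	/* 7 */
		return 0;
	}

This is what makes one handler serve every network namespace: each namespace's table copy carries a .data pointer into its own struct net, and the handler derives the right namespace from that.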
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402720ab..bf97317e6c97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -270,6 +270,7 @@
 #include <linux/time.h>
 #include <linux/slab.h>
 #include <linux/errqueue.h>
+#include <linux/static_key.h>
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
@@ -282,24 +283,22 @@
 #include <asm/ioctls.h>
 #include <net/busy_poll.h>
 
-int sysctl_tcp_min_tso_segs __read_mostly = 2;
-
-int sysctl_tcp_autocorking __read_mostly = 1;
+#include <trace/events/tcp.h>
 
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
 long sysctl_tcp_mem[3] __read_mostly;
-int sysctl_tcp_wmem[3] __read_mostly;
-int sysctl_tcp_rmem[3] __read_mostly;
-
 EXPORT_SYMBOL(sysctl_tcp_mem);
-EXPORT_SYMBOL(sysctl_tcp_rmem);
-EXPORT_SYMBOL(sysctl_tcp_wmem);
 
 atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
 
+#if IS_ENABLED(CONFIG_SMC)
+DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
+EXPORT_SYMBOL(tcp_have_smc);
+#endif
+
 /*
  * Current number of TCP sockets.
  */
@@ -413,8 +412,10 @@ void tcp_init_sock(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->out_of_order_queue = RB_ROOT;
+	sk->tcp_rtx_queue = RB_ROOT;
 	tcp_init_xmit_timers(sk);
 	INIT_LIST_HEAD(&tp->tsq_node);
+	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -441,6 +442,7 @@ void tcp_init_sock(struct sock *sk)
 	tcp_assign_congestion_control(sk);
 
 	tp->tsoffset = 0;
+	tp->rack.reo_wnd_steps = 1;
 
 	sk->sk_state = TCP_CLOSE;
 
@@ -449,15 +451,29 @@ void tcp_init_sock(struct sock *sk)
 
 	icsk->icsk_sync_mss = tcp_sync_mss;
 
-	sk->sk_sndbuf = sysctl_tcp_wmem[1];
-	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+	sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
 
 	sk_sockets_allocated_inc(sk);
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
+void tcp_init_transfer(struct sock *sk, int bpf_op)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	tcp_mtup_init(sk);
+	icsk->icsk_af_ops->rebuild_header(sk);
+	tcp_init_metrics(sk);
+	tcp_call_bpf(sk, bpf_op);
+	tcp_init_congestion_control(sk);
+	tcp_init_buffer_space(sk);
+}
+
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
+{
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
+
 	if (tsflags && skb) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -675,7 +691,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
 				int size_goal)
 {
 	return skb->len < size_goal &&
-	       sysctl_tcp_autocorking &&
+	       sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
 	       skb != tcp_write_queue_head(sk) &&
 	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
 }
@@ -686,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!tcp_send_head(sk))
-		return;
-
 	skb = tcp_write_queue_tail(sk);
+	if (!skb)
+		return;
 	if (!(flags & MSG_MORE) || forced_push(tp))
 		tcp_mark_push(tp, skb);
 
@@ -869,6 +884,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 		 * available to the caller, no more, no less.
 		 */
 		skb->reserved_tailroom = skb->end - skb->tail - size;
+		INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 		return skb;
 	}
 	__kfree_skb(skb);
@@ -948,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 		int copy, i;
 		bool can_coalesce;
 
-		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+		if (!skb || (copy = size_goal - skb->len) <= 0 ||
 		    !tcp_skb_can_collapse_to(skb)) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
 			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
-					skb_queue_empty(&sk->sk_write_queue));
+					tcp_rtx_and_write_queues_empty(sk));
 			if (!skb)
 				goto wait_for_memory;
 
@@ -1027,7 +1043,7 @@ wait_for_memory:
 
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+		tcp_tx_timestamp(sk, sk->sk_tsflags);
 		if (!(flags & MSG_SENDPAGE_NOTLAST))
 			tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
@@ -1126,7 +1142,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	struct sockaddr *uaddr = msg->msg_name;
 	int err, flags;
 
-	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+	if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
 	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
 	     uaddr->sa_family == AF_UNSPEC))
 		return -EOPNOTSUPP;
@@ -1183,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 		goto out_err;
 	}
 
-	skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL;
+	skb = tcp_write_queue_tail(sk);
 	uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
 	if (!uarg) {
 		err = -ENOBUFS;
@@ -1259,7 +1275,7 @@ restart:
 		int max = size_goal;
 
 		skb = tcp_write_queue_tail(sk);
-		if (tcp_send_head(sk)) {
+		if (skb) {
 			if (skb->ip_summed == CHECKSUM_NONE)
 				max = mss_now;
 			copy = max - skb->len;
@@ -1279,7 +1295,7 @@ new_segment:
 				process_backlog = false;
 				goto restart;
 			}
-			first_skb = skb_queue_empty(&sk->sk_write_queue);
+			first_skb = tcp_rtx_and_write_queues_empty(sk);
 			skb = sk_stream_alloc_skb(sk,
 						  select_size(sk, sg, first_skb),
 						  sk->sk_allocation,
@@ -1404,7 +1420,7 @@ wait_for_memory:
 
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
+		tcp_tx_timestamp(sk, sockc.tsflags);
 		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
 out_nopush:
@@ -1505,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
 
 	/* XXX -- need to support SO_PEEK_OFF */
 
+	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
+		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
+		if (err)
+			return err;
+		copied += skb->len;
+	}
+
 	skb_queue_walk(&sk->sk_write_queue, skb) {
 		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
 		if (err)
@@ -2017,6 +2040,8 @@ void tcp_set_state(struct sock *sk, int state)
 {
 	int oldstate = sk->sk_state;
 
+	trace_tcp_set_state(sk, oldstate, state);
+
 	switch (state) {
 	case TCP_ESTABLISHED:
 		if (oldstate != TCP_ESTABLISHED)
@@ -2304,6 +2329,37 @@ static inline bool tcp_need_reset(int state)
 		  TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
 }
 
+static void tcp_rtx_queue_purge(struct sock *sk)
+{
+	struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
+
+		p = rb_next(p);
+		/* Since we are deleting whole queue, no need to
+		 * list_del(&skb->tcp_tsorted_anchor)
+		 */
+		tcp_rtx_queue_unlink(skb, sk);
+		sk_wmem_free_skb(sk, skb);
+	}
+}
+
+void tcp_write_queue_purge(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+		tcp_skb_tsorted_anchor_cleanup(skb);
+		sk_wmem_free_skb(sk, skb);
+	}
+	tcp_rtx_queue_purge(sk);
+	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
+	sk_mem_reclaim(sk);
+	tcp_clear_all_retrans_hints(tcp_sk(sk));
+}
+
 int tcp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -2362,7 +2418,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	 * issue in __tcp_select_window()
 	 */
 	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
-	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
 	dst_release(sk->sk_rx_dst);
@@ -2454,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk,
 			return -EINVAL;
 
 		tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
-		if (sysctl_tcp_fack)
-			tcp_enable_fack(tp);
 		break;
 	case TCPOPT_TIMESTAMP:
 		if (opt.opt_val != 0)
@@ -2518,6 +2571,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		release_sock(sk);
 		return err;
 	}
+	case TCP_FASTOPEN_KEY: {
+		__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+		if (optlen != sizeof(key))
+			return -EINVAL;
+
+		if (copy_from_user(key, optval, optlen))
+			return -EFAULT;
+
+		return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+	}
 	default:
 		/* fallthru */
 		break;
@@ -2749,7 +2813,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_FASTOPEN:
 		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
 		    TCPF_LISTEN))) {
-			tcp_fastopen_init_key_once(true);
+			tcp_fastopen_init_key_once(net);
 
 			fastopen_queue_tune(sk, val);
 		} else {
@@ -2759,7 +2823,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_FASTOPEN_CONNECT:
 		if (val > 1 || val < 0) {
 			err = -EINVAL;
-		} else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+		} else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
 			if (sk->sk_state == TCP_CLOSE)
 				tp->fastopen_connect = val;
 			else
@@ -2768,6 +2832,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			err = -EOPNOTSUPP;
 		}
 		break;
+	case TCP_FASTOPEN_NO_COOKIE:
+		if (val > 1 || val < 0)
+			err = -EINVAL;
+		else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+			err = -EINVAL;
+		else
+			tp->fastopen_no_cookie = val;
+		break;
 	case TCP_TIMESTAMP:
 		if (!tp->repair)
 			err = -EPERM;
@@ -2905,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_lost = tp->lost_out;
 	info->tcpi_retrans = tp->retrans_out;
-	info->tcpi_fackets = tp->fackets_out;
 
 	now = tcp_jiffies32;
 	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
@@ -3104,6 +3175,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			return -EFAULT;
 		return 0;
 
+	case TCP_FASTOPEN_KEY: {
+		__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+		struct tcp_fastopen_context *ctx;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		rcu_read_lock();
+		ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
+		if (ctx)
+			memcpy(key, ctx->key, sizeof(key));
+		else
+			len = 0;
+		rcu_read_unlock();
+
+		len = min_t(unsigned int, len, sizeof(key));
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, key, len))
+			return -EFAULT;
+		return 0;
+	}
 	case TCP_THIN_LINEAR_TIMEOUTS:
 		val = tp->thin_lto;
 		break;
@@ -3166,6 +3259,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = tp->fastopen_connect;
 		break;
 
+	case TCP_FASTOPEN_NO_COOKIE:
+		val = tp->fastopen_no_cookie;
+		break;
+
 	case TCP_TIMESTAMP:
 		val = tcp_time_stamp_raw() + tp->tsoffset;
 		break;
@@ -3531,13 +3628,13 @@ void __init tcp_init(void)
 	max_wshare = min(4UL*1024*1024, limit);
 	max_rshare = min(6UL*1024*1024, limit);
 
-	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
-	sysctl_tcp_wmem[1] = 16*1024;
-	sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
+	init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
+	init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
+	init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
-	sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
-	sysctl_tcp_rmem[1] = 87380;
-	sysctl_tcp_rmem[2] = max(87380, max_rshare);
+	init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
+	init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
+	init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
 
 	pr_info("Hash tables configured (established %u bind %u)\n",
 		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
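
The new TCP_FASTOPEN_KEY socket option above accepts exactly TCP_FASTOPEN_KEY_LENGTH (16) bytes on a TCP socket. A minimal usage sketch, assuming a kernel with this series applied; the fallback define of 33 matches the uapi value introduced alongside this code, but treat it as an assumption if your headers differ:

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <unistd.h>

	#ifndef TCP_FASTOPEN_KEY
	#define TCP_FASTOPEN_KEY 33	/* uapi value at the time of this series */
	#endif

	int main(void)
	{
		unsigned char key[16];
		int fd = socket(AF_INET, SOCK_STREAM, 0);

		if (fd < 0) {
			perror("socket");
			return 1;
		}
		memset(key, 0x42, sizeof(key));	/* demo key only; use random bytes */
		if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_KEY,
			       key, sizeof(key)) < 0)
			perror("setsockopt(TCP_FASTOPEN_KEY)");	/* older kernels: ENOPROTOOPT */
		close(fd);
		return 0;
	}

Passing any length other than 16 returns EINVAL, per the optlen check in do_tcp_setsockopt() above.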
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 66ac69f7bd19..06fbe102a425 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -389,7 +389,7 @@ static void tcp_cdg_release(struct sock *sk)
 	kfree(ca->gradients);
 }
 
-struct tcp_congestion_ops tcp_cdg __read_mostly = {
+static struct tcp_congestion_ops tcp_cdg __read_mostly = {
 	.cong_avoid = tcp_cdg_cong_avoid,
 	.cwnd_event = tcp_cdg_cwnd_event,
 	.pkts_acked = tcp_cdg_acked,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2f26124fd160..bc6c02f16243 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
 }
 
 /* Must be called with rcu lock held */
-static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
+static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
+						       const char *name)
 {
-	const struct tcp_congestion_ops *ca = tcp_ca_find(name);
+	struct tcp_congestion_ops *ca = tcp_ca_find(name);
+
 #ifdef CONFIG_MODULES
 	if (!ca && capable(CAP_NET_ADMIN)) {
 		rcu_read_unlock();
@@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
 }
 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
 
-u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
+u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
 {
 	const struct tcp_congestion_ops *ca;
 	u32 key = TCP_CA_UNSPEC;
@@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
 	might_sleep();
 
 	rcu_read_lock();
-	ca = __tcp_ca_find_autoload(name);
+	ca = tcp_ca_find_autoload(net, name);
 	if (ca) {
 		key = ca->key;
 		*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
 /* Assign choice of congestion control. */
 void tcp_assign_congestion_control(struct sock *sk)
 {
+	struct net *net = sock_net(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_congestion_ops *ca;
+	const struct tcp_congestion_ops *ca;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
-		if (likely(try_module_get(ca->owner))) {
-			icsk->icsk_ca_ops = ca;
-			goto out;
-		}
-		/* Fallback to next available. The last really
-		 * guaranteed fallback is Reno from this list.
-		 */
-	}
-out:
+	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
+	if (unlikely(!try_module_get(ca->owner)))
+		ca = &tcp_reno;
+	icsk->icsk_ca_ops = ca;
 	rcu_read_unlock();
-	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 
+	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 	if (ca->flags & TCP_CONG_NEEDS_ECN)
 		INET_ECN_xmit(sk);
 	else
@@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk)
 }
 
 /* Used by sysctl to change default congestion control */
-int tcp_set_default_congestion_control(const char *name)
+int tcp_set_default_congestion_control(struct net *net, const char *name)
 {
 	struct tcp_congestion_ops *ca;
-	int ret = -ENOENT;
-
-	spin_lock(&tcp_cong_list_lock);
-	ca = tcp_ca_find(name);
-#ifdef CONFIG_MODULES
-	if (!ca && capable(CAP_NET_ADMIN)) {
-		spin_unlock(&tcp_cong_list_lock);
+	const struct tcp_congestion_ops *prev;
+	int ret;
 
-		request_module("tcp_%s", name);
-		spin_lock(&tcp_cong_list_lock);
-		ca = tcp_ca_find(name);
-	}
-#endif
+	rcu_read_lock();
+	ca = tcp_ca_find_autoload(net, name);
+	if (!ca) {
+		ret = -ENOENT;
+	} else if (!try_module_get(ca->owner)) {
+		ret = -EBUSY;
+	} else {
+		prev = xchg(&net->ipv4.tcp_congestion_control, ca);
+		if (prev)
+			module_put(prev->owner);
 
-	if (ca) {
-		ca->flags |= TCP_CONG_NON_RESTRICTED;	/* default is always allowed */
-		list_move(&ca->list, &tcp_cong_list);
+		ca->flags |= TCP_CONG_NON_RESTRICTED;
 		ret = 0;
 	}
-	spin_unlock(&tcp_cong_list_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name)
 /* Set default value from kernel configuration at bootup */
 static int __init tcp_congestion_default(void)
 {
-	return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+	return tcp_set_default_congestion_control(&init_net,
+						  CONFIG_DEFAULT_TCP_CONG);
 }
 late_initcall(tcp_congestion_default);
 
@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
 }
 
 /* Get current default congestion control */
-void tcp_get_default_congestion_control(char *name)
+void tcp_get_default_congestion_control(struct net *net, char *name)
 {
-	struct tcp_congestion_ops *ca;
-	/* We will always have reno... */
-	BUG_ON(list_empty(&tcp_cong_list));
+	const struct tcp_congestion_ops *ca;
 
 	rcu_read_lock();
-	ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
+	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
 	strncpy(name, ca->name, TCP_CA_NAME_MAX);
 	rcu_read_unlock();
 }
@@ -351,12 +345,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
 	if (!load)
 		ca = tcp_ca_find(name);
 	else
-		ca = __tcp_ca_find_autoload(name);
+		ca = tcp_ca_find_autoload(sock_net(sk), name);
+
 	/* No change asking for existing value */
 	if (ca == icsk->icsk_ca_ops) {
 		icsk->icsk_ca_setsockopt = 1;
 		goto out;
 	}
+
 	if (!ca) {
 		err = -ENOENT;
 	} else if (!load) {
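
Taken together, the tcp_cong.c hunks drop the old trick of keeping the default congestion control at the head of the global tcp_cong_list and instead store an explicit per-namespace pointer, net->ipv4.tcp_congestion_control, swapped with xchg() and read under RCU. A compressed userspace sketch of the new assignment path (RCU and module reference counting are elided; tcp_reno remains the built-in fallback that can never go away):

    #include <stdio.h>

    /* Minimal stand-ins; the real kernel types carry much more state. */
    struct tcp_congestion_ops {
        const char *name;
    };

    struct netns_ipv4 {
        const struct tcp_congestion_ops *tcp_congestion_control;
    };

    struct net { struct netns_ipv4 ipv4; };

    static const struct tcp_congestion_ops tcp_reno = { .name = "reno" };

    /* Models tcp_assign_congestion_control() after the patch: the default
     * now comes from the socket's namespace, not from the head of a global
     * list. A NULL pointer here stands in for the try_module_get() failure
     * path, which falls back to the built-in reno.
     */
    static const struct tcp_congestion_ops *
    assign_congestion_control(const struct net *net)
    {
        const struct tcp_congestion_ops *ca = net->ipv4.tcp_congestion_control;

        if (!ca)
            ca = &tcp_reno;
        return ca;
    }

    int main(void)
    {
        static const struct tcp_congestion_ops tcp_cubic = { .name = "cubic" };
        struct net a = { .ipv4 = { .tcp_congestion_control = &tcp_cubic } };
        struct net b = { 0 };  /* namespace with no usable default */

        printf("net a uses %s\n", assign_congestion_control(&a)->name);
        printf("net b uses %s\n", assign_congestion_control(&b)->name);
        return 0;
    }
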
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3c33220c418..78c192ee03a4 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/crypto.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -9,15 +10,18 @@
 #include <net/inetpeer.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;
-
-struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-
-static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(struct net *net)
 {
-	static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+	u8 key[TCP_FASTOPEN_KEY_LENGTH];
+	struct tcp_fastopen_context *ctxt;
+
+	rcu_read_lock();
+	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+	if (ctxt) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
 
 	/* tcp_fastopen_reset_cipher publishes the new context
 	 * atomically, so we allow this race happening here.
@@ -25,8 +29,8 @@ void tcp_fastopen_init_key_once(bool publish)
 	 * All call sites of tcp_fastopen_cookie_gen also check
 	 * for a valid cookie, so this is an acceptable risk.
 	 */
-	if (net_get_random_once(key, sizeof(key)) && publish)
-		tcp_fastopen_reset_cipher(key, sizeof(key));
+	get_random_bytes(key, sizeof(key));
+	tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
 }
 
 static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -37,10 +41,37 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
 	kfree(ctx);
 }
 
-int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+void tcp_fastopen_destroy_cipher(struct sock *sk)
+{
+	struct tcp_fastopen_context *ctx;
+
+	ctx = rcu_dereference_protected(
+			inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
+	if (ctx)
+		call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
+}
+
+void tcp_fastopen_ctx_destroy(struct net *net)
+{
+	struct tcp_fastopen_context *ctxt;
+
+	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+	ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+	rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
+	spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+	if (ctxt)
+		call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
+}
+
+int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
+			      void *key, unsigned int len)
 {
-	int err;
 	struct tcp_fastopen_context *ctx, *octx;
+	struct fastopen_queue *q;
+	int err;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -61,26 +92,37 @@ error: kfree(ctx);
 	}
 	memcpy(ctx->key, key, len);
 
-	spin_lock(&tcp_fastopen_ctx_lock);
 
-	octx = rcu_dereference_protected(tcp_fastopen_ctx,
-				lockdep_is_held(&tcp_fastopen_ctx_lock));
-	rcu_assign_pointer(tcp_fastopen_ctx, ctx);
-	spin_unlock(&tcp_fastopen_ctx_lock);
+	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+	if (sk) {
+		q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+		octx = rcu_dereference_protected(q->ctx,
+			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+		rcu_assign_pointer(q->ctx, ctx);
+	} else {
+		octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+		rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+	}
+	spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
 
 	if (octx)
 		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
 	return err;
 }
 
-static bool __tcp_fastopen_cookie_gen(const void *path,
+static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
 				      struct tcp_fastopen_cookie *foc)
 {
 	struct tcp_fastopen_context *ctx;
 	bool ok = false;
 
 	rcu_read_lock();
-	ctx = rcu_dereference(tcp_fastopen_ctx);
+
+	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+	if (!ctx)
+		ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+
 	if (ctx) {
 		crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
 		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -96,7 +138,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
  *
  * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
  */
-static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+static bool tcp_fastopen_cookie_gen(struct sock *sk,
+				    struct request_sock *req,
 				    struct sk_buff *syn,
 				    struct tcp_fastopen_cookie *foc)
 {
@@ -104,7 +147,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
 		const struct iphdr *iph = ip_hdr(syn);
 
 		__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
-		return __tcp_fastopen_cookie_gen(path, foc);
+		return __tcp_fastopen_cookie_gen(sk, path, foc);
 	}
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -112,13 +155,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
 		const struct ipv6hdr *ip6h = ipv6_hdr(syn);
 		struct tcp_fastopen_cookie tmp;
 
-		if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+		if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
 			struct in6_addr *buf = &tmp.addr;
 			int i;
 
 			for (i = 0; i < 4; i++)
 				buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
-			return __tcp_fastopen_cookie_gen(buf, foc);
+			return __tcp_fastopen_cookie_gen(sk, buf, foc);
 		}
 	}
 #endif
@@ -216,12 +259,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	refcount_set(&req->rsk_refcnt, 2);
 
 	/* Now finish processing the fastopen child socket. */
-	inet_csk(child)->icsk_af_ops->rebuild_header(child);
-	tcp_init_congestion_control(child);
-	tcp_mtup_init(child);
-	tcp_init_metrics(child);
-	tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
-	tcp_init_buffer_space(child);
+	tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
 
 	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
 
@@ -271,33 +309,45 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	return true;
 }
 
+static bool tcp_fastopen_no_cookie(const struct sock *sk,
+				   const struct dst_entry *dst,
+				   int flag)
+{
+	return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+	       tcp_sk(sk)->fastopen_no_cookie ||
+	       (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
+}
+
 /* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
  * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
  * cookie request (foc->len == 0).
  */
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct request_sock *req,
-			      struct tcp_fastopen_cookie *foc)
+			      struct tcp_fastopen_cookie *foc,
+			      const struct dst_entry *dst)
 {
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sock *child;
 
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
 
-	if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
 	      (syn_data || foc->len >= 0) &&
 	      tcp_fastopen_queue_check(sk))) {
 		foc->len = -1;
 		return NULL;
 	}
 
-	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+	if (syn_data &&
+	    tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
 		goto fastopen;
 
 	if (foc->len >= 0 &&  /* Client presents or requests a cookie */
-	    tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+	    tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
 	    foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
 	    foc->len == valid_foc.len &&
 	    !memcmp(foc->val, valid_foc.val, foc->len)) {
@@ -330,6 +380,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 			       struct tcp_fastopen_cookie *cookie)
 {
 	unsigned long last_syn_loss = 0;
+	const struct dst_entry *dst;
 	int syn_loss = 0;
 
 	tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss);
@@ -347,7 +398,9 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 		return false;
 	}
 
-	if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+	dst = __sk_dst_get(sk);
+
+	if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) {
 		cookie->len = -1;
 		return true;
 	}
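
The new tcp_fastopen_no_cookie() helper, visible in the hunks above, centralizes the three ways the cookie requirement can be waived: the per-namespace sysctl_tcp_fastopen bits, the per-socket fastopen_no_cookie flag, and a per-route RTAX_FASTOPEN_NO_COOKIE metric. A standalone sketch of the decision (the TFO_CLIENT_NO_COOKIE value matches include/net/tcp.h as far as I can tell; the boolean inputs stand in for the real socket and dst lookups):

    #include <stdbool.h>
    #include <stdio.h>

    #define TFO_CLIENT_NO_COOKIE 0x4  /* believed to match include/net/tcp.h */

    /* Models tcp_fastopen_no_cookie(): any one source suffices. */
    static bool fastopen_no_cookie(int net_sysctl_tcp_fastopen,
                                   bool sk_fastopen_no_cookie,
                                   bool route_no_cookie_metric,
                                   int flag)
    {
        return (net_sysctl_tcp_fastopen & flag) ||
               sk_fastopen_no_cookie ||
               route_no_cookie_metric;
    }

    int main(void)
    {
        /* Neither sysctl nor socket option set, but the route says so: 1 */
        printf("%d\n", fastopen_no_cookie(0, false, true,
                                          TFO_CLIENT_NO_COOKIE));
        /* Nothing set anywhere: cookie still required, prints 0 */
        printf("%d\n", fastopen_no_cookie(0, false, false,
                                          TFO_CLIENT_NO_COOKIE));
        return 0;
    }
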
@@ -401,25 +454,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
  * TFO connection with data exchanges.
  */
 
-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
 /* Disable active TFO and record current jiffies and
  * tfo_active_disable_times
  */
 void tcp_fastopen_active_disable(struct sock *sk)
 {
-	atomic_inc(&tfo_active_disable_times);
-	tfo_active_disable_stamp = jiffies;
-	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+	struct net *net = sock_net(sk);
 
-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
-	atomic_set(&tfo_active_disable_times, 0);
+	atomic_inc(&net->ipv4.tfo_active_disable_times);
+	net->ipv4.tfo_active_disable_stamp = jiffies;
+	NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
 }
 
 /* Calculate timeout for tfo active disable
@@ -428,17 +472,18 @@ void tcp_fastopen_active_timeout_reset(void)
  */
 bool tcp_fastopen_active_should_disable(struct sock *sk)
 {
-	int tfo_da_times = atomic_read(&tfo_active_disable_times);
-	int multiplier;
+	unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+	int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
 	unsigned long timeout;
+	int multiplier;
 
 	if (!tfo_da_times)
 		return false;
 
 	/* Limit timout to max: 2^6 * initial timeout */
 	multiplier = 1 << min(tfo_da_times - 1, 6);
-	timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
-	if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+	timeout = multiplier * tfo_bh_timeout * HZ;
+	if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
 		return true;
 
 	/* Mark check bit so we can check for successful active TFO
@@ -457,27 +502,25 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct rb_node *p;
-	struct sk_buff *skb;
 	struct dst_entry *dst;
+	struct sk_buff *skb;
 
 	if (!tp->syn_fastopen)
 		return;
 
 	if (!tp->data_segs_in) {
-		p = rb_first(&tp->out_of_order_queue);
-		if (p && !rb_next(p)) {
-			skb = rb_entry(p, struct sk_buff, rbnode);
+		skb = skb_rb_first(&tp->out_of_order_queue);
+		if (skb && !skb_rb_next(skb)) {
 			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
 				tcp_fastopen_active_disable(sk);
 				return;
 			}
 		}
 	} else if (tp->syn_fastopen_ch &&
-		   atomic_read(&tfo_active_disable_times)) {
+		   atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
 		dst = sk_dst_get(sk);
 		if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
-			tcp_fastopen_active_timeout_reset();
+			atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
 		dst_release(dst);
 	}
 }
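
That completes the tcp_fastopen.c conversion: the cipher context, the blackhole timeout and the disable counters all move into struct netns_ipv4, and __tcp_fastopen_cookie_gen() now prefers a key installed on the listening socket (tcp_fastopen_reset_cipher() with a non-NULL sk, presumably via the TCP_FASTOPEN_KEY socket option added in the same series) over the namespace-wide one. A minimal sketch of that fallback order, with RCU elided:

    #include <stdio.h>

    struct tcp_fastopen_context { unsigned char key[16]; };

    /* Models the lookup order in __tcp_fastopen_cookie_gen() after the
     * patch: a context attached to the listener wins, otherwise the
     * per-namespace context is used.
     */
    static const struct tcp_fastopen_context *
    pick_ctx(const struct tcp_fastopen_context *sk_ctx,
             const struct tcp_fastopen_context *net_ctx)
    {
        return sk_ctx ? sk_ctx : net_ctx;
    }

    int main(void)
    {
        struct tcp_fastopen_context per_net  = { .key = { 1 } };
        struct tcp_fastopen_context per_sock = { .key = { 2 } };

        printf("listener key set   -> key[0]=%d\n",
               pick_ctx(&per_sock, &per_net)->key[0]);  /* 2 */
        printf("namespace key only -> key[0]=%d\n",
               pick_ctx(NULL, &per_net)->key[0]);       /* 1 */
        return 0;
    }
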
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c5d7656beeee..734cfc8ff76e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -75,25 +76,10 @@
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
 #include <linux/errqueue.h>
+#include <trace/events/tcp.h>
+#include <linux/static_key.h>
 
-int sysctl_tcp_fack __read_mostly;
-int sysctl_tcp_max_reordering __read_mostly = 300;
-int sysctl_tcp_dsack __read_mostly = 1;
-int sysctl_tcp_app_win __read_mostly = 31;
-int sysctl_tcp_adv_win_scale __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-
-/* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 1000;
-
-int sysctl_tcp_stdurg __read_mostly;
-int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
-int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
-int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 3;
-int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data. */
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update. */
@@ -114,7 +100,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
-#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
+#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
 #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
@@ -334,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
 	sndmem *= nr_segs * per_mss;
 
 	if (sk->sk_sndbuf < sndmem)
-		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+		sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -367,8 +353,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
-	int truesize = tcp_win_from_space(skb->truesize) >> 1;
-	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+	int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
 
 	while (tp->rcv_ssthresh <= window) {
 		if (truesize <= skb->len)
@@ -393,7 +379,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 		/* Check #2. Increase window, if skb with such overhead
 		 * will fit to rcvbuf in future.
 		 */
-		if (tcp_win_from_space(skb->truesize) <= skb->len)
+		if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
 			incr = 2 * tp->advmss;
 		else
 			incr = __tcp_grow_window(sk, skb);
@@ -419,11 +405,11 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 	/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
 	 * Allow enough cushion so that sender is not limited by our window
 	 */
-	if (sysctl_tcp_moderate_rcvbuf)
+	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
 		rcvmem <<= 2;
 
 	if (sk->sk_rcvbuf < rcvmem)
-		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+		sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 }
 
 /* 4. Try to fixup all. It is made immediately after connection enters
@@ -431,6 +417,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
  */
 void tcp_init_buffer_space(struct sock *sk)
 {
+	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
 
@@ -449,14 +436,14 @@ void tcp_init_buffer_space(struct sock *sk)
 	if (tp->window_clamp >= maxwin) {
 		tp->window_clamp = maxwin;
 
-		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+		if (tcp_app_win && maxwin > 4 * tp->advmss)
 			tp->window_clamp = max(maxwin -
-					       (maxwin >> sysctl_tcp_app_win),
+					       (maxwin >> tcp_app_win),
 					       4 * tp->advmss);
 	}
 
 	/* Force reservation of one segment. */
-	if (sysctl_tcp_app_win &&
+	if (tcp_app_win &&
 	    tp->window_clamp > 2 * tp->advmss &&
 	    tp->window_clamp + tp->advmss > maxwin)
 		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
@@ -470,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct net *net = sock_net(sk);
 
 	icsk->icsk_ack.quick = 0;
 
-	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+	if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 	    !tcp_under_memory_pressure(sk) &&
 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-				    sysctl_tcp_rmem[2]);
+				    net->ipv4.sysctl_tcp_rmem[2]);
 	}
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -609,7 +597,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	 * <prev RTT . ><current RTT .. ><next RTT .... >
 	 */
 
-	if (sysctl_tcp_moderate_rcvbuf &&
+	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
 		int rcvwin, rcvmem, rcvbuf;
 
@@ -633,10 +621,11 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	}
 
 	rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
-	while (tcp_win_from_space(rcvmem) < tp->advmss)
+	while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
 		rcvmem += 128;
 
-	rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+	rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+		     sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 	if (rcvbuf > sk->sk_rcvbuf) {
 		sk->sk_rcvbuf = rcvbuf;
 
@@ -780,15 +769,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 	tp->srtt_us = max(1U, srtt);
 }
 
-/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
- * Note: TCP stack does not yet implement pacing.
- * FQ packet scheduler can be used to implement cheap but effective
- * TCP pacing, to smooth the burst on large writes when packets
- * in flight is significantly lower than cwnd (or rwin)
- */
-int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
-int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
-
 static void tcp_update_pacing_rate(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
@@ -806,21 +786,21 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	 * end of slow start and should slow down.
 	 */
 	if (tp->snd_cwnd < tp->snd_ssthresh / 2)
-		rate *= sysctl_tcp_pacing_ss_ratio;
+		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
 	else
-		rate *= sysctl_tcp_pacing_ca_ratio;
+		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
 	if (likely(tp->srtt_us))
 		do_div(rate, tp->srtt_us);
 
-	/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+	/* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
 	 * without any lock. We want to make sure compiler wont store
 	 * intermediate values in this location.
 	 */
-	ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
-						sk->sk_max_pacing_rate);
+	WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
+					     sk->sk_max_pacing_rate));
 }
 
 /* Calculate rto without backoff. This is the second half of Van Jacobson's
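
The pacing-ratio sysctls move into netns alongside the rest, and the deprecated ACCESS_ONCE() store becomes WRITE_ONCE(). The arithmetic is untouched: roughly mss * cwnd / srtt, scaled by 200% in slow start and 120% afterwards. A simplified standalone model (the kernel works in bytes/sec with USEC_PER_SEC scaling and keeps srtt_us left-shifted by 3; both details are folded together here):

    #include <stdint.h>
    #include <stdio.h>

    /* Models the tcp_update_pacing_rate() arithmetic after the patch
     * (sysctl defaults: 200 in slow start, 120 in congestion avoidance).
     * Simplified: no max_pacing_rate cap, plain srtt in microseconds.
     */
    static uint64_t pacing_rate(uint32_t mss, uint32_t cwnd, uint32_t srtt_us,
                                int in_slow_start, int ss_ratio, int ca_ratio)
    {
        uint64_t rate = (uint64_t)mss * 1000000 / 100;  /* percent-scaled mss/sec */

        rate *= in_slow_start ? ss_ratio : ca_ratio;
        rate *= cwnd;
        return srtt_us ? rate / srtt_us : rate;
    }

    int main(void)
    {
        /* 1448-byte segments, cwnd 10, 20 ms srtt */
        printf("ss: %llu bytes/sec\n",
               (unsigned long long)pacing_rate(1448, 10, 20000, 1, 200, 120));
        printf("ca: %llu bytes/sec\n",
               (unsigned long long)pacing_rate(1448, 10, 20000, 0, 200, 120));
        return 0;
    }
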
@@ -862,60 +842,46 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
-/*
- * Packet counting of FACK is based on in-order assumptions, therefore TCP
- * disables it when reordering is detected
- */
-void tcp_disable_fack(struct tcp_sock *tp)
-{
-	/* RFC3517 uses different metric in lost marker => reset on change */
-	if (tcp_is_fack(tp))
-		tp->lost_skb_hint = NULL;
-	tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
-}
-
 /* Take a notice that peer is sending D-SACKs */
 static void tcp_dsack_seen(struct tcp_sock *tp)
 {
 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+	tp->rack.dsack_seen = 1;
 }
 
-static void tcp_update_reordering(struct sock *sk, const int metric,
-				  const int ts)
+/* It's reordering when higher sequence was delivered (i.e. sacked) before
+ * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+ * distance is approximated in full-mss packet distance ("reordering").
+ */
+static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+				      const int ts)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int mib_idx;
+	const u32 mss = tp->mss_cache;
+	u32 fack, metric;
 
-	if (WARN_ON_ONCE(metric < 0))
+	fack = tcp_highest_sack_seq(tp);
+	if (!before(low_seq, fack))
 		return;
 
-	if (metric > tp->reordering) {
-		tp->reordering = min(sysctl_tcp_max_reordering, metric);
-
+	metric = fack - low_seq;
+	if ((metric > tp->reordering * mss) && mss) {
 #if FASTRETRANS_DEBUG > 1
 		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
 			 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
 			 tp->reordering,
-			 tp->fackets_out,
+			 0,
 			 tp->sacked_out,
 			 tp->undo_marker ? tp->undo_retrans : 0);
 #endif
-		tcp_disable_fack(tp);
+		tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
 	}
 
 	tp->rack.reord = 1;
-
 	/* This exciting event is worth to be remembered. 8) */
-	if (ts)
-		mib_idx = LINUX_MIB_TCPTSREORDER;
-	else if (tcp_is_reno(tp))
-		mib_idx = LINUX_MIB_TCPRENOREORDER;
-	else if (tcp_is_fack(tp))
-		mib_idx = LINUX_MIB_TCPFACKREORDER;
-	else
-		mib_idx = LINUX_MIB_TCPSACKREORDER;
-
-	NET_INC_STATS(sock_net(sk), mib_idx);
+	NET_INC_STATS(sock_net(sk),
+		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
 }
 
 /* This must be called before lost_out is incremented */
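
This hunk is the heart of the FACK removal: instead of counting fackets_out, reordering is now estimated directly in sequence space. If a never-retransmitted sequence (low_seq) is SACKed below the highest SACKed sequence, the gap is converted to whole-MSS packets, rounded up, and clamped by the now per-namespace tcp_max_reordering. A compact model of that computation:

    #include <stdint.h>
    #include <stdio.h>

    /* Sequence-space "before", wraparound-safe, as in the kernel. */
    static int seq_before(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) < 0;
    }

    /* Models tcp_check_sack_reordering(): the reordering degree is the gap
     * between the highest SACKed sequence and a lower never-retransmitted
     * one, rounded up to full-mss packets and clamped at max_reordering.
     */
    static uint32_t sack_reordering(uint32_t low_seq, uint32_t fack,
                                    uint32_t mss, uint32_t cur_reordering,
                                    uint32_t max_reordering)
    {
        uint32_t metric;

        if (!seq_before(low_seq, fack) || !mss)
            return cur_reordering;  /* no reordering evidence */

        metric = fack - low_seq;
        if (metric > cur_reordering * mss)
            cur_reordering = (metric + mss - 1) / mss;
        if (cur_reordering > max_reordering)
            cur_reordering = max_reordering;
        return cur_reordering;
    }

    int main(void)
    {
        /* 10 segments of reordering distance on a 1448-byte mss path */
        printf("reordering = %u\n",
               sack_reordering(1000, 1000 + 10 * 1448, 1448, 3, 300));
        return 0;
    }
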
@@ -989,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
  * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	A'. Reno "three dupacks" marks head of queue lost.
- *	A''. Its FACK modification, head until snd.fack is lost.
  *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
@@ -1132,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 }
 
 struct tcp_sacktag_state {
-	int	reord;
-	int	fack_count;
+	u32	reord;
 	/* Timestamps for earliest and latest never-retransmitted segment
 	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
 	 * but congestion control should still get an accurate delay signal.
@@ -1142,6 +1106,7 @@ struct tcp_sacktag_state {
 	u64	last_sackt;
 	struct rate_sample *rate;
 	int	flag;
+	unsigned int mss_now;
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1191,7 +1156,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 		if (pkt_len >= skb->len && !in_sack)
 			return 0;
 
-		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+		err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+				   pkt_len, mss, GFP_ATOMIC);
 		if (err < 0)
 			return err;
 	}
@@ -1207,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk,
 			  u64 xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int fack_count = state->fack_count;
 
 	/* Account D-SACK for retransmitted packet. */
 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
 		if (tp->undo_marker && tp->undo_retrans > 0 &&
 		    after(end_seq, tp->undo_marker))
 			tp->undo_retrans--;
-		if (sacked & TCPCB_SACKED_ACKED)
-			state->reord = min(fack_count, state->reord);
+		if ((sacked & TCPCB_SACKED_ACKED) &&
+		    before(start_seq, state->reord))
+			state->reord = start_seq;
 	}
 
 	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
@@ -1241,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
 				 * which was in hole. It is reordering.
 				 */
 				if (before(start_seq,
-					   tcp_highest_sack_seq(tp)))
-					state->reord = min(fack_count,
-							   state->reord);
+					   tcp_highest_sack_seq(tp)) &&
+				    before(start_seq, state->reord))
+					state->reord = start_seq;
+
 				if (!after(end_seq, tp->high_seq))
 					state->flag |= FLAG_ORIG_SACK_ACKED;
 				if (state->first_sackt == 0)
@@ -1262,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
 		tp->sacked_out += pcount;
 		tp->delivered += pcount;  /* Out-of-order packets delivered */
 
-		fack_count += pcount;
-
 		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
-		if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+		if (tp->lost_skb_hint &&
 		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
 			tp->lost_cnt_hint += pcount;
-
-		if (fack_count > tp->fackets_out)
-			tp->fackets_out = fack_count;
 	}
 
 	/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1288,13 +1250,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
 /* Shift newly-SACKed bytes from this skb to the immediately previous
  * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
  */
-static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+			    struct sk_buff *skb,
 			    struct tcp_sacktag_state *state,
 			    unsigned int pcount, int shifted, int mss,
 			    bool dup_sack)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
 	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
 	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */
 
@@ -1363,8 +1325,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
 		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
 
-	tcp_unlink_write_queue(skb, sk);
-	sk_wmem_free_skb(sk, skb);
+	tcp_rtx_queue_unlink_and_free(skb, sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
 
@@ -1414,9 +1375,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		goto fallback;
 
 	/* Can only happen with delayed DSACK + discard craziness */
-	if (unlikely(skb == tcp_write_queue_head(sk)))
+	prev = skb_rb_prev(skb);
+	if (!prev)
 		goto fallback;
-	prev = tcp_write_queue_prev(sk, skb);
 
 	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
 		goto fallback;
@@ -1495,18 +1456,17 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
-	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
 		goto out;
 
 	/* Hole filled allows collapsing with the next as well, this is very
 	 * useful when hole on every nth skb pattern happens
 	 */
-	if (prev == tcp_write_queue_tail(sk))
+	skb = skb_rb_next(prev);
+	if (!skb)
 		goto out;
-	skb = tcp_write_queue_next(sk, prev);
 
 	if (!skb_can_shift(skb) ||
-	    (skb == tcp_send_head(sk)) ||
 	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
 	    (mss != tcp_skb_seglen(skb)))
 		goto out;
@@ -1514,11 +1474,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	len = skb->len;
 	if (skb_shift(prev, skb, len)) {
 		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+		tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+				len, mss, 0);
 	}
 
 out:
-	state->fack_count += pcount;
 	return prev;
 
 noop:
@@ -1538,13 +1498,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *tmp;
 
-	tcp_for_write_queue_from(skb, sk) {
+	skb_rbtree_walk_from(skb) {
 		int in_sack = 0;
 		bool dup_sack = dup_sack_in;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
 		/* queue is in-order => we can short-circuit the walk early */
 		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 			break;
@@ -1593,34 +1550,48 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						tcp_skb_pcount(skb),
 						skb->skb_mstamp);
 			tcp_rate_skb_delivered(sk, skb, state->rate);
+			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+				list_del_init(&skb->tcp_tsorted_anchor);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
 				tcp_advance_highest_sack(sk, skb);
 		}
-
-		state->fack_count += tcp_skb_pcount(skb);
 	}
 	return skb;
 }
 
-/* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+					   struct tcp_sacktag_state *state,
+					   u32 seq)
+{
+	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+	struct sk_buff *skb;
+
+	while (*p) {
+		parent = *p;
+		skb = rb_to_skb(parent);
+		if (before(seq, TCP_SKB_CB(skb)->seq)) {
+			p = &parent->rb_left;
+			continue;
+		}
+		if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+			p = &parent->rb_right;
+			continue;
+		}
+		return skb;
+	}
+	return NULL;
+}
+
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 					struct tcp_sacktag_state *state,
 					u32 skip_to_seq)
 {
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
-		if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
-			break;
+	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+		return skb;
 
-		state->fack_count += tcp_skb_pcount(skb);
-	}
-	return skb;
+	return tcp_sacktag_bsearch(sk, state, skip_to_seq);
 }
 
 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1665,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1665 int first_sack_index; 1636 int first_sack_index;
1666 1637
1667 state->flag = 0; 1638 state->flag = 0;
1668 state->reord = tp->packets_out; 1639 state->reord = tp->snd_nxt;
1669 1640
1670 if (!tp->sacked_out) { 1641 if (!tp->sacked_out)
1671 if (WARN_ON(tp->fackets_out))
1672 tp->fackets_out = 0;
1673 tcp_highest_sack_reset(sk); 1642 tcp_highest_sack_reset(sk);
1674 }
1675 1643
1676 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, 1644 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1677 num_sacks, prior_snd_una); 1645 num_sacks, prior_snd_una);
@@ -1742,8 +1710,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1742 } 1710 }
1743 } 1711 }
1744 1712
1745 skb = tcp_write_queue_head(sk); 1713 state->mss_now = tcp_current_mss(sk);
1746 state->fack_count = 0; 1714 skb = NULL;
1747 i = 0; 1715 i = 0;
1748 1716
1749 if (!tp->sacked_out) { 1717 if (!tp->sacked_out) {
@@ -1800,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1800 skb = tcp_highest_sack(sk); 1768 skb = tcp_highest_sack(sk);
1801 if (!skb) 1769 if (!skb)
1802 break; 1770 break;
1803 state->fack_count = tp->fackets_out;
1804 cache++; 1771 cache++;
1805 goto walk; 1772 goto walk;
1806 } 1773 }
@@ -1815,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1815 skb = tcp_highest_sack(sk); 1782 skb = tcp_highest_sack(sk);
1816 if (!skb) 1783 if (!skb)
1817 break; 1784 break;
1818 state->fack_count = tp->fackets_out;
1819 } 1785 }
1820 skb = tcp_sacktag_skip(skb, sk, state, start_seq); 1786 skb = tcp_sacktag_skip(skb, sk, state, start_seq);
1821 1787
@@ -1835,9 +1801,8 @@ advance_sp:
1835 for (j = 0; j < used_sacks; j++) 1801 for (j = 0; j < used_sacks; j++)
1836 tp->recv_sack_cache[i++] = sp[j]; 1802 tp->recv_sack_cache[i++] = sp[j];
1837 1803
1838 if ((state->reord < tp->fackets_out) && 1804 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
1839 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) 1805 tcp_check_sack_reordering(sk, state->reord, 0);
1840 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
1841 1806
1842 tcp_verify_left_out(tp); 1807 tcp_verify_left_out(tp);
1843out: 1808out:
@@ -1875,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1875static void tcp_check_reno_reordering(struct sock *sk, const int addend) 1840static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1876{ 1841{
1877 struct tcp_sock *tp = tcp_sk(sk); 1842 struct tcp_sock *tp = tcp_sk(sk);
1878 if (tcp_limit_reno_sacked(tp)) 1843
1879 tcp_update_reordering(sk, tp->packets_out + addend, 0); 1844 if (!tcp_limit_reno_sacked(tp))
1845 return;
1846
1847 tp->reordering = min_t(u32, tp->packets_out + addend,
1848 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
1849 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
1880} 1850}
1881 1851
1882/* Emulate SACKs for SACKless connection: account for a new dupack. */ 1852/* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -1922,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp)
1922 tp->lost_out = 0; 1892 tp->lost_out = 0;
1923 tp->undo_marker = 0; 1893 tp->undo_marker = 0;
1924 tp->undo_retrans = -1; 1894 tp->undo_retrans = -1;
1925 tp->fackets_out = 0;
1926 tp->sacked_out = 0; 1895 tp->sacked_out = 0;
1927} 1896}
1928 1897
@@ -1967,19 +1936,15 @@ void tcp_enter_loss(struct sock *sk)
1967 if (tcp_is_reno(tp)) 1936 if (tcp_is_reno(tp))
1968 tcp_reset_reno_sack(tp); 1937 tcp_reset_reno_sack(tp);
1969 1938
1970 skb = tcp_write_queue_head(sk); 1939 skb = tcp_rtx_queue_head(sk);
1971 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); 1940 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
1972 if (is_reneg) { 1941 if (is_reneg) {
1973 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); 1942 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1974 tp->sacked_out = 0; 1943 tp->sacked_out = 0;
1975 tp->fackets_out = 0;
1976 } 1944 }
1977 tcp_clear_all_retrans_hints(tp); 1945 tcp_clear_all_retrans_hints(tp);
1978 1946
1979 tcp_for_write_queue(skb, sk) { 1947 skb_rbtree_walk_from(skb) {
1980 if (skb == tcp_send_head(sk))
1981 break;
1982
1983 mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || 1948 mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
1984 is_reneg); 1949 is_reneg);
1985 if (mark_lost) 1950 if (mark_lost)
@@ -2013,7 +1978,7 @@ void tcp_enter_loss(struct sock *sk)
2013 * falsely raise the receive window, which results in repeated 1978 * falsely raise the receive window, which results in repeated
2014 * timeouts and stop-and-go behavior. 1979 * timeouts and stop-and-go behavior.
2015 */ 1980 */
2016 tp->frto = sysctl_tcp_frto && 1981 tp->frto = net->ipv4.sysctl_tcp_frto &&
2017 (new_recovery || icsk->icsk_retransmits) && 1982 (new_recovery || icsk->icsk_retransmits) &&
2018 !inet_csk(sk)->icsk_mtup.probe_size; 1983 !inet_csk(sk)->icsk_mtup.probe_size;
2019} 1984}
@@ -2042,19 +2007,10 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2042 return false; 2007 return false;
2043} 2008}
2044 2009
2045static inline int tcp_fackets_out(const struct tcp_sock *tp)
2046{
2047 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2048}
2049
2050/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs 2010/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
2051 * counter when SACK is enabled (without SACK, sacked_out is used for 2011 * counter when SACK is enabled (without SACK, sacked_out is used for
2052 * that purpose). 2012 * that purpose).
2053 * 2013 *
2054 * Instead, with FACK TCP uses fackets_out that includes both SACKed
2055 * segments up to the highest received SACK block so far and holes in
2056 * between them.
2057 *
2058 * With reordering, holes may still be in flight, so RFC3517 recovery 2014 * With reordering, holes may still be in flight, so RFC3517 recovery
2059 * uses pure sacked_out (total number of SACKed segments) even though 2015 * uses pure sacked_out (total number of SACKed segments) even though
2060 * it violates the RFC that uses duplicate ACKs, often these are equal 2016 * it violates the RFC that uses duplicate ACKs, often these are equal
@@ -2064,10 +2020,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp)
2064 */ 2020 */
2065static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) 2021static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2066{ 2022{
2067 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2023 return tp->sacked_out + 1;
2068} 2024}
2069 2025
2070/* Linux NewReno/SACK/FACK/ECN state machine. 2026/* Linux NewReno/SACK/ECN state machine.
2071 * -------------------------------------- 2027 * --------------------------------------
2072 * 2028 *
2073 * "Open" Normal state, no dubious events, fast path. 2029 * "Open" Normal state, no dubious events, fast path.
@@ -2132,16 +2088,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2132 * dynamically measured and adjusted. This is implemented in 2088 * dynamically measured and adjusted. This is implemented in
2133 * tcp_rack_mark_lost. 2089 * tcp_rack_mark_lost.
2134 * 2090 *
2135 * FACK (Disabled by default. Subsumbed by RACK):
2136 * It is the simplest heuristics. As soon as we decided
2137 * that something is lost, we decide that _all_ not SACKed
2138 * packets until the most forward SACK are lost. I.e.
2139 * lost_out = fackets_out - sacked_out and left_out = fackets_out.
2140 * It is absolutely correct estimate, if network does not reorder
2141 * packets. And it loses any connection to reality when reordering
2142 * takes place. We use FACK by default until reordering
2143 * is suspected on the path to this destination.
2144 *
2145 * If the receiver does not support SACK: 2091 * If the receiver does not support SACK:
2146 * 2092 *
2147 * NewReno (RFC6582): in Recovery we assume that one segment 2093 * NewReno (RFC6582): in Recovery we assume that one segment
@@ -2190,7 +2136,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2190} 2136}
2191 2137
2192/* Detect loss in event "A" above by marking head of queue up as lost. 2138/* Detect loss in event "A" above by marking head of queue up as lost.
2193 * For FACK or non-SACK(Reno) senders, the first "packets" number of segments 2139 * For non-SACK(Reno) senders, the first "packets" number of segments
2194 * are considered lost. For RFC3517 SACK, a segment is considered lost if it 2140 * are considered lost. For RFC3517 SACK, a segment is considered lost if it
2195 * has at least tp->reordering SACKed seqments above it; "packets" refers to 2141 * has at least tp->reordering SACKed seqments above it; "packets" refers to
2196 * the maximum SACKed segments to pass before reaching this limit. 2142 * the maximum SACKed segments to pass before reaching this limit.
@@ -2205,20 +2151,18 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2205 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; 2151 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2206 2152
2207 WARN_ON(packets > tp->packets_out); 2153 WARN_ON(packets > tp->packets_out);
2208 if (tp->lost_skb_hint) { 2154 skb = tp->lost_skb_hint;
2209 skb = tp->lost_skb_hint; 2155 if (skb) {
2210 cnt = tp->lost_cnt_hint;
2211 /* Head already handled? */ 2156 /* Head already handled? */
2212 if (mark_head && skb != tcp_write_queue_head(sk)) 2157 if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
2213 return; 2158 return;
2159 cnt = tp->lost_cnt_hint;
2214 } else { 2160 } else {
2215 skb = tcp_write_queue_head(sk); 2161 skb = tcp_rtx_queue_head(sk);
2216 cnt = 0; 2162 cnt = 0;
2217 } 2163 }
2218 2164
2219 tcp_for_write_queue_from(skb, sk) { 2165 skb_rbtree_walk_from(skb) {
2220 if (skb == tcp_send_head(sk))
2221 break;
2222 /* TODO: do this better */ 2166 /* TODO: do this better */
2223 /* this is not the most efficient way to do this... */ 2167 /* this is not the most efficient way to do this... */
2224 tp->lost_skb_hint = skb; 2168 tp->lost_skb_hint = skb;
@@ -2228,12 +2172,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2228 break; 2172 break;
2229 2173
2230 oldcnt = cnt; 2174 oldcnt = cnt;
2231 if (tcp_is_fack(tp) || tcp_is_reno(tp) || 2175 if (tcp_is_reno(tp) ||
2232 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 2176 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2233 cnt += tcp_skb_pcount(skb); 2177 cnt += tcp_skb_pcount(skb);
2234 2178
2235 if (cnt > packets) { 2179 if (cnt > packets) {
2236 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || 2180 if (tcp_is_sack(tp) ||
2237 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || 2181 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2238 (oldcnt >= packets)) 2182 (oldcnt >= packets))
2239 break; 2183 break;
@@ -2242,7 +2186,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			/* If needed, chop off the prefix to mark as lost. */
 			lost = (packets - oldcnt) * mss;
 			if (lost < skb->len &&
-			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+			    tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+					 lost, mss, GFP_ATOMIC) < 0)
 				break;
 			cnt = packets;
 		}
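
The hunks above replace queue-position checks with sequence-number checks such as after(TCP_SKB_CB(skb)->seq, tp->snd_una). For readers following along, here is a minimal userspace sketch of the wrap-around-safe before()/after() idiom the kernel helpers implement (the helper names and the demo values are illustrative): TCP sequence numbers live in a 32-bit space that wraps, so ordering is decided by a signed difference, not a plain "<".

    #include <stdint.h>
    #include <stdio.h>

    /* true if seq1 precedes seq2 in 32-bit wrap-around arithmetic */
    static int seq_before(uint32_t seq1, uint32_t seq2)
    {
        return (int32_t)(seq1 - seq2) < 0;
    }

    static int seq_after(uint32_t seq1, uint32_t seq2)
    {
        return seq_before(seq2, seq1);
    }

    int main(void)
    {
        /* 10 bytes past the wrap point still compare as "after" */
        printf("%d\n", seq_after(5, 0xfffffffbu));   /* prints 1 */
        printf("%d\n", seq_before(0xfffffffbu, 5));  /* prints 1 */
        return 0;
    }
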
@@ -2263,11 +2208,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 
 	if (tcp_is_reno(tp)) {
 		tcp_mark_head_lost(sk, 1, 1);
-	} else if (tcp_is_fack(tp)) {
-		int lost = tp->fackets_out - tp->reordering;
-		if (lost <= 0)
-			lost = 1;
-		tcp_mark_head_lost(sk, lost, 0);
 	} else {
 		int sacked_upto = tp->sacked_out - tp->reordering;
 		if (sacked_upto >= 0)
@@ -2326,16 +2266,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
 	if (tp->retrans_out)
 		return true;
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
 		return true;
 
 	return false;
 }
 
-#if FASTRETRANS_DEBUG > 1
 static void DBGUNDO(struct sock *sk, const char *msg)
 {
+#if FASTRETRANS_DEBUG > 1
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 
@@ -2357,10 +2297,8 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 			 tp->packets_out);
 	}
 #endif
-}
-#else
-#define DBGUNDO(x...) do { } while (0)
 #endif
+}
 
 static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 {
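
The DBGUNDO change moves the preprocessor guard inside the function body, so the function is always defined and the old do-nothing macro stub can be dropped while call sites stay type-checked in every configuration. A compilable sketch of that pattern, with DEBUG_UNDO standing in for the kernel's FASTRETRANS_DEBUG knob:

    #include <stdio.h>

    #define DEBUG_UNDO 0

    /* always defined; the body compiles away when the knob is off */
    static void dbg_undo(const char *msg)
    {
    #if DEBUG_UNDO > 1
        fprintf(stderr, "undo: %s\n", msg);
    #else
        (void)msg;  /* argument still type-checked, then discarded */
    #endif
    }

    int main(void)
    {
        dbg_undo("D-SACK");  /* a no-op call when debugging is off */
        return 0;
    }
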
@@ -2369,9 +2307,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	if (unmark_loss) {
 		struct sk_buff *skb;
 
-		tcp_for_write_queue(skb, sk) {
-			if (skb == tcp_send_head(sk))
-				break;
+		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
 		tp->lost_out = 0;
@@ -2416,6 +2352,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 			mib_idx = LINUX_MIB_TCPFULLUNDO;
 
 		NET_INC_STATS(sock_net(sk), mib_idx);
+	} else if (tp->rack.reo_wnd_persist) {
+		tp->rack.reo_wnd_persist--;
 	}
 	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
 		/* Hold old state until something *above* high_seq
@@ -2435,6 +2373,8 @@ static bool tcp_try_undo_dsack(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tp->undo_marker && !tp->undo_retrans) {
+		tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+					       tp->rack.reo_wnd_persist + 1);
 		DBGUNDO(sk, "D-SACK");
 		tcp_undo_cwnd_reduction(sk, false);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
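
Taken together, these two hunks make reo_wnd_persist a saturating counter: a D-SACK-detected spurious recovery bumps it toward a cap, and later undo events decay it by one. A compilable userspace sketch of that bookkeeping follows; the cap of 16 is an assumption standing in for TCP_RACK_RECOVERY_THRESH, and the struct is a stand-in for the tcp_sock RACK state:

    #define RACK_PERSIST_CAP 16  /* assumed stand-in for TCP_RACK_RECOVERY_THRESH */

    struct rack_state {
        unsigned int reo_wnd_persist;
    };

    /* spurious recovery detected via D-SACK: keep a wide reo window longer */
    static void on_dsack_undo(struct rack_state *rs)
    {
        if (rs->reo_wnd_persist < RACK_PERSIST_CAP)
            rs->reo_wnd_persist++;
    }

    /* an undo happened without fresh D-SACK evidence: slowly forget */
    static void on_recovery_undo(struct rack_state *rs)
    {
        if (rs->reo_wnd_persist)
            rs->reo_wnd_persist--;
    }
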
@@ -2614,11 +2554,8 @@ void tcp_simple_retransmit(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	unsigned int mss = tcp_current_mss(sk);
-	u32 prior_lost = tp->lost_out;
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
+	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
 		if (tcp_skb_seglen(skb) > mss &&
 		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@ -2631,7 +2568,7 @@ void tcp_simple_retransmit(struct sock *sk)
 
 	tcp_clear_retrans_hints_partial(tp);
 
-	if (prior_lost == tp->lost_out)
+	if (!tp->lost_out)
 		return;
 
 	if (tcp_is_reno(tp))
@@ -2712,7 +2649,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 	 * is updated in tcp_ack()). Otherwise fall back to
 	 * the conventional recovery.
 	 */
-	if (tcp_send_head(sk) &&
+	if (!tcp_write_queue_empty(sk) &&
 	    after(tcp_wnd_end(tp), tp->snd_nxt)) {
 		*rexmit = REXMIT_NEW;
 		return;
@@ -2739,15 +2676,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 }
 
 /* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tp->undo_marker && tcp_packet_delayed(tp)) {
 		/* Plain luck! Hole if filled with delayed
-		 * packet, rather than with a retransmit.
+		 * packet, rather than with a retransmit. Check reordering.
 		 */
-		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+		tcp_check_sack_reordering(sk, prior_snd_una, 1);
 
 		/* We are getting evidence that the reordering degree is higher
 		 * than we realized. If there are no retransmits out then we
@@ -2774,7 +2711,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Use RACK to detect loss */
-	if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+	if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
 		u32 prior_retrans = tp->retrans_out;
 
 		tcp_rack_mark_lost(sk);
@@ -2783,6 +2720,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
 	}
 }
 
+static bool tcp_force_fast_retransmit(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	return after(tcp_highest_sack_seq(tp),
+		     tp->snd_una + tp->reordering * tp->mss_cache);
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
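
The new tcp_force_fast_retransmit() helper replaces the old fackets_out counting: recovery is now forced once the highest SACKed sequence sits more than reordering * MSS bytes beyond the first unacked byte. A compilable sketch of that test, with plain uint32_t stand-ins for the tcp_sock fields:

    #include <stdint.h>

    /* wrap-safe version of the kernel's after() comparison */
    static int force_fast_retransmit(uint32_t highest_sack_seq,
                                     uint32_t snd_una,
                                     uint32_t reordering, uint32_t mss)
    {
        return (int32_t)(highest_sack_seq -
                         (snd_una + reordering * mss)) > 0;
    }
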
@@ -2795,19 +2740,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 				  bool is_dupack, int *ack_flag, int *rexmit)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fast_rexmit = 0, flag = *ack_flag;
 	bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
-				     (tcp_fackets_out(tp) > tp->reordering));
+				     tcp_force_fast_retransmit(sk));
 
-	if (WARN_ON(!tp->packets_out && tp->sacked_out))
+	if (!tp->packets_out && tp->sacked_out)
 		tp->sacked_out = 0;
-	if (WARN_ON(!tp->sacked_out && tp->fackets_out))
-		tp->fackets_out = 0;
 
 	/* Now state machine starts.
 	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -2854,11 +2797,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 			if (tcp_is_reno(tp) && is_dupack)
 				tcp_add_reno_sack(sk);
 		} else {
-			if (tcp_try_undo_partial(sk, acked))
+			if (tcp_try_undo_partial(sk, prior_snd_una))
 				return;
 			/* Partial ACK arrived. Force fast retransmit. */
 			do_lost = tcp_is_reno(tp) ||
-				  tcp_fackets_out(tp) > tp->reordering;
+				  tcp_force_fast_retransmit(sk);
 		}
 		if (tcp_try_undo_dsack(sk)) {
 			tcp_try_keep_open(sk);
@@ -2873,6 +2816,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 		      (*ack_flag & FLAG_LOST_RETRANS)))
 			return;
 		/* Change state if cwnd is undone or retransmits are lost */
+		/* fall through */
 	default:
 		if (tcp_is_reno(tp)) {
 			if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2913,8 +2857,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 
 static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
 {
+	u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
 
 	minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
 			   rtt_us ? : jiffies_to_usecs(1));
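
For context, minmax_running_min() maintains a minimum over a sliding time window (here tcp_min_rtt_wlen seconds). A deliberately naive compilable sketch of the idea is below; the kernel version keeps several candidate samples for better accuracy, while this simplification keeps one. Initialize val to UINT32_MAX before the first update:

    #include <stdint.h>

    struct win_min {
        uint32_t val;    /* current windowed minimum */
        uint32_t stamp;  /* time the minimum was recorded */
    };

    static uint32_t win_min_update(struct win_min *m, uint32_t now,
                                   uint32_t win, uint32_t sample)
    {
        /* take the sample if it is a new min, or the old one expired */
        if (sample <= m->val || (uint32_t)(now - m->stamp) > win) {
            m->val = sample;
            m->stamp = now;
        }
        return m->val;
    }
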
@@ -3020,7 +2964,7 @@ void tcp_rearm_rto(struct sock *sk)
 /* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
 static void tcp_set_xmit_timer(struct sock *sk)
 {
-	if (!tcp_schedule_loss_probe(sk))
+	if (!tcp_schedule_loss_probe(sk, true))
 		tcp_rearm_rto(sk);
 }
 
@@ -3056,28 +3000,31 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
 
 	shinfo = skb_shinfo(skb);
 	if (!before(shinfo->tskey, prior_snd_una) &&
-	    before(shinfo->tskey, tcp_sk(sk)->snd_una))
-		__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+	    before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
+		tcp_skb_tsorted_save(skb) {
+			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+		} tcp_skb_tsorted_restore(skb);
+	}
 }
 
 /* Remove acknowledged frames from the retransmission queue. If our packet
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una, int *acked,
+static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+			       u32 prior_snd_una,
 			       struct tcp_sacktag_state *sack)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u64 first_ackt, last_ackt;
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 prior_sacked = tp->sacked_out;
-	u32 reord = tp->packets_out;
+	u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
+	struct sk_buff *skb, *next;
 	bool fully_acked = true;
 	long sack_rtt_us = -1L;
 	long seq_rtt_us = -1L;
 	long ca_rtt_us = -1L;
-	struct sk_buff *skb;
 	u32 pkts_acked = 0;
 	u32 last_in_flight = 0;
 	bool rtt_update;
@@ -3085,8 +3032,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 	first_ackt = 0;
 
-	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+	for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+		const u32 start_seq = scb->seq;
 		u8 sacked = scb->sacked;
 		u32 acked_pcount;
 
@@ -3103,8 +3051,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				break;
 			fully_acked = false;
 		} else {
-			/* Speedup tcp_unlink_write_queue() and next loop */
-			prefetchw(skb->next);
 			acked_pcount = tcp_skb_pcount(skb);
 		}
 
@@ -3119,7 +3065,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				first_ackt = last_ackt;
 
 			last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
-			reord = min(pkts_acked, reord);
+			if (before(start_seq, reord))
+				reord = start_seq;
 			if (!after(scb->end_seq, tp->high_seq))
 				flag |= FLAG_ORIG_SACK_ACKED;
 		}
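
The reordering bookkeeping changes from counting packets to tracking sequences: while walking newly acked skbs, the code remembers the lowest start sequence that was cumulatively acked without having been retransmitted (and, in the kernel, without having been SACKed). If that sequence is below the previously highest SACKed sequence, a hole was filled by the original transmission, which means the path reordered packets. A compilable sketch of just the tracking step:

    #include <stdint.h>

    static int seq_before32(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) < 0;
    }

    /* remember the lowest acked, never-retransmitted start sequence */
    static uint32_t track_reord(uint32_t reord, uint32_t start_seq,
                                int was_retransmitted)
    {
        if (!was_retransmitted && seq_before32(start_seq, reord))
            reord = start_seq;
        return reord;
    }
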
@@ -3156,12 +3103,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		if (!fully_acked)
 			break;
 
-		tcp_unlink_write_queue(skb, sk);
-		sk_wmem_free_skb(sk, skb);
+		next = skb_rb_next(skb);
 		if (unlikely(skb == tp->retransmit_skb_hint))
 			tp->retransmit_skb_hint = NULL;
 		if (unlikely(skb == tp->lost_skb_hint))
 			tp->lost_skb_hint = NULL;
+		tcp_rtx_queue_unlink_and_free(skb, sk);
 	}
 
 	if (!skb)
@@ -3197,16 +3144,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			int delta;
 
 			/* Non-retransmitted hole got filled? That's reordering */
-			if (reord < prior_fackets && reord <= tp->fackets_out)
-				tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+			if (before(reord, prior_fack))
+				tcp_check_sack_reordering(sk, reord, 0);
 
-			delta = tcp_is_fack(tp) ? pkts_acked :
-						  prior_sacked - tp->sacked_out;
+			delta = prior_sacked - tp->sacked_out;
 			tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
 		}
-
-		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
 		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3247,18 +3190,19 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		}
 	}
 #endif
-	*acked = pkts_acked;
 	return flag;
 }
 
 static void tcp_ack_probe(struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *head = tcp_send_head(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Was it a usable window open? */
-
-	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+	if (!head)
+		return;
+	if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 		/* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -3378,7 +3322,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	tp->pred_flags = 0;
 	tcp_fast_path_check(sk);
 
-	if (tcp_send_head(sk))
+	if (!tcp_write_queue_empty(sk))
 		tcp_slow_start_after_idle_check(sk);
 
 	if (nwin > tp->max_window) {
@@ -3399,7 +3343,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
 	if (*last_oow_ack_time) {
 		s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
 
-		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+		if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
 			NET_INC_STATS(net, mib_idx);
 			return true;	/* rate-limited: don't send yet! */
 		}
@@ -3435,10 +3379,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 	static u32 challenge_timestamp;
 	static unsigned int challenge_count;
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct net *net = sock_net(sk);
 	u32 count, now;
 
 	/* First check our per-socket dupack rate limit. */
-	if (__tcp_oow_rate_limited(sock_net(sk),
+	if (__tcp_oow_rate_limited(net,
 				   LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
 				   &tp->last_oow_ack_time))
 		return;
@@ -3446,16 +3391,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 	/* Then check host-wide RFC 5961 rate limit. */
 	now = jiffies / HZ;
 	if (now != challenge_timestamp) {
-		u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+		u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+		u32 half = (ack_limit + 1) >> 1;
 
 		challenge_timestamp = now;
-		WRITE_ONCE(challenge_count, half +
-			   prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+		WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
 	}
 	count = READ_ONCE(challenge_count);
 	if (count > 0) {
 		WRITE_ONCE(challenge_count, count - 1);
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+		NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
 		tcp_send_ack(sk);
 	}
 }
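
The challenge-ACK limiter above is a two-level guard: a per-socket dupack limit first, then a host-wide per-second budget that is refilled to a randomized value around the configured limit so an attacker cannot predict the exact count. A compilable single-threaded userspace sketch of the host-wide level (assumes limit > 0; the kernel additionally uses WRITE_ONCE/READ_ONCE for the shared counter):

    #include <stdlib.h>
    #include <time.h>

    static unsigned int budget;
    static time_t stamp;

    static int may_send_challenge_ack(unsigned int limit)
    {
        time_t now = time(NULL);

        if (now != stamp) {              /* new one-second window */
            unsigned int half = (limit + 1) / 2;

            stamp = now;
            budget = half + (unsigned int)rand() % limit;
        }
        if (budget > 0) {
            budget--;
            return 1;                    /* allowed to send */
        }
        return 0;                        /* rate-limited */
    }

    int main(void)
    {
        return !may_send_challenge_ack(1000);  /* default limit, per the sysctl below */
    }
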
@@ -3553,18 +3498,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	u32 delivered = tp->delivered;
 	u32 lost = tp->lost;
-	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+	u32 prior_fack;
 
 	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
 
-	/* We very likely will need to access write queue head. */
-	prefetchw(sk->sk_write_queue.next);
+	/* We very likely will need to access rtx queue. */
+	prefetch(sk->tcp_rtx_queue.rb_node);
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3590,7 +3534,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		icsk->icsk_retransmits = 0;
 	}
 
-	prior_fackets = tp->fackets_out;
+	prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
 	rs.prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* ts_recent update must be made after we are sure that the packet
@@ -3646,8 +3590,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
-				    &sack_state);
+	flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+
+	tcp_rack_update_reo_wnd(sk, &rs);
 
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
@@ -3657,7 +3602,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+				      &rexmit);
 	}
 
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
@@ -3673,13 +3619,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 no_queue:
 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
 	if (flag & FLAG_DSACKING_ACK)
-		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+				      &rexmit);
 	/* If this ack opens up a zero window, clear backoff. It was
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
 	 */
-	if (tcp_send_head(sk))
-		tcp_ack_probe(sk);
+	tcp_ack_probe(sk);
 
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
@@ -3696,7 +3642,8 @@ old_ack:
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
 						&sack_state);
-		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+				      &rexmit);
 		tcp_xmit_recovery(sk, rexmit);
 	}
 
@@ -3721,6 +3668,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
 	foc->exp = exp_opt;
 }
 
+static void smc_parse_options(const struct tcphdr *th,
+			      struct tcp_options_received *opt_rx,
+			      const unsigned char *ptr,
+			      int opsize)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc)) {
+		if (th->syn && !(opsize & 1) &&
+		    opsize >= TCPOLEN_EXP_SMC_BASE &&
+		    get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+			opt_rx->smc_ok = 1;
+	}
+#endif
+}
+
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
@@ -3828,6 +3790,9 @@ void tcp_parse_options(const struct net *net,
 				tcp_parse_fastopen_option(opsize -
 					TCPOLEN_EXP_FASTOPEN_BASE,
 					ptr + 2, th->syn, foc, true);
+			else
+				smc_parse_options(th, opt_rx, ptr,
+						  opsize);
 			break;
 
 		}
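
The SMC hunks show the shared-experimental-option pattern from RFC 6994: several experiments share one TCP option kind, and the payload is disambiguated by a leading 32-bit magic. A compilable userspace sketch of that match follows; the magic value 0xe2d4c3d9 and the minimum payload size of 6 are assumptions standing in for TCPOPT_SMC_MAGIC and TCPOLEN_EXP_SMC_BASE:

    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>

    #define SMC_MAGIC 0xe2d4c3d9u  /* assumed TCPOPT_SMC_MAGIC value */

    static int opt_is_smc(const unsigned char *ptr, int opsize, int syn)
    {
        uint32_t magic;

        /* SYN only, even length, and room for the magic, as above */
        if (!syn || (opsize & 1) || opsize < 6)
            return 0;
        memcpy(&magic, ptr, sizeof(magic));  /* unaligned-safe load */
        return ntohl(magic) == SMC_MAGIC;
    }
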
@@ -3995,6 +3960,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 /* When we get a reset we do this. */
 void tcp_reset(struct sock *sk)
 {
+	trace_tcp_receive_reset(sk);
+
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
 	case TCP_SYN_SENT:
@@ -4117,7 +4084,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+	if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
 		int mib_idx;
 
 		if (before(seq, tp->rcv_nxt))
@@ -4152,7 +4119,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
-		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4268,11 +4235,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	tp->rx_opt.num_sacks = num_sacks;
 }
 
-enum tcp_queue {
-	OOO_QUEUE,
-	RCV_QUEUE,
-};
-
 /**
  * tcp_try_coalesce - try to merge skb to prior one
  * @sk: socket
@@ -4288,7 +4250,6 @@ enum tcp_queue {
  * Returns true if caller should free @from instead of queueing it
  */
 static bool tcp_try_coalesce(struct sock *sk,
-			     enum tcp_queue dest,
 			     struct sk_buff *to,
 			     struct sk_buff *from,
 			     bool *fragstolen)
@@ -4313,10 +4274,7 @@ static bool tcp_try_coalesce(struct sock *sk,
 
 	if (TCP_SKB_CB(from)->has_rxtstamp) {
 		TCP_SKB_CB(to)->has_rxtstamp = true;
-		if (dest == OOO_QUEUE)
-			TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
-		else
-			to->tstamp = from->tstamp;
+		to->tstamp = from->tstamp;
 	}
 
 	return true;
@@ -4341,7 +4299,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
 	p = rb_first(&tp->out_of_order_queue);
 	while (p) {
-		skb = rb_entry(p, struct sk_buff, rbnode);
+		skb = rb_to_skb(p);
 		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
 			break;
 
@@ -4353,9 +4311,6 @@ static void tcp_ofo_queue(struct sock *sk)
 		}
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
-		/* Replace tstamp which was stomped by rbnode */
-		if (TCP_SKB_CB(skb)->has_rxtstamp)
-			skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
 			SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4367,8 +4322,7 @@ static void tcp_ofo_queue(struct sock *sk)
 			   TCP_SKB_CB(skb)->end_seq);
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
-		eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
-						 tail, skb, &fragstolen);
+		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
 		if (!eaten)
@@ -4409,7 +4363,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct rb_node **p, *q, *parent;
+	struct rb_node **p, *parent;
 	struct sk_buff *skb1;
 	u32 seq, end_seq;
 	bool fragstolen;
@@ -4422,10 +4376,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	/* Stash tstamp to avoid being stomped on by rbnode */
-	if (TCP_SKB_CB(skb)->has_rxtstamp)
-		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
-
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
@@ -4453,7 +4403,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
 			     skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
@@ -4472,7 +4422,7 @@ coalesce_done:
 	parent = NULL;
 	while (*p) {
 		parent = *p;
-		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		skb1 = rb_to_skb(parent);
 		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
 			p = &parent->rb_left;
 			continue;
@@ -4504,7 +4454,7 @@ coalesce_done:
 				__kfree_skb(skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+		} else if (tcp_try_coalesce(sk, skb1,
 					    skb, &fragstolen)) {
 			goto coalesce_done;
 		}
@@ -4517,9 +4467,7 @@ insert:
 
 merge_right:
 	/* Remove other segments covered by skb. */
-	while ((q = rb_next(&skb->rbnode)) != NULL) {
-		skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+	while ((skb1 = skb_rb_next(skb)) != NULL) {
 		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
 			break;
 		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4534,7 +4482,7 @@ merge_right:
 		tcp_drop(sk, skb1);
 	}
 	/* If there is no skb after us, we are the last_skb ! */
-	if (!q)
+	if (!skb1)
 		tp->ooo_last_skb = skb;
 
 add_sack:
@@ -4556,7 +4504,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
-		 tcp_try_coalesce(sk, RCV_QUEUE, tail,
+		 tcp_try_coalesce(sk, tail,
 				  skb, fragstolen)) ? 1 : 0;
 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
@@ -4720,7 +4668,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
 	if (list)
 		return !skb_queue_is_last(list, skb) ? skb->next : NULL;
 
-	return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+	return skb_rb_next(skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4741,7 +4689,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
 }
 
 /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
-static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -4749,7 +4697,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
 
 	while (*p) {
 		parent = *p;
-		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		skb1 = rb_to_skb(parent);
 		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
 			p = &parent->rb_left;
 		else
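
tcp_rbtree_insert() is now shared with the retransmit queue (hence the dropped static), and the interesting part is the ordering rule: walk left when the new sequence is before the node's, else right. A compilable sketch of the same descent on a plain binary search tree, since the kernel rbtree plumbing is beside the point:

    #include <stdint.h>
    #include <stddef.h>

    struct node {
        uint32_t seq;
        struct node *left, *right;
    };

    static void seq_insert(struct node **root, struct node *n)
    {
        while (*root) {
            if ((int32_t)(n->seq - (*root)->seq) < 0)  /* before() */
                root = &(*root)->left;
            else
                root = &(*root)->right;
        }
        n->left = n->right = NULL;
        *root = n;
    }
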
@@ -4796,7 +4744,7 @@ restart:
 		 * overlaps to the next one.
 		 */
 		if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
-		    (tcp_win_from_space(skb->truesize) > skb->len ||
+		    (tcp_win_from_space(sk, skb->truesize) > skb->len ||
 		     before(TCP_SKB_CB(skb)->seq, start))) {
 			end_of_skbs = false;
 			break;
@@ -4868,26 +4816,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb, *head;
-	struct rb_node *p;
 	u32 start, end;
 
-	p = rb_first(&tp->out_of_order_queue);
-	skb = rb_entry_safe(p, struct sk_buff, rbnode);
+	skb = skb_rb_first(&tp->out_of_order_queue);
 new_range:
 	if (!skb) {
-		p = rb_last(&tp->out_of_order_queue);
-		/* Note: This is possible p is NULL here. We do not
-		 * use rb_entry_safe(), as ooo_last_skb is valid only
-		 * if rbtree is not empty.
-		 */
-		tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+		tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
 		return;
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
 
 	for (head = skb;;) {
-		skb = tcp_skb_next(skb, NULL);
+		skb = skb_rb_next(skb);
 
 		/* Range is terminated when we see a gap or when
 		 * we are at the queue end.
@@ -4930,14 +4871,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
-		tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+		tcp_drop(sk, rb_to_skb(node));
 		sk_mem_reclaim(sk);
 		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 		    !tcp_under_memory_pressure(sk))
 			break;
 		node = prev;
 	} while (node);
-	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+	tp->ooo_last_skb = rb_to_skb(prev);
 
 	/* Reset SACK state.  A conforming SACK implementation will
 	 * do the same at a timeout based retransmit.  When a connection
@@ -5112,7 +5053,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
 
-	if (ptr && !sysctl_tcp_stdurg)
+	if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
 		ptr--;
 	ptr += ntohl(th->seq);
 
@@ -5532,20 +5473,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 		security_inet_conn_established(sk, skb);
 	}
 
-	/* Make sure socket is routed, for correct metrics. */
-	icsk->icsk_af_ops->rebuild_header(sk);
-
-	tcp_init_metrics(sk);
-	tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
-	tcp_init_congestion_control(sk);
+	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
 
 	/* Prevent spurious tcp_cwnd_restart() on first data
 	 * packet.
 	 */
 	tp->lsndtime = tcp_jiffies32;
 
-	tcp_init_buffer_space(sk);
-
 	if (sock_flag(sk, SOCK_KEEPOPEN))
 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
 
@@ -5559,7 +5493,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 				   struct tcp_fastopen_cookie *cookie)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+	struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
 	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
 	bool syn_drop = false;
 
@@ -5594,9 +5528,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
 
 	if (data) { /* Retransmit unacked data in SYN */
-		tcp_for_write_queue_from(data, sk) {
-			if (data == tcp_send_head(sk) ||
-			    __tcp_retransmit_skb(sk, data, 1))
+		skb_rbtree_walk_from(data) {
+			if (__tcp_retransmit_skb(sk, data, 1))
 				break;
 		}
 		tcp_rearm_rto(sk);
@@ -5614,6 +5547,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	return false;
 }
 
+static void smc_check_reset_syn(struct tcp_sock *tp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc)) {
+		if (tp->syn_smc && !tp->rx_opt.smc_ok)
+			tp->syn_smc = 0;
+	}
+#endif
+}
+
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 const struct tcphdr *th)
 {
@@ -5709,10 +5652,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tp->tcp_header_len = sizeof(struct tcphdr);
 		}
 
-		if (tcp_is_sack(tp) && sysctl_tcp_fack)
-			tcp_enable_fack(tp);
-
-		tcp_mtup_init(sk);
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
@@ -5721,6 +5660,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * is initialized. */
 		tp->copied_seq = tp->rcv_nxt;
 
+		smc_check_reset_syn(tp);
+
 		smp_mb();
 
 		tcp_finish_connect(sk, skb);
@@ -5938,15 +5879,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (req) {
 			inet_csk(sk)->icsk_retransmits = 0;
 			reqsk_fastopen_remove(sk, req, false);
+			/* Re-arm the timer because data may have been sent out.
+			 * This is similar to the regular data transmission case
+			 * when new data has just been ack'ed.
+			 *
+			 * (TFO) - we could try to be more aggressive and
+			 * retransmitting any data sooner based on when they
+			 * are sent out.
+			 */
+			tcp_rearm_rto(sk);
 		} else {
-			/* Make sure socket is routed, for correct metrics. */
-			icsk->icsk_af_ops->rebuild_header(sk);
-			tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
-			tcp_init_congestion_control(sk);
-
-			tcp_mtup_init(sk);
+			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
 			tp->copied_seq = tp->rcv_nxt;
-			tcp_init_buffer_space(sk);
 		}
 		smp_mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
@@ -5966,19 +5910,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (tp->rx_opt.tstamp_ok)
 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
-		if (req) {
-			/* Re-arm the timer because data may have been sent out.
-			 * This is similar to the regular data transmission case
-			 * when new data has just been ack'ed.
-			 *
-			 * (TFO) - we could try to be more aggressive and
-			 * retransmitting any data sooner based on when they
-			 * are sent out.
-			 */
-			tcp_rearm_rto(sk);
-		} else
-			tcp_init_metrics(sk);
-
 		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
 			tcp_update_pacing_rate(sk);
 
@@ -6075,6 +6006,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	case TCP_LAST_ACK:
 		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
 			break;
+		/* fall through */
 	case TCP_FIN_WAIT1:
 	case TCP_FIN_WAIT2:
 		/* RFC 793 says to queue data in these states,
@@ -6183,6 +6115,9 @@ static void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_rmt_port = tcp_hdr(skb)->source;
 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 	ireq->ir_mark = inet_request_mark(sk, skb);
+#if IS_ENABLED(CONFIG_SMC)
+	ireq->smc_ok = rx_opt->smc_ok;
+#endif
 }
 
 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
@@ -6195,8 +6130,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 	if (req) {
 		struct inet_request_sock *ireq = inet_rsk(req);
 
-		kmemcheck_annotate_bitfield(ireq, flags);
-		ireq->opt = NULL;
+		ireq->ireq_opt = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
 		ireq->pktopts = NULL;
 #endif
@@ -6358,7 +6292,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_openreq_init_rwin(req, sk, dst);
 	if (!want_cookie) {
 		tcp_reqsk_record_syn(sk, req, skb);
-		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
+		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
 	}
 	if (fastopen_sk) {
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d9416b5162bc..c6bc0c4d19c6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,6 +85,8 @@
 #include <crypto/hash.h>
 #include <linux/scatterlist.h>
 
+#include <trace/events/tcp.h>
+
 #ifdef CONFIG_TCP_MD5SIG
 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
@@ -480,7 +482,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 					       TCP_TIMEOUT_INIT;
 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
-		skb = tcp_write_queue_head(sk);
+		skb = tcp_rtx_queue_head(sk);
 		BUG_ON(!skb);
 
 		tcp_mstamp_refresh(tp);
@@ -701,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	 * routing might fail in this case. No choice here, if we choose to force
 	 * input interface, we will misroute in case of asymmetric route.
 	 */
-	if (sk)
+	if (sk) {
 		arg.bound_dev_if = sk->sk_bound_dev_if;
+		trace_tcp_send_reset(sk, skb);
+	}
 
 	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
 		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -877,7 +881,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 
 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 					    ireq->ir_rmt_addr,
-					    ireq->opt);
+					    ireq_opt_deref(ireq));
 		err = net_xmit_eval(err);
 	}
 
@@ -889,7 +893,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
  */
 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 {
-	kfree(inet_rsk(req)->opt);
+	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
 }
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -1265,10 +1269,11 @@ static void tcp_v4_init_req(struct request_sock *req,
 			    struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
+	struct net *net = sock_net(sk_listener);
 
 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
-	ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
+	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
 }
 
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1355,10 +1360,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	sk_daddr_set(newsk, ireq->ir_rmt_addr);
 	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
 	newsk->sk_bound_dev_if = ireq->ir_iif;
 	newinet->inet_saddr   = ireq->ir_loc_addr;
-	inet_opt	      = ireq->opt;
-	rcu_assign_pointer(newinet->inet_opt, inet_opt);
-	ireq->opt	      = NULL;
+	inet_opt	      = rcu_dereference(ireq->ireq_opt);
+	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	newinet->rcv_tos      = ip_hdr(skb)->tos;
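
A note on the ireq_opt hunks: rcu_assign_pointer() includes release ordering so concurrent readers that see the pointer also see the pointee fully initialized, while RCU_INIT_POINTER() is a plain store, valid only when the pointer is not yet reachable by readers, as with this freshly created socket. A userspace analogy of the distinction using C11 atomics (names are illustrative):

    #include <stdatomic.h>
    #include <stddef.h>

    struct opts { int ttl; };

    static _Atomic(struct opts *) shared_opt;

    /* publish to concurrent readers: needs a release store */
    static void publish(struct opts *o)
    {
        atomic_store_explicit(&shared_opt, o, memory_order_release);
    }

    /* object not yet visible to any reader: a plain store suffices */
    static void init_private(struct opts **slot, struct opts *o)
    {
        *slot = o;
    }
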
@@ -1403,9 +1407,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-	if (*own_req)
+	if (likely(*own_req)) {
 		tcp_move_syn(newtp, req);
-
+		ireq->ireq_opt = NULL;
+	} else {
+		newinet->inet_opt = NULL;
+	}
 	return newsk;
 
 exit_overflow:
@@ -1416,6 +1423,7 @@ exit:
 	tcp_listendrop(sk);
 	return NULL;
 put_and_exit:
+	newinet->inet_opt = NULL;
 	inet_csk_prepare_forced_close(newsk);
 	tcp_done(newsk);
 	goto exit;
@@ -1503,23 +1511,23 @@ csum_err:
 }
 EXPORT_SYMBOL(tcp_v4_do_rcv);
 
-void tcp_v4_early_demux(struct sk_buff *skb)
+int tcp_v4_early_demux(struct sk_buff *skb)
 {
 	const struct iphdr *iph;
 	const struct tcphdr *th;
 	struct sock *sk;
 
 	if (skb->pkt_type != PACKET_HOST)
-		return;
+		return 0;
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
-		return;
+		return 0;
 
 	iph = ip_hdr(skb);
 	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr) / 4)
-		return;
+		return 0;
 
 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
 				       iph->saddr, th->source,
@@ -1538,6 +1546,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
 			skb_dst_set_noref(skb, dst);
 		}
 	}
+	return 0;
 }
 
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
@@ -1778,8 +1787,9 @@ do_time_wait:
 			refcounted = false;
 			goto process;
 		}
-		/* Fall through to ACK */
 	}
+		/* to ACK */
+		/* fall through */
 	case TCP_TW_ACK:
 		tcp_v4_timewait_ack(sk, skb);
 		break;
@@ -1859,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	trace_tcp_destroy_sock(sk);
+
 	tcp_clear_xmit_timers(sk);
 
 	tcp_cleanup_congestion_control(sk);
@@ -1891,6 +1903,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 
 	/* If socket is aborted during connect operation */
 	tcp_free_fastopen_req(tp);
+	tcp_fastopen_destroy_cipher(sk);
 	tcp_saved_syn_free(tp);
 
 	sk_sockets_allocated_dec(sk);
@@ -2396,8 +2409,8 @@ struct proto tcp_prot = {
2396 .memory_allocated = &tcp_memory_allocated, 2409 .memory_allocated = &tcp_memory_allocated,
2397 .memory_pressure = &tcp_memory_pressure, 2410 .memory_pressure = &tcp_memory_pressure,
2398 .sysctl_mem = sysctl_tcp_mem, 2411 .sysctl_mem = sysctl_tcp_mem,
2399 .sysctl_wmem = sysctl_tcp_wmem, 2412 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2400 .sysctl_rmem = sysctl_tcp_rmem, 2413 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2401 .max_header = MAX_TCP_HEADER, 2414 .max_header = MAX_TCP_HEADER,
2402 .obj_size = sizeof(struct tcp_sock), 2415 .obj_size = sizeof(struct tcp_sock),
2403 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2416 .slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -2417,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
2417{ 2430{
2418 int cpu; 2431 int cpu;
2419 2432
2433 module_put(net->ipv4.tcp_congestion_control->owner);
2434
2420 for_each_possible_cpu(cpu) 2435 for_each_possible_cpu(cpu)
2421 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); 2436 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2422 free_percpu(net->ipv4.tcp_sk); 2437 free_percpu(net->ipv4.tcp_sk);
@@ -2471,6 +2486,50 @@ static int __net_init tcp_sk_init(struct net *net)
2471 net->ipv4.sysctl_tcp_sack = 1; 2486 net->ipv4.sysctl_tcp_sack = 1;
2472 net->ipv4.sysctl_tcp_window_scaling = 1; 2487 net->ipv4.sysctl_tcp_window_scaling = 1;
2473 net->ipv4.sysctl_tcp_timestamps = 1; 2488 net->ipv4.sysctl_tcp_timestamps = 1;
2489 net->ipv4.sysctl_tcp_early_retrans = 3;
2490 net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
2491 net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
2492 net->ipv4.sysctl_tcp_retrans_collapse = 1;
2493 net->ipv4.sysctl_tcp_max_reordering = 300;
2494 net->ipv4.sysctl_tcp_dsack = 1;
2495 net->ipv4.sysctl_tcp_app_win = 31;
2496 net->ipv4.sysctl_tcp_adv_win_scale = 1;
2497 net->ipv4.sysctl_tcp_frto = 2;
2498 net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
2499 /* This limits the percentage of the congestion window which we
2500 * will allow a single TSO frame to consume. Building TSO frames
2501 * which are too large can cause TCP streams to be bursty.
2502 */
2503 net->ipv4.sysctl_tcp_tso_win_divisor = 3;
2504 /* Default TSQ limit of four TSO segments */
2505 net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
2506 /* rfc5961 challenge ack rate limiting */
2507 net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
2508 net->ipv4.sysctl_tcp_min_tso_segs = 2;
2509 net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
2510 net->ipv4.sysctl_tcp_autocorking = 1;
2511 net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
2512 net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
2513 net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
2514 if (net != &init_net) {
2515 memcpy(net->ipv4.sysctl_tcp_rmem,
2516 init_net.ipv4.sysctl_tcp_rmem,
2517 sizeof(init_net.ipv4.sysctl_tcp_rmem));
2518 memcpy(net->ipv4.sysctl_tcp_wmem,
2519 init_net.ipv4.sysctl_tcp_wmem,
2520 sizeof(init_net.ipv4.sysctl_tcp_wmem));
2521 }
2522 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
2523 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
2524 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
2525 atomic_set(&net->ipv4.tfo_active_disable_times, 0);
2526
2527 /* Reno is always built in */
2528 if (!net_eq(net, &init_net) &&
2529 try_module_get(init_net.ipv4.tcp_congestion_control->owner))
2530 net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
2531 else
2532 net->ipv4.tcp_congestion_control = &tcp_reno;
2474 2533
2475 return 0; 2534 return 0;
2476fail: 2535fail:
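Note: this hunk is part of the namespace-ification of the TCP sysctls. Knobs that used to be file-scope globals in tcp_output.c and friends become fields of struct net, seeded here with their historical defaults (for example tcp_tso_win_divisor = 3 caps a single TSO frame at one third of the congestion window, matching the in-line comment). Readers are converted from the global to a per-namespace lookup; a sketch of the new access pattern, with a hypothetical helper name but the real field from the hunk:

    #include <net/sock.h>

    /* Hypothetical helper; the sysctl field is the one initialized above. */
    static bool tcp_slow_start_after_idle_enabled(const struct sock *sk)
    {
        return sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle != 0;
    }

The memcpy of init_net's rmem/wmem triples makes child namespaces start from whatever the boot namespace was tuned to, and the Reno fallback at the end guarantees tcp_congestion_control is always valid even when the parent namespace's module cannot be pinned.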
@@ -2481,7 +2540,12 @@ fail:
2481 2540
2482static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2541static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2483{ 2542{
2543 struct net *net;
2544
2484 inet_twsk_purge(&tcp_hashinfo, AF_INET); 2545 inet_twsk_purge(&tcp_hashinfo, AF_INET);
2546
2547 list_for_each_entry(net, net_exit_list, exit_list)
2548 tcp_fastopen_ctx_destroy(net);
2485} 2549}
2486 2550
2487static struct pernet_operations __net_initdata tcp_sk_ops = { 2551static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 102b2c90bb80..7097f92d16e5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/rcupdate.h> 2#include <linux/rcupdate.h>
2#include <linux/spinlock.h> 3#include <linux/spinlock.h>
3#include <linux/jiffies.h> 4#include <linux/jiffies.h>
@@ -20,8 +21,6 @@
20#include <net/tcp.h> 21#include <net/tcp.h>
21#include <net/genetlink.h> 22#include <net/genetlink.h>
22 23
23int sysctl_tcp_nometrics_save __read_mostly;
24
25static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, 24static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
26 const struct inetpeer_addr *daddr, 25 const struct inetpeer_addr *daddr,
27 struct net *net, unsigned int hash); 26 struct net *net, unsigned int hash);
@@ -330,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
330 int m; 329 int m;
331 330
332 sk_dst_confirm(sk); 331 sk_dst_confirm(sk);
333 if (sysctl_tcp_nometrics_save || !dst) 332 if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
334 return; 333 return;
335 334
336 rcu_read_lock(); 335 rcu_read_lock();
@@ -471,10 +470,8 @@ void tcp_init_metrics(struct sock *sk)
471 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 470 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
472 } 471 }
473 val = tcp_metric_get(tm, TCP_METRIC_REORDERING); 472 val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
474 if (val && tp->reordering != val) { 473 if (val && tp->reordering != val)
475 tcp_disable_fack(tp);
476 tp->reordering = val; 474 tp->reordering = val;
477 }
478 475
479 crtt = tcp_metric_get(tm, TCP_METRIC_RTT); 476 crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
480 rcu_read_unlock(); 477 rcu_read_unlock();
@@ -892,10 +889,14 @@ static void tcp_metrics_flush_all(struct net *net)
892 889
893 for (row = 0; row < max_rows; row++, hb++) { 890 for (row = 0; row < max_rows; row++, hb++) {
894 struct tcp_metrics_block __rcu **pp; 891 struct tcp_metrics_block __rcu **pp;
892 bool match;
893
895 spin_lock_bh(&tcp_metrics_lock); 894 spin_lock_bh(&tcp_metrics_lock);
896 pp = &hb->chain; 895 pp = &hb->chain;
897 for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { 896 for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
898 if (net_eq(tm_net(tm), net)) { 897 match = net ? net_eq(tm_net(tm), net) :
898 !atomic_read(&tm_net(tm)->count);
899 if (match) {
899 *pp = tm->tcpm_next; 900 *pp = tm->tcpm_next;
900 kfree_rcu(tm, rcu_head); 901 kfree_rcu(tm, rcu_head);
901 } else { 902 } else {
@@ -1018,14 +1019,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
1018 return 0; 1019 return 0;
1019} 1020}
1020 1021
1021static void __net_exit tcp_net_metrics_exit(struct net *net) 1022static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
1022{ 1023{
1023 tcp_metrics_flush_all(net); 1024 tcp_metrics_flush_all(NULL);
1024} 1025}
1025 1026
1026static __net_initdata struct pernet_operations tcp_net_metrics_ops = { 1027static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
1027 .init = tcp_net_metrics_init, 1028 .init = tcp_net_metrics_init,
1028 .exit = tcp_net_metrics_exit, 1029 .exit_batch = tcp_net_metrics_exit_batch,
1029}; 1030};
1030 1031
1031void __init tcp_metrics_init(void) 1032void __init tcp_metrics_init(void)
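Note: switching tcp_metrics from .exit to .exit_batch lets the cache be purged once per cleanup batch instead of once per dying namespace, and passing NULL to tcp_metrics_flush_all() selects every block whose owning namespace is already dead. The match predicate from the hunk, restated as standalone logic (the helper name is hypothetical):

    #include <stdbool.h>

    struct net;    /* opaque here; stands in for the kernel's struct net */

    /* match = net ? net_eq(tm_net(tm), net)
     *             : !atomic_read(&tm_net(tm)->count) */
    static bool metrics_block_matches(const struct net *block_net,
                                      const struct net *target,
                                      int block_net_refcount)
    {
        if (target)
            return block_net == target;    /* targeted flush (net_eq) */
        return block_net_refcount == 0;    /* batch exit: owning netns dead */
    }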
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 188a6f31356d..e36eff0403f4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,13 +23,12 @@
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/sysctl.h> 24#include <linux/sysctl.h>
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include <linux/static_key.h>
26#include <net/tcp.h> 27#include <net/tcp.h>
27#include <net/inet_common.h> 28#include <net/inet_common.h>
28#include <net/xfrm.h> 29#include <net/xfrm.h>
29#include <net/busy_poll.h> 30#include <net/busy_poll.h>
30 31
31int sysctl_tcp_abort_on_overflow __read_mostly;
32
33static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 32static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
34{ 33{
35 if (seq == s_win) 34 if (seq == s_win)
@@ -180,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
180 * Oh well... nobody has a sufficient solution to this 179 * Oh well... nobody has a sufficient solution to this
181 * protocol bug yet. 180 * protocol bug yet.
182 */ 181 */
183 if (sysctl_tcp_rfc1337 == 0) { 182 if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
184kill: 183kill:
185 inet_twsk_deschedule_put(tw); 184 inet_twsk_deschedule_put(tw);
186 return TCP_TW_SUCCESS; 185 return TCP_TW_SUCCESS;
@@ -298,8 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
298 key = tp->af_specific->md5_lookup(sk, sk); 297 key = tp->af_specific->md5_lookup(sk, sk);
299 if (key) { 298 if (key) {
300 tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); 299 tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
301 if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) 300 BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
302 BUG();
303 } 301 }
304 } while (0); 302 } while (0);
305#endif 303#endif
@@ -371,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
371 full_space = rcv_wnd * mss; 369 full_space = rcv_wnd * mss;
372 370
373 /* tcp_full_space because it is guaranteed to be the first packet */ 371 /* tcp_full_space because it is guaranteed to be the first packet */
374 tcp_select_initial_window(full_space, 372 tcp_select_initial_window(sk_listener, full_space,
375 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 373 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
376 &req->rsk_rcv_wnd, 374 &req->rsk_rcv_wnd,
377 &req->rsk_window_clamp, 375 &req->rsk_window_clamp,
@@ -417,6 +415,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
417} 415}
418EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); 416EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
419 417
418static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
419 struct request_sock *req,
420 struct tcp_sock *newtp)
421{
422#if IS_ENABLED(CONFIG_SMC)
423 struct inet_request_sock *ireq;
424
425 if (static_branch_unlikely(&tcp_have_smc)) {
426 ireq = inet_rsk(req);
427 if (oldtp->syn_smc && !ireq->smc_ok)
428 newtp->syn_smc = 0;
429 }
430#endif
431}
432
420/* This is not only more efficient than what we used to do, it eliminates 433/* This is not only more efficient than what we used to do, it eliminates
421 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM 434 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
422 * 435 *
@@ -434,6 +447,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
434 struct tcp_request_sock *treq = tcp_rsk(req); 447 struct tcp_request_sock *treq = tcp_rsk(req);
435 struct inet_connection_sock *newicsk = inet_csk(newsk); 448 struct inet_connection_sock *newicsk = inet_csk(newsk);
436 struct tcp_sock *newtp = tcp_sk(newsk); 449 struct tcp_sock *newtp = tcp_sk(newsk);
450 struct tcp_sock *oldtp = tcp_sk(sk);
451
452 smc_check_reset_syn_req(oldtp, req, newtp);
437 453
438 /* Now setup tcp_sock */ 454 /* Now setup tcp_sock */
439 newtp->pred_flags = 0; 455 newtp->pred_flags = 0;
@@ -446,6 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
446 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; 462 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
447 463
448 INIT_LIST_HEAD(&newtp->tsq_node); 464 INIT_LIST_HEAD(&newtp->tsq_node);
465 INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
449 466
450 tcp_init_wl(newtp, treq->rcv_isn); 467 tcp_init_wl(newtp, treq->rcv_isn);
451 468
@@ -458,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
458 newtp->packets_out = 0; 475 newtp->packets_out = 0;
459 newtp->retrans_out = 0; 476 newtp->retrans_out = 0;
460 newtp->sacked_out = 0; 477 newtp->sacked_out = 0;
461 newtp->fackets_out = 0;
462 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 478 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
463 newtp->tlp_high_seq = 0; 479 newtp->tlp_high_seq = 0;
464 newtp->lsndtime = tcp_jiffies32; 480 newtp->lsndtime = tcp_jiffies32;
@@ -492,10 +508,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
492 keepalive_time_when(newtp)); 508 keepalive_time_when(newtp));
493 509
494 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; 510 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
495 if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { 511 newtp->rx_opt.sack_ok = ireq->sack_ok;
496 if (sysctl_tcp_fack)
497 tcp_enable_fack(newtp);
498 }
499 newtp->window_clamp = req->rsk_window_clamp; 512 newtp->window_clamp = req->rsk_window_clamp;
500 newtp->rcv_ssthresh = req->rsk_rcv_wnd; 513 newtp->rcv_ssthresh = req->rsk_rcv_wnd;
501 newtp->rcv_wnd = req->rsk_rcv_wnd; 514 newtp->rcv_wnd = req->rsk_rcv_wnd;
@@ -534,6 +547,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
534 newtp->syn_data_acked = 0; 547 newtp->syn_data_acked = 0;
535 newtp->rack.mstamp = 0; 548 newtp->rack.mstamp = 0;
536 newtp->rack.advanced = 0; 549 newtp->rack.advanced = 0;
550 newtp->rack.reo_wnd_steps = 1;
551 newtp->rack.last_delivered = 0;
552 newtp->rack.reo_wnd_persist = 0;
553 newtp->rack.dsack_seen = 0;
537 554
538 __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); 555 __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
539 } 556 }
@@ -764,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
764 return inet_csk_complete_hashdance(sk, child, req, own_req); 781 return inet_csk_complete_hashdance(sk, child, req, own_req);
765 782
766listen_overflow: 783listen_overflow:
767 if (!sysctl_tcp_abort_on_overflow) { 784 if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
768 inet_rsk(req)->acked = 1; 785 inet_rsk(req)->acked = 1;
769 return NULL; 786 return NULL;
770 } 787 }
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 1ff73982e28c..0b5a05bd82e3 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -39,7 +39,7 @@
39 * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected 39 * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected
40 * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8 40 * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8
41 * nv_rtt_factor RTT averaging factor 41 * nv_rtt_factor RTT averaging factor
42 * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur 42 * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur
43 * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd 43 * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd
44 * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd 44 * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd
45 * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping 45 * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping
@@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2;
61static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */ 61static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */
62static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */ 62static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */
63static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */ 63static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */
64static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */ 64static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */
65static int nv_cwnd_growth_rate_neg __read_mostly = 8; 65static int nv_cwnd_growth_rate_neg __read_mostly = 8;
66static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */ 66static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */
67static int nv_dec_eval_min_calls __read_mostly = 60; 67static int nv_dec_eval_min_calls __read_mostly = 60;
@@ -101,6 +101,11 @@ struct tcpnv {
101 u32 nv_last_rtt; /* last rtt */ 101 u32 nv_last_rtt; /* last rtt */
102 u32 nv_min_rtt; /* active min rtt. Used to determine slope */ 102 u32 nv_min_rtt; /* active min rtt. Used to determine slope */
103 u32 nv_min_rtt_new; /* min rtt for future use */ 103 u32 nv_min_rtt_new; /* min rtt for future use */
104 u32 nv_base_rtt; /* If non-zero it represents the threshold for
105 * congestion */
106 u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is
107 * set to 80% of nv_base_rtt. It helps reduce
108 * unfairness between flows */
104 u32 nv_rtt_max_rate; /* max rate seen during current RTT */ 109 u32 nv_rtt_max_rate; /* max rate seen during current RTT */
105 u32 nv_rtt_start_seq; /* current RTT ends when packet arrives 110 u32 nv_rtt_start_seq; /* current RTT ends when packet arrives
106 * acking beyond nv_rtt_start_seq */ 111 * acking beyond nv_rtt_start_seq */
@@ -132,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
132static void tcpnv_init(struct sock *sk) 137static void tcpnv_init(struct sock *sk)
133{ 138{
134 struct tcpnv *ca = inet_csk_ca(sk); 139 struct tcpnv *ca = inet_csk_ca(sk);
140 int base_rtt;
135 141
136 tcpnv_reset(ca, sk); 142 tcpnv_reset(ca, sk);
137 143
144 /* See if base_rtt is available from socket_ops bpf program.
145 * It is meant to be used in environments, such as communication
146 * within a datacenter, where we have reasonable estimates of
147 * RTTs
148 */
149 base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
150 if (base_rtt > 0) {
151 ca->nv_base_rtt = base_rtt;
152 ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
153 } else {
154 ca->nv_base_rtt = 0;
155 ca->nv_lower_bound_rtt = 0;
156 }
157
138 ca->nv_allow_cwnd_growth = 1; 158 ca->nv_allow_cwnd_growth = 1;
139 ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ; 159 ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
140 ca->nv_min_rtt = NV_INIT_RTT; 160 ca->nv_min_rtt = NV_INIT_RTT;
@@ -144,6 +164,19 @@ static void tcpnv_init(struct sock *sk)
144 ca->cwnd_growth_factor = 0; 164 ca->cwnd_growth_factor = 0;
145} 165}
146 166
167/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt)
168 * bounds to RTT.
169 */
170inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val)
171{
172 if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt)
173 return ca->nv_lower_bound_rtt;
174 else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt)
175 return ca->nv_base_rtt;
176 else
177 return val;
178}
179
147static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) 180static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
148{ 181{
149 struct tcp_sock *tp = tcp_sk(sk); 182 struct tcp_sock *tp = tcp_sk(sk);
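Note: tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT) asks an attached sock_ops BPF program for a deployment-specific base RTT, which is practical inside a datacenter where RTTs are well characterized. The lower bound is then derived as (base_rtt * 205) >> 8: since 205/256 = 0.8008, the shift is an integer approximation of 80% that avoids a division on the fast path. A runnable check of that arithmetic:

    #include <stdio.h>

    int main(void)
    {
        unsigned int base_rtt = 1000;    /* illustrative value, in usec */
        unsigned int lower = (base_rtt * 205) >> 8;

        printf("%u\n", lower);    /* prints 800, i.e. ~80% of base_rtt */
        return 0;
    }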
@@ -209,7 +242,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
209 struct tcp_sock *tp = tcp_sk(sk); 242 struct tcp_sock *tp = tcp_sk(sk);
210 struct tcpnv *ca = inet_csk_ca(sk); 243 struct tcpnv *ca = inet_csk_ca(sk);
211 unsigned long now = jiffies; 244 unsigned long now = jiffies;
212 s64 rate64 = 0; 245 u64 rate64;
213 u32 rate, max_win, cwnd_by_slope; 246 u32 rate, max_win, cwnd_by_slope;
214 u32 avg_rtt; 247 u32 avg_rtt;
215 u32 bytes_acked = 0; 248 u32 bytes_acked = 0;
@@ -251,8 +284,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
251 } 284 }
252 285
253 /* rate in 100's bits per second */ 286 /* rate in 100's bits per second */
254 rate64 = ((u64)sample->in_flight) * 8000000; 287 rate64 = ((u64)sample->in_flight) * 80000;
255 rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100)); 288 do_div(rate64, avg_rtt ?: 1);
289 rate = (u32)rate64;
256 290
257 /* Remember the maximum rate seen during this RTT 291 /* Remember the maximum rate seen during this RTT
258 * Note: It may be more than one RTT. This function should be 292 * Note: It may be more than one RTT. This function should be
@@ -265,6 +299,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
265 if (ca->nv_eval_call_cnt < 255) 299 if (ca->nv_eval_call_cnt < 255)
266 ca->nv_eval_call_cnt++; 300 ca->nv_eval_call_cnt++;
267 301
302 /* Apply bounds to rtt. Only used to update min_rtt */
303 avg_rtt = nv_get_bounded_rtt(ca, avg_rtt);
304
268 /* update min rtt if necessary */ 305 /* update min rtt if necessary */
269 if (avg_rtt < ca->nv_min_rtt) 306 if (avg_rtt < ca->nv_min_rtt)
270 ca->nv_min_rtt = avg_rtt; 307 ca->nv_min_rtt = avg_rtt;
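Note: the old and new rate expressions compute the same quantity, in_flight * 80000 / avg_rtt. With in_flight in bytes and avg_rtt in microseconds, bytes * 8 * 10^6 / rtt_us gives bits per second, and expressing it in the comment's "100's of bits per second" folds the constants down to 80000. The rewrite replaces the 64/64 division with do_div() (a cheaper 64-by-32 divide on 32-bit machines) and guards a zero RTT with the ?: fallback. A runnable restatement, assuming those units:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t in_flight_bytes = 64000;    /* illustrative */
        uint64_t avg_rtt_us = 10000;         /* 10 ms */
        uint64_t rate = in_flight_bytes * 80000 /
                        (avg_rtt_us ? avg_rtt_us : 1);

        /* 512000 x 100 bit/s, i.e. 51.2 Mbit/s */
        printf("%llu\n", (unsigned long long)rate);
        return 0;
    }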
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 11f69bbf9307..b6a2aa1dcf56 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -149,11 +149,19 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
149 * is freed by GSO engine 149 * is freed by GSO engine
150 */ 150 */
151 if (copy_destructor) { 151 if (copy_destructor) {
152 int delta;
153
152 swap(gso_skb->sk, skb->sk); 154 swap(gso_skb->sk, skb->sk);
153 swap(gso_skb->destructor, skb->destructor); 155 swap(gso_skb->destructor, skb->destructor);
154 sum_truesize += skb->truesize; 156 sum_truesize += skb->truesize;
155 refcount_add(sum_truesize - gso_skb->truesize, 157 delta = sum_truesize - gso_skb->truesize;
156 &skb->sk->sk_wmem_alloc); 158 /* In some pathological cases, delta can be negative.
159 * We need to either use refcount_add() or refcount_sub_and_test()
160 */
161 if (likely(delta >= 0))
162 refcount_add(delta, &skb->sk->sk_wmem_alloc);
163 else
164 WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
157 } 165 }
158 166
159 delta = htonl(oldlen + (skb_tail_pointer(skb) - 167 delta = htonl(oldlen + (skb_tail_pointer(skb) -
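Note: refcount_t only exposes unsigned primitives, so a possibly-negative truesize delta cannot simply be passed to refcount_add(); the negative case goes through refcount_sub_and_test(-delta, ...), and the WARN_ON_ONCE documents that the subtraction must never release the last reference here. A kernel-style sketch of the pattern (not the kernel code itself):

    #include <linux/refcount.h>
    #include <linux/bug.h>

    /* Apply a signed delta to a refcount_t, mirroring the hunk above. */
    static void apply_signed_delta(refcount_t *r, int delta)
    {
        if (likely(delta >= 0))
            refcount_add(delta, r);
        else
            WARN_ON_ONCE(refcount_sub_and_test(-delta, r));
    }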
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1c839c99114c..a4d214c7b506 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,40 +41,25 @@
41#include <linux/compiler.h> 41#include <linux/compiler.h>
42#include <linux/gfp.h> 42#include <linux/gfp.h>
43#include <linux/module.h> 43#include <linux/module.h>
44#include <linux/static_key.h>
44 45
45/* People can turn this off for buggy TCP's found in printers etc. */ 46#include <trace/events/tcp.h>
46int sysctl_tcp_retrans_collapse __read_mostly = 1;
47
48/* People can turn this on to work with those rare, broken TCPs that
49 * interpret the window field as a signed quantity.
50 */
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52
53/* Default TSQ limit of four TSO segments */
54int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
55
56/* This limits the percentage of the congestion window which we
57 * will allow a single TSO frame to consume. Building TSO frames
58 * which are too large can cause TCP streams to be bursty.
59 */
60int sysctl_tcp_tso_win_divisor __read_mostly = 3;
61
62/* By default, RFC2861 behavior. */
63int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
64 47
65static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 48static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
66 int push_one, gfp_t gfp); 49 int push_one, gfp_t gfp);
67 50
68/* Account for new data that has been sent to the network. */ 51/* Account for new data that has been sent to the network. */
69static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) 52static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
70{ 53{
71 struct inet_connection_sock *icsk = inet_csk(sk); 54 struct inet_connection_sock *icsk = inet_csk(sk);
72 struct tcp_sock *tp = tcp_sk(sk); 55 struct tcp_sock *tp = tcp_sk(sk);
73 unsigned int prior_packets = tp->packets_out; 56 unsigned int prior_packets = tp->packets_out;
74 57
75 tcp_advance_send_head(sk, skb);
76 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 58 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
77 59
60 __skb_unlink(skb, &sk->sk_write_queue);
61 tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
62
78 tp->packets_out += tcp_skb_pcount(skb); 63 tp->packets_out += tcp_skb_pcount(skb);
79 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) 64 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
80 tcp_rearm_rto(sk); 65 tcp_rearm_rto(sk);
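Note: this is the core of the 4.15 retransmit-queue rework. sk_write_queue now holds only not-yet-sent segments; on first transmission a segment is unlinked from it and inserted into the sk->tcp_rtx_queue red-black tree keyed by start sequence, so SACK and retransmit processing can locate a segment in O(log n) instead of walking a linear list. A hedged sketch of such an insert, using the kernel rbtree API but a hypothetical struct seg (the real tcp_rbtree_insert() keys on TCP_SKB_CB(skb)->seq):

    #include <linux/rbtree.h>
    #include <linux/types.h>

    /* TCP's wraparound-safe sequence comparison, as in <net/tcp.h> */
    static inline bool before(u32 seq1, u32 seq2)
    {
        return (s32)(seq1 - seq2) < 0;
    }

    struct seg { struct rb_node node; u32 seq; };    /* illustrative */

    static void rtx_insert(struct rb_root *root, struct seg *new)
    {
        struct rb_node **p = &root->rb_node, *parent = NULL;

        while (*p) {
            struct seg *cur = rb_entry(*p, struct seg, node);

            parent = *p;
            if (before(new->seq, cur->seq))
                p = &parent->rb_left;
            else
                p = &parent->rb_right;
        }
        rb_link_node(&new->node, parent, p);
        rb_insert_color(&new->node, root);
    }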
@@ -203,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss)
203 * be a multiple of mss if possible. We assume here that mss >= 1. 188 * be a multiple of mss if possible. We assume here that mss >= 1.
204 * This MUST be enforced by all callers. 189 * This MUST be enforced by all callers.
205 */ 190 */
206void tcp_select_initial_window(int __space, __u32 mss, 191void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
207 __u32 *rcv_wnd, __u32 *window_clamp, 192 __u32 *rcv_wnd, __u32 *window_clamp,
208 int wscale_ok, __u8 *rcv_wscale, 193 int wscale_ok, __u8 *rcv_wscale,
209 __u32 init_rcv_wnd) 194 __u32 init_rcv_wnd)
@@ -227,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
227 * which we interpret as a sign the remote TCP is not 212 * which we interpret as a sign the remote TCP is not
228 * misinterpreting the window field as a signed quantity. 213 * misinterpreting the window field as a signed quantity.
229 */ 214 */
230 if (sysctl_tcp_workaround_signed_windows) 215 if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
231 (*rcv_wnd) = min(space, MAX_TCP_WINDOW); 216 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
232 else 217 else
233 (*rcv_wnd) = space; 218 (*rcv_wnd) = space;
@@ -235,7 +220,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
235 (*rcv_wscale) = 0; 220 (*rcv_wscale) = 0;
236 if (wscale_ok) { 221 if (wscale_ok) {
237 /* Set window scaling on max possible window */ 222 /* Set window scaling on max possible window */
238 space = max_t(u32, space, sysctl_tcp_rmem[2]); 223 space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
239 space = max_t(u32, space, sysctl_rmem_max); 224 space = max_t(u32, space, sysctl_rmem_max);
240 space = min_t(u32, space, *window_clamp); 225 space = min_t(u32, space, *window_clamp);
241 while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { 226 while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
@@ -287,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk)
287 /* Make sure we do not exceed the maximum possible 272 /* Make sure we do not exceed the maximum possible
288 * scaled window. 273 * scaled window.
289 */ 274 */
290 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows) 275 if (!tp->rx_opt.rcv_wscale &&
276 sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
291 new_win = min(new_win, MAX_TCP_WINDOW); 277 new_win = min(new_win, MAX_TCP_WINDOW);
292 else 278 else
293 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); 279 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -395,7 +381,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
395static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) 381static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
396{ 382{
397 skb->ip_summed = CHECKSUM_PARTIAL; 383 skb->ip_summed = CHECKSUM_PARTIAL;
398 skb->csum = 0;
399 384
400 TCP_SKB_CB(skb)->tcp_flags = flags; 385 TCP_SKB_CB(skb)->tcp_flags = flags;
401 TCP_SKB_CB(skb)->sacked = 0; 386 TCP_SKB_CB(skb)->sacked = 0;
@@ -418,6 +403,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
418#define OPTION_MD5 (1 << 2) 403#define OPTION_MD5 (1 << 2)
419#define OPTION_WSCALE (1 << 3) 404#define OPTION_WSCALE (1 << 3)
420#define OPTION_FAST_OPEN_COOKIE (1 << 8) 405#define OPTION_FAST_OPEN_COOKIE (1 << 8)
406#define OPTION_SMC (1 << 9)
407
408static void smc_options_write(__be32 *ptr, u16 *options)
409{
410#if IS_ENABLED(CONFIG_SMC)
411 if (static_branch_unlikely(&tcp_have_smc)) {
412 if (unlikely(OPTION_SMC & *options)) {
413 *ptr++ = htonl((TCPOPT_NOP << 24) |
414 (TCPOPT_NOP << 16) |
415 (TCPOPT_EXP << 8) |
416 (TCPOLEN_EXP_SMC_BASE));
417 *ptr++ = htonl(TCPOPT_SMC_MAGIC);
418 }
419 }
420#endif
421}
421 422
422struct tcp_out_options { 423struct tcp_out_options {
423 u16 options; /* bit field of OPTION_* */ 424 u16 options; /* bit field of OPTION_* */
@@ -536,6 +537,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
536 } 537 }
537 ptr += (len + 3) >> 2; 538 ptr += (len + 3) >> 2;
538 } 539 }
540
541 smc_options_write(ptr, &options);
542}
543
544static void smc_set_option(const struct tcp_sock *tp,
545 struct tcp_out_options *opts,
546 unsigned int *remaining)
547{
548#if IS_ENABLED(CONFIG_SMC)
549 if (static_branch_unlikely(&tcp_have_smc)) {
550 if (tp->syn_smc) {
551 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
552 opts->options |= OPTION_SMC;
553 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
554 }
555 }
556 }
557#endif
558}
559
560static void smc_set_option_cond(const struct tcp_sock *tp,
561 const struct inet_request_sock *ireq,
562 struct tcp_out_options *opts,
563 unsigned int *remaining)
564{
565#if IS_ENABLED(CONFIG_SMC)
566 if (static_branch_unlikely(&tcp_have_smc)) {
567 if (tp->syn_smc && ireq->smc_ok) {
568 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
569 opts->options |= OPTION_SMC;
570 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
571 }
572 }
573 }
574#endif
539} 575}
540 576
541/* Compute TCP options for SYN packets. This is not the final 577/* Compute TCP options for SYN packets. This is not the final
@@ -603,11 +639,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
603 } 639 }
604 } 640 }
605 641
642 smc_set_option(tp, opts, &remaining);
643
606 return MAX_TCP_OPTION_SPACE - remaining; 644 return MAX_TCP_OPTION_SPACE - remaining;
607} 645}
608 646
609/* Set up TCP options for SYN-ACKs. */ 647/* Set up TCP options for SYN-ACKs. */
610static unsigned int tcp_synack_options(struct request_sock *req, 648static unsigned int tcp_synack_options(const struct sock *sk,
649 struct request_sock *req,
611 unsigned int mss, struct sk_buff *skb, 650 unsigned int mss, struct sk_buff *skb,
612 struct tcp_out_options *opts, 651 struct tcp_out_options *opts,
613 const struct tcp_md5sig_key *md5, 652 const struct tcp_md5sig_key *md5,
@@ -663,6 +702,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
663 } 702 }
664 } 703 }
665 704
705 smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
706
666 return MAX_TCP_OPTION_SPACE - remaining; 707 return MAX_TCP_OPTION_SPACE - remaining;
667} 708}
668 709
@@ -739,8 +780,10 @@ static void tcp_tsq_handler(struct sock *sk)
739 struct tcp_sock *tp = tcp_sk(sk); 780 struct tcp_sock *tp = tcp_sk(sk);
740 781
741 if (tp->lost_out > tp->retrans_out && 782 if (tp->lost_out > tp->retrans_out &&
742 tp->snd_cwnd > tcp_packets_in_flight(tp)) 783 tp->snd_cwnd > tcp_packets_in_flight(tp)) {
784 tcp_mstamp_refresh(tp);
743 tcp_xmit_retransmit_queue(sk); 785 tcp_xmit_retransmit_queue(sk);
786 }
744 787
745 tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 788 tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
746 0, GFP_ATOMIC); 789 0, GFP_ATOMIC);
@@ -971,6 +1014,12 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
971 HRTIMER_MODE_ABS_PINNED); 1014 HRTIMER_MODE_ABS_PINNED);
972} 1015}
973 1016
1017static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
1018{
1019 skb->skb_mstamp = tp->tcp_mstamp;
1020 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
1021}
1022
974/* This routine actually transmits TCP packets queued in by 1023/* This routine actually transmits TCP packets queued in by
975 * tcp_do_sendmsg(). This is used by both the initial 1024 * tcp_do_sendmsg(). This is used by both the initial
976 * transmission and possible later retransmissions. 1025 * transmission and possible later retransmissions.
@@ -1003,10 +1052,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1003 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq 1052 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
1004 - tp->snd_una; 1053 - tp->snd_una;
1005 oskb = skb; 1054 oskb = skb;
1006 if (unlikely(skb_cloned(skb))) 1055
1007 skb = pskb_copy(skb, gfp_mask); 1056 tcp_skb_tsorted_save(oskb) {
1008 else 1057 if (unlikely(skb_cloned(oskb)))
1009 skb = skb_clone(skb, gfp_mask); 1058 skb = pskb_copy(oskb, gfp_mask);
1059 else
1060 skb = skb_clone(oskb, gfp_mask);
1061 } tcp_skb_tsorted_restore(oskb);
1062
1010 if (unlikely(!skb)) 1063 if (unlikely(!skb))
1011 return -ENOBUFS; 1064 return -ENOBUFS;
1012 } 1065 }
@@ -1127,7 +1180,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1127 err = net_xmit_eval(err); 1180 err = net_xmit_eval(err);
1128 } 1181 }
1129 if (!err && oskb) { 1182 if (!err && oskb) {
1130 oskb->skb_mstamp = tp->tcp_mstamp; 1183 tcp_update_skb_after_send(tp, oskb);
1131 tcp_rate_skb_sent(sk, oskb); 1184 tcp_rate_skb_sent(sk, oskb);
1132 } 1185 }
1133 return err; 1186 return err;
@@ -1165,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1165 } 1218 }
1166} 1219}
1167 1220
1168/* When a modification to fackets out becomes necessary, we need to check
1169 * skb is counted to fackets_out or not.
1170 */
1171static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1172 int decr)
1173{
1174 struct tcp_sock *tp = tcp_sk(sk);
1175
1176 if (!tp->sacked_out || tcp_is_reno(tp))
1177 return;
1178
1179 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1180 tp->fackets_out -= decr;
1181}
1182
1183/* Pcount in the middle of the write queue got changed, we need to do various 1221/* Pcount in the middle of the write queue got changed, we need to do various
1184 * tweaks to fix counters 1222 * tweaks to fix counters
1185 */ 1223 */
@@ -1200,11 +1238,9 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
1200 if (tcp_is_reno(tp) && decr > 0) 1238 if (tcp_is_reno(tp) && decr > 0)
1201 tp->sacked_out -= min_t(u32, tp->sacked_out, decr); 1239 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1202 1240
1203 tcp_adjust_fackets_out(sk, skb, decr);
1204
1205 if (tp->lost_skb_hint && 1241 if (tp->lost_skb_hint &&
1206 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && 1242 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1207 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) 1243 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1208 tp->lost_cnt_hint -= decr; 1244 tp->lost_cnt_hint -= decr;
1209 1245
1210 tcp_verify_left_out(tp); 1246 tcp_verify_left_out(tp);
@@ -1239,12 +1275,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
1239 TCP_SKB_CB(skb)->eor = 0; 1275 TCP_SKB_CB(skb)->eor = 0;
1240} 1276}
1241 1277
1278/* Insert buff after skb on the write or rtx queue of sk. */
1279static void tcp_insert_write_queue_after(struct sk_buff *skb,
1280 struct sk_buff *buff,
1281 struct sock *sk,
1282 enum tcp_queue tcp_queue)
1283{
1284 if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
1285 __skb_queue_after(&sk->sk_write_queue, skb, buff);
1286 else
1287 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
1288}
1289
1242/* Function to create two new TCP segments. Shrinks the given segment 1290/* Function to create two new TCP segments. Shrinks the given segment
1243 * to the specified size and appends a new segment with the rest of the 1291 * to the specified size and appends a new segment with the rest of the
1244 * packet to the list. This won't be called frequently, I hope. 1292 * packet to the list. This won't be called frequently, I hope.
1245 * Remember, these are still headerless SKBs at this point. 1293 * Remember, these are still headerless SKBs at this point.
1246 */ 1294 */
1247int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, 1295int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1296 struct sk_buff *skb, u32 len,
1248 unsigned int mss_now, gfp_t gfp) 1297 unsigned int mss_now, gfp_t gfp)
1249{ 1298{
1250 struct tcp_sock *tp = tcp_sk(sk); 1299 struct tcp_sock *tp = tcp_sk(sk);
@@ -1327,7 +1376,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1327 1376
1328 /* Link BUFF into the send queue. */ 1377 /* Link BUFF into the send queue. */
1329 __skb_header_release(buff); 1378 __skb_header_release(buff);
1330 tcp_insert_write_queue_after(skb, buff, sk); 1379 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1380 if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
1381 list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
1331 1382
1332 return 0; 1383 return 0;
1333} 1384}
@@ -1605,7 +1656,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1605 if (tp->packets_out > tp->snd_cwnd_used) 1656 if (tp->packets_out > tp->snd_cwnd_used)
1606 tp->snd_cwnd_used = tp->packets_out; 1657 tp->snd_cwnd_used = tp->packets_out;
1607 1658
1608 if (sysctl_tcp_slow_start_after_idle && 1659 if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
1609 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && 1660 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
1610 !ca_ops->cong_control) 1661 !ca_ops->cong_control)
1611 tcp_cwnd_application_limited(sk); 1662 tcp_cwnd_application_limited(sk);
@@ -1614,10 +1665,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1614 * is caused by insufficient sender buffer: 1665 * is caused by insufficient sender buffer:
1615 * 1) just sent some data (see tcp_write_xmit) 1666 * 1) just sent some data (see tcp_write_xmit)
1616 * 2) not cwnd limited (this else condition) 1667 * 2) not cwnd limited (this else condition)
1617 * 3) no more data to send (null tcp_send_head ) 1668 * 3) no more data to send (tcp_write_queue_empty())
1618 * 4) application is hitting buffer limit (SOCK_NOSPACE) 1669 * 4) application is hitting buffer limit (SOCK_NOSPACE)
1619 */ 1670 */
1620 if (!tcp_send_head(sk) && sk->sk_socket && 1671 if (tcp_write_queue_empty(sk) && sk->sk_socket &&
1621 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && 1672 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
1622 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) 1673 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1623 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); 1674 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1669,7 +1720,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
1669{ 1720{
1670 u32 bytes, segs; 1721 u32 bytes, segs;
1671 1722
1672 bytes = min(sk->sk_pacing_rate >> 10, 1723 bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
1673 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); 1724 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1674 1725
1675 /* Goal is to send at least one packet per ms, 1726 /* Goal is to send at least one packet per ms,
@@ -1692,7 +1743,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
1692 u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; 1743 u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
1693 1744
1694 return tso_segs ? : 1745 return tso_segs ? :
1695 tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); 1746 tcp_tso_autosize(sk, mss_now,
1747 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
1696} 1748}
1697 1749
1698/* Returns the portion of skb which can be sent right away */ 1750/* Returns the portion of skb which can be sent right away */
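Note: replacing the hard-coded >> 10 with sk->sk_pacing_shift preserves the default behaviour (shift 10, roughly one millisecond of data at the current pacing rate) while letting a caller such as a wireless driver lower the shift to build larger bursts; the TSO sizing below likewise moves sysctl_tcp_min_tso_segs into the namespace. A worked example of the default autosizing input:

    #include <stdio.h>

    int main(void)
    {
        unsigned long pacing_rate = 12500000;    /* 100 Mbit/s in bytes/s */
        unsigned int pacing_shift = 10;          /* default per the hunk */

        /* >> 10 divides by 1024, close to "bytes per millisecond" */
        printf("%lu\n", pacing_rate >> pacing_shift);    /* 12207 */
        return 0;
    }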
@@ -1806,40 +1858,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1806 return !after(end_seq, tcp_wnd_end(tp)); 1858 return !after(end_seq, tcp_wnd_end(tp));
1807} 1859}
1808 1860
1809/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
1810 * should be put on the wire right now. If so, it returns the number of
1811 * packets allowed by the congestion window.
1812 */
1813static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1814 unsigned int cur_mss, int nonagle)
1815{
1816 const struct tcp_sock *tp = tcp_sk(sk);
1817 unsigned int cwnd_quota;
1818
1819 tcp_init_tso_segs(skb, cur_mss);
1820
1821 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1822 return 0;
1823
1824 cwnd_quota = tcp_cwnd_test(tp, skb);
1825 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1826 cwnd_quota = 0;
1827
1828 return cwnd_quota;
1829}
1830
1831/* Test if sending is allowed right now. */
1832bool tcp_may_send_now(struct sock *sk)
1833{
1834 const struct tcp_sock *tp = tcp_sk(sk);
1835 struct sk_buff *skb = tcp_send_head(sk);
1836
1837 return skb &&
1838 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1839 (tcp_skb_is_last(sk, skb) ?
1840 tp->nonagle : TCP_NAGLE_PUSH));
1841}
1842
1843/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet 1861/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
1844 * which is put after SKB on the list. It is very much like 1862 * which is put after SKB on the list. It is very much like
1845 * tcp_fragment() except that it may make several kinds of assumptions 1863 * tcp_fragment() except that it may make several kinds of assumptions
@@ -1847,7 +1865,8 @@ bool tcp_may_send_now(struct sock *sk)
1847 * know that all the data is in scatter-gather pages, and that the 1865 * know that all the data is in scatter-gather pages, and that the
1848 * packet has never been sent out before (and thus is not cloned). 1866 * packet has never been sent out before (and thus is not cloned).
1849 */ 1867 */
1850static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, 1868static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1869 struct sk_buff *skb, unsigned int len,
1851 unsigned int mss_now, gfp_t gfp) 1870 unsigned int mss_now, gfp_t gfp)
1852{ 1871{
1853 struct sk_buff *buff; 1872 struct sk_buff *buff;
@@ -1856,7 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1856 1875
1857 /* All of a TSO frame must be composed of paged data. */ 1876 /* All of a TSO frame must be composed of paged data. */
1858 if (skb->len != skb->data_len) 1877 if (skb->len != skb->data_len)
1859 return tcp_fragment(sk, skb, len, mss_now, gfp); 1878 return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
1860 1879
1861 buff = sk_stream_alloc_skb(sk, 0, gfp, true); 1880 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
1862 if (unlikely(!buff)) 1881 if (unlikely(!buff))
@@ -1892,7 +1911,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1892 1911
1893 /* Link BUFF into the send queue. */ 1912 /* Link BUFF into the send queue. */
1894 __skb_header_release(buff); 1913 __skb_header_release(buff);
1895 tcp_insert_write_queue_after(skb, buff, sk); 1914 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1896 1915
1897 return 0; 1916 return 0;
1898} 1917}
@@ -1942,7 +1961,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1942 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) 1961 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1943 goto send_now; 1962 goto send_now;
1944 1963
1945 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); 1964 win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
1946 if (win_divisor) { 1965 if (win_divisor) {
1947 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); 1966 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1948 1967
@@ -1962,8 +1981,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1962 goto send_now; 1981 goto send_now;
1963 } 1982 }
1964 1983
1965 head = tcp_write_queue_head(sk); 1984 /* TODO : use tsorted_sent_queue ? */
1966 1985 head = tcp_rtx_queue_head(sk);
1986 if (!head)
1987 goto send_now;
1967 age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp); 1988 age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
1968 /* If next ACK is likely to come too late (half srtt), do not defer */ 1989 /* If next ACK is likely to come too late (half srtt), do not defer */
1969 if (age < (tp->srtt_us >> 4)) 1990 if (age < (tp->srtt_us >> 4))
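Note: ACCESS_ONCE() was being retired tree-wide in this cycle; READ_ONCE() gives the same single, untorn load for this lockless read of a sysctl that another CPU may rewrite concurrently, and also works on non-scalar types. A sketch of the converted read (the helper name is hypothetical, the field is the one from the hunk):

    #include <net/net_namespace.h>

    static int current_tso_win_divisor(const struct net *net)
    {
        /* lockless: the value may change under us, so load it exactly once */
        return READ_ONCE(net->ipv4.sysctl_tcp_tso_win_divisor);
    }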
@@ -2094,6 +2115,7 @@ static int tcp_mtu_probe(struct sock *sk)
2094 nskb->ip_summed = skb->ip_summed; 2115 nskb->ip_summed = skb->ip_summed;
2095 2116
2096 tcp_insert_write_queue_before(nskb, skb, sk); 2117 tcp_insert_write_queue_before(nskb, skb, sk);
2118 tcp_highest_sack_replace(sk, skb, nskb);
2097 2119
2098 len = 0; 2120 len = 0;
2099 tcp_for_write_queue_from_safe(skb, next, sk) { 2121 tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2176,18 +2198,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2176{ 2198{
2177 unsigned int limit; 2199 unsigned int limit;
2178 2200
2179 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); 2201 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
2180 limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); 2202 limit = min_t(u32, limit,
2203 sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2181 limit <<= factor; 2204 limit <<= factor;
2182 2205
2183 if (refcount_read(&sk->sk_wmem_alloc) > limit) { 2206 if (refcount_read(&sk->sk_wmem_alloc) > limit) {
2184 /* Always send the 1st or 2nd skb in write queue. 2207 /* Always send skb if rtx queue is empty.
2185 * No need to wait for TX completion to call us back, 2208 * No need to wait for TX completion to call us back,
2186 * after softirq/tasklet schedule. 2209 * after softirq/tasklet schedule.
2187 * This helps when TX completions are delayed too much. 2210 * This helps when TX completions are delayed too much.
2188 */ 2211 */
2189 if (skb == sk->sk_write_queue.next || 2212 if (tcp_rtx_queue_empty(sk))
2190 skb->prev == sk->sk_write_queue.next)
2191 return false; 2213 return false;
2192 2214
2193 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); 2215 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
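Note: the small-queue budget is max(2 * skb->truesize, pacing_rate >> pacing_shift), clamped by the now per-netns tcp_limit_output_bytes (262144 by default, per the tcp_sk_init hunk earlier) and scaled by << factor; the early-send escape is also simplified to "rtx queue empty" now that unsent data lives on a separate queue. A runnable walk through the arithmetic with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned long truesize = 4096, pacing_rate = 12500000;
        unsigned int shift = 10, factor = 0;
        unsigned long limit = 2 * truesize;          /* 8192 */

        if ((pacing_rate >> shift) > limit)
            limit = pacing_rate >> shift;            /* 12207 */
        if (limit > 262144)
            limit = 262144;                          /* sysctl cap */
        limit <<= factor;

        printf("%lu\n", limit);                      /* 12207 */
        return 0;
    }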
@@ -2238,7 +2260,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
2238 * it's the "most interesting" or current chrono we are 2260 * it's the "most interesting" or current chrono we are
2239 * tracking and starts busy chrono if we have pending data. 2261 * tracking and starts busy chrono if we have pending data.
2240 */ 2262 */
2241 if (tcp_write_queue_empty(sk)) 2263 if (tcp_rtx_and_write_queues_empty(sk))
2242 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); 2264 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
2243 else if (type == tp->chrono_type) 2265 else if (type == tp->chrono_type)
2244 tcp_chrono_set(tp, TCP_CHRONO_BUSY); 2266 tcp_chrono_set(tp, TCP_CHRONO_BUSY);
@@ -2271,6 +2293,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2271 2293
2272 sent_pkts = 0; 2294 sent_pkts = 0;
2273 2295
2296 tcp_mstamp_refresh(tp);
2274 if (!push_one) { 2297 if (!push_one) {
2275 /* Do MTU probing. */ 2298 /* Do MTU probing. */
2276 result = tcp_mtu_probe(sk); 2299 result = tcp_mtu_probe(sk);
@@ -2282,7 +2305,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2282 } 2305 }
2283 2306
2284 max_segs = tcp_tso_segs(sk, mss_now); 2307 max_segs = tcp_tso_segs(sk, mss_now);
2285 tcp_mstamp_refresh(tp);
2286 while ((skb = tcp_send_head(sk))) { 2308 while ((skb = tcp_send_head(sk))) {
2287 unsigned int limit; 2309 unsigned int limit;
2288 2310
@@ -2294,7 +2316,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2294 2316
2295 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { 2317 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2296 /* "skb_mstamp" is used as a start point for the retransmit timer */ 2318 /* "skb_mstamp" is used as a start point for the retransmit timer */
2297 skb->skb_mstamp = tp->tcp_mstamp; 2319 tcp_update_skb_after_send(tp, skb);
2298 goto repair; /* Skip network transmission */ 2320 goto repair; /* Skip network transmission */
2299 } 2321 }
2300 2322
@@ -2333,7 +2355,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2333 nonagle); 2355 nonagle);
2334 2356
2335 if (skb->len > limit && 2357 if (skb->len > limit &&
2336 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) 2358 unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
2359 skb, limit, mss_now, gfp)))
2337 break; 2360 break;
2338 2361
2339 if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) 2362 if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2368,19 +2391,20 @@ repair:
2368 2391
2369 /* Send one loss probe per tail loss episode. */ 2392 /* Send one loss probe per tail loss episode. */
2370 if (push_one != 2) 2393 if (push_one != 2)
2371 tcp_schedule_loss_probe(sk); 2394 tcp_schedule_loss_probe(sk, false);
2372 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); 2395 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2373 tcp_cwnd_validate(sk, is_cwnd_limited); 2396 tcp_cwnd_validate(sk, is_cwnd_limited);
2374 return false; 2397 return false;
2375 } 2398 }
2376 return !tp->packets_out && tcp_send_head(sk); 2399 return !tp->packets_out && !tcp_write_queue_empty(sk);
2377} 2400}
2378 2401
2379bool tcp_schedule_loss_probe(struct sock *sk) 2402bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
2380{ 2403{
2381 struct inet_connection_sock *icsk = inet_csk(sk); 2404 struct inet_connection_sock *icsk = inet_csk(sk);
2382 struct tcp_sock *tp = tcp_sk(sk); 2405 struct tcp_sock *tp = tcp_sk(sk);
2383 u32 timeout, rto_delta_us; 2406 u32 timeout, rto_delta_us;
2407 int early_retrans;
2384 2408
2385 /* Don't do any loss probe on a Fast Open connection before 3WHS 2409 /* Don't do any loss probe on a Fast Open connection before 3WHS
2386 * finishes. 2410 * finishes.
@@ -2388,16 +2412,17 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2388 if (tp->fastopen_rsk) 2412 if (tp->fastopen_rsk)
2389 return false; 2413 return false;
2390 2414
2415 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
2391 /* Schedule a loss probe in 2*RTT for SACK capable connections 2416 /* Schedule a loss probe in 2*RTT for SACK capable connections
2392 * in Open state, that are either limited by cwnd or application. 2417 * in Open state, that are either limited by cwnd or application.
2393 */ 2418 */
2394 if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || 2419 if ((early_retrans != 3 && early_retrans != 4) ||
2395 !tp->packets_out || !tcp_is_sack(tp) || 2420 !tp->packets_out || !tcp_is_sack(tp) ||
2396 icsk->icsk_ca_state != TCP_CA_Open) 2421 icsk->icsk_ca_state != TCP_CA_Open)
2397 return false; 2422 return false;
2398 2423
2399 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && 2424 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
2400 tcp_send_head(sk)) 2425 !tcp_write_queue_empty(sk))
2401 return false; 2426 return false;
2402 2427
2403 /* Probe timeout is 2*rtt. Add minimum RTO to account 2428 /* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2415,7 +2440,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2415 } 2440 }
2416 2441
2417 /* If the RTO formula yields an earlier time, then use that time. */ 2442 /* If the RTO formula yields an earlier time, then use that time. */
2418 rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */ 2443 rto_delta_us = advancing_rto ?
2444 jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
2445 tcp_rto_delta_us(sk); /* How far in future is RTO? */
2419 if (rto_delta_us > 0) 2446 if (rto_delta_us > 0)
2420 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); 2447 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
2421 2448
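Note: the new advancing_rto flag handles a caller that has just re-armed the RTO timer: the remaining time of the old timer is meaningless there, so the probe deadline is clamped against a full icsk_rto from now rather than tcp_rto_delta_us(). The per-netns tcp_early_retrans read above also keeps TLP gated on modes 3 and 4, as before. The clamp, restated as a hedged sketch with simplified types:

    #include <linux/kernel.h>
    #include <linux/jiffies.h>
    #include <linux/types.h>

    /* Sketch only: pick the earlier of the TLP timeout and the RTO. */
    static u32 clamp_tlp_timeout(u32 timeout, s64 rto_delta_us)
    {
        if (rto_delta_us > 0)
            timeout = min_t(u32, timeout,
                            usecs_to_jiffies(rto_delta_us));
        return timeout;
    }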
@@ -2450,18 +2477,14 @@ void tcp_send_loss_probe(struct sock *sk)
2450 int mss = tcp_current_mss(sk); 2477 int mss = tcp_current_mss(sk);
2451 2478
2452 skb = tcp_send_head(sk); 2479 skb = tcp_send_head(sk);
2453 if (skb) { 2480 if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
2454 if (tcp_snd_wnd_test(tp, skb, mss)) { 2481 pcount = tp->packets_out;
2455 pcount = tp->packets_out; 2482 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2456 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); 2483 if (tp->packets_out > pcount)
2457 if (tp->packets_out > pcount) 2484 goto probe_sent;
2458 goto probe_sent; 2485 goto rearm_timer;
2459 goto rearm_timer;
2460 }
2461 skb = tcp_write_queue_prev(sk, skb);
2462 } else {
2463 skb = tcp_write_queue_tail(sk);
2464 } 2486 }
2487 skb = skb_rb_last(&sk->tcp_rtx_queue);
2465 2488
2466 /* At most one outstanding TLP retransmission. */ 2489 /* At most one outstanding TLP retransmission. */
2467 if (tp->tlp_high_seq) 2490 if (tp->tlp_high_seq)
@@ -2479,10 +2502,11 @@ void tcp_send_loss_probe(struct sock *sk)
2479 goto rearm_timer; 2502 goto rearm_timer;
2480 2503
2481 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { 2504 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2482 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss, 2505 if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2506 (pcount - 1) * mss, mss,
2483 GFP_ATOMIC))) 2507 GFP_ATOMIC)))
2484 goto rearm_timer; 2508 goto rearm_timer;
2485 skb = tcp_write_queue_next(sk, skb); 2509 skb = skb_rb_next(skb);
2486 } 2510 }
2487 2511
2488 if (WARN_ON(!skb || !tcp_skb_pcount(skb))) 2512 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2682,7 +2706,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2682static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) 2706static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2683{ 2707{
2684 struct tcp_sock *tp = tcp_sk(sk); 2708 struct tcp_sock *tp = tcp_sk(sk);
2685 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); 2709 struct sk_buff *next_skb = skb_rb_next(skb);
2686 int skb_size, next_skb_size; 2710 int skb_size, next_skb_size;
2687 2711
2688 skb_size = skb->len; 2712 skb_size = skb->len;
@@ -2697,9 +2721,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2697 else if (!skb_shift(skb, next_skb, next_skb_size)) 2721 else if (!skb_shift(skb, next_skb, next_skb_size))
2698 return false; 2722 return false;
2699 } 2723 }
2700 tcp_highest_sack_combine(sk, next_skb, skb); 2724 tcp_highest_sack_replace(sk, next_skb, skb);
2701
2702 tcp_unlink_write_queue(next_skb, sk);
2703 2725
2704 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 2726 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2705 skb->ip_summed = CHECKSUM_PARTIAL; 2727 skb->ip_summed = CHECKSUM_PARTIAL;
@@ -2728,7 +2750,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2728 2750
2729 tcp_skb_collapse_tstamp(skb, next_skb); 2751 tcp_skb_collapse_tstamp(skb, next_skb);
2730 2752
2731 sk_wmem_free_skb(sk, next_skb); 2753 tcp_rtx_queue_unlink_and_free(next_skb, sk);
2732 return true; 2754 return true;
2733} 2755}
2734 2756
@@ -2739,8 +2761,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2739 return false; 2761 return false;
2740 if (skb_cloned(skb)) 2762 if (skb_cloned(skb))
2741 return false; 2763 return false;
2742 if (skb == tcp_send_head(sk))
2743 return false;
2744 /* Some heuristics for collapsing over SACK'd could be invented */ 2764 /* Some heuristics for collapsing over SACK'd could be invented */
2745 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) 2765 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2746 return false; 2766 return false;
@@ -2758,12 +2778,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2758 struct sk_buff *skb = to, *tmp; 2778 struct sk_buff *skb = to, *tmp;
2759 bool first = true; 2779 bool first = true;
2760 2780
2761 if (!sysctl_tcp_retrans_collapse) 2781 if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
2762 return; 2782 return;
2763 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 2783 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2764 return; 2784 return;
2765 2785
2766 tcp_for_write_queue_from_safe(skb, tmp, sk) { 2786 skb_rbtree_walk_from_safe(skb, tmp) {
2767 if (!tcp_can_collapse(sk, skb)) 2787 if (!tcp_can_collapse(sk, skb))
2768 break; 2788 break;
2769 2789
@@ -2838,7 +2858,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2838 2858
2839 len = cur_mss * segs; 2859 len = cur_mss * segs;
2840 if (skb->len > len) { 2860 if (skb->len > len) {
2841 if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC)) 2861 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
2862 cur_mss, GFP_ATOMIC))
2842 return -ENOMEM; /* We'll try again later. */ 2863 return -ENOMEM; /* We'll try again later. */
2843 } else { 2864 } else {
2844 if (skb_unclone(skb, GFP_ATOMIC)) 2865 if (skb_unclone(skb, GFP_ATOMIC))
@@ -2872,17 +2893,23 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2872 skb_headroom(skb) >= 0xFFFF)) { 2893 skb_headroom(skb) >= 0xFFFF)) {
2873 struct sk_buff *nskb; 2894 struct sk_buff *nskb;
2874 2895
2875 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); 2896 tcp_skb_tsorted_save(skb) {
2876 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : 2897 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2877 -ENOBUFS; 2898 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2878 if (!err) 2899 -ENOBUFS;
2879 skb->skb_mstamp = tp->tcp_mstamp; 2900 } tcp_skb_tsorted_restore(skb);
2901
2902 if (!err) {
2903 tcp_update_skb_after_send(tp, skb);
2904 tcp_rate_skb_sent(sk, skb);
2905 }
2880 } else { 2906 } else {
2881 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2907 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2882 } 2908 }
2883 2909
2884 if (likely(!err)) { 2910 if (likely(!err)) {
2885 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; 2911 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2912 trace_tcp_retransmit_skb(sk, skb);
2886 } else if (err != -EBUSY) { 2913 } else if (err != -EBUSY) {
2887 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2914 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2888 } 2915 }
@@ -2919,36 +2946,25 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2919 * retransmitted data is acknowledged. It tries to continue 2946 * retransmitted data is acknowledged. It tries to continue
2920 * resending the rest of the retransmit queue, until either 2947 * resending the rest of the retransmit queue, until either
2921 * we've sent it all or the congestion window limit is reached. 2948 * we've sent it all or the congestion window limit is reached.
2922 * If doing SACK, the first ACK which comes back for a timeout
2923 * based retransmit packet might feed us FACK information again.
2924 * If so, we use it to avoid unnecessary retransmissions.
2925 */ 2949 */
2926void tcp_xmit_retransmit_queue(struct sock *sk) 2950void tcp_xmit_retransmit_queue(struct sock *sk)
2927{ 2951{
2928 const struct inet_connection_sock *icsk = inet_csk(sk); 2952 const struct inet_connection_sock *icsk = inet_csk(sk);
2953 struct sk_buff *skb, *rtx_head, *hole = NULL;
2929 struct tcp_sock *tp = tcp_sk(sk); 2954 struct tcp_sock *tp = tcp_sk(sk);
2930 struct sk_buff *skb;
2931 struct sk_buff *hole = NULL;
2932 u32 max_segs; 2955 u32 max_segs;
2933 int mib_idx; 2956 int mib_idx;
2934 2957
2935 if (!tp->packets_out) 2958 if (!tp->packets_out)
2936 return; 2959 return;
2937 2960
2938 if (tp->retransmit_skb_hint) { 2961 rtx_head = tcp_rtx_queue_head(sk);
2939 skb = tp->retransmit_skb_hint; 2962 skb = tp->retransmit_skb_hint ?: rtx_head;
2940 } else {
2941 skb = tcp_write_queue_head(sk);
2942 }
2943
2944 max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); 2963 max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
2945 tcp_for_write_queue_from(skb, sk) { 2964 skb_rbtree_walk_from(skb) {
2946 __u8 sacked; 2965 __u8 sacked;
2947 int segs; 2966 int segs;
2948 2967
2949 if (skb == tcp_send_head(sk))
2950 break;
2951
2952 if (tcp_pacing_check(sk)) 2968 if (tcp_pacing_check(sk))
2953 break; 2969 break;
2954 2970
@@ -2993,7 +3009,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2993 if (tcp_in_cwnd_reduction(sk)) 3009 if (tcp_in_cwnd_reduction(sk))
2994 tp->prr_out += tcp_skb_pcount(skb); 3010 tp->prr_out += tcp_skb_pcount(skb);
2995 3011
2996 if (skb == tcp_write_queue_head(sk) && 3012 if (skb == rtx_head &&
2997 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT) 3013 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
2998 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 3014 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2999 inet_csk(sk)->icsk_rto, 3015 inet_csk(sk)->icsk_rto,
@@ -3035,12 +3051,15 @@ void tcp_send_fin(struct sock *sk)
3035 * Note: in the latter case, FIN packet will be sent after a timeout, 3051 * Note: in the latter case, FIN packet will be sent after a timeout,
3036 * as TCP stack thinks it has already been transmitted. 3052 * as TCP stack thinks it has already been transmitted.
3037 */ 3053 */
3038 if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) { 3054 if (!tskb && tcp_under_memory_pressure(sk))
3055 tskb = skb_rb_last(&sk->tcp_rtx_queue);
3056
3057 if (tskb) {
3039coalesce: 3058coalesce:
3040 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; 3059 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
3041 TCP_SKB_CB(tskb)->end_seq++; 3060 TCP_SKB_CB(tskb)->end_seq++;
3042 tp->write_seq++; 3061 tp->write_seq++;
3043 if (!tcp_send_head(sk)) { 3062 if (tcp_write_queue_empty(sk)) {
3044 /* This means tskb was already sent. 3063 /* This means tskb was already sent.
3045 * Pretend we included the FIN on previous transmit. 3064 * Pretend we included the FIN on previous transmit.
3046 * We need to set tp->snd_nxt to the value it would have 3065 * We need to set tp->snd_nxt to the value it would have
@@ -3057,6 +3076,7 @@ coalesce:
3057 goto coalesce; 3076 goto coalesce;
3058 return; 3077 return;
3059 } 3078 }
3079 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
3060 skb_reserve(skb, MAX_TCP_HEADER); 3080 skb_reserve(skb, MAX_TCP_HEADER);
3061 sk_forced_mem_schedule(sk, skb->truesize); 3081 sk_forced_mem_schedule(sk, skb->truesize);
3062 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 3082 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
@@ -3093,6 +3113,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
3093 /* Send it off. */ 3113 /* Send it off. */
3094 if (tcp_transmit_skb(sk, skb, 0, priority)) 3114 if (tcp_transmit_skb(sk, skb, 0, priority))
3095 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); 3115 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3116
 3117	/* The skb argument of trace_tcp_send_reset() carries the skb that
 3118	 * caused the RST; the skb here is not that one, so pass NULL.
3119 */
3120 trace_tcp_send_reset(sk, NULL);
3096} 3121}
3097 3122
3098/* Send a crossed SYN-ACK during socket establishment. 3123/* Send a crossed SYN-ACK during socket establishment.
@@ -3105,20 +3130,24 @@ int tcp_send_synack(struct sock *sk)
3105{ 3130{
3106 struct sk_buff *skb; 3131 struct sk_buff *skb;
3107 3132
3108 skb = tcp_write_queue_head(sk); 3133 skb = tcp_rtx_queue_head(sk);
3109 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { 3134 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
3110 pr_debug("%s: wrong queue state\n", __func__); 3135 pr_err("%s: wrong queue state\n", __func__);
3111 return -EFAULT; 3136 return -EFAULT;
3112 } 3137 }
3113 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { 3138 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
3114 if (skb_cloned(skb)) { 3139 if (skb_cloned(skb)) {
3115 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 3140 struct sk_buff *nskb;
3141
3142 tcp_skb_tsorted_save(skb) {
3143 nskb = skb_copy(skb, GFP_ATOMIC);
3144 } tcp_skb_tsorted_restore(skb);
3116 if (!nskb) 3145 if (!nskb)
3117 return -ENOMEM; 3146 return -ENOMEM;
3118 tcp_unlink_write_queue(skb, sk); 3147 INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
3148 tcp_rtx_queue_unlink_and_free(skb, sk);
3119 __skb_header_release(nskb); 3149 __skb_header_release(nskb);
3120 __tcp_add_write_queue_head(sk, nskb); 3150 tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
3121 sk_wmem_free_skb(sk, skb);
3122 sk->sk_wmem_queued += nskb->truesize; 3151 sk->sk_wmem_queued += nskb->truesize;
3123 sk_mem_charge(sk, nskb->truesize); 3152 sk_mem_charge(sk, nskb->truesize);
3124 skb = nskb; 3153 skb = nskb;
@@ -3195,8 +3224,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3195 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); 3224 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
3196#endif 3225#endif
3197 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); 3226 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3198 tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + 3227 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3199 sizeof(*th); 3228 foc) + sizeof(*th);
3200 3229
3201 skb_push(skb, tcp_header_size); 3230 skb_push(skb, tcp_header_size);
3202 skb_reset_transport_header(skb); 3231 skb_reset_transport_header(skb);
@@ -3209,13 +3238,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3209 th->source = htons(ireq->ir_num); 3238 th->source = htons(ireq->ir_num);
3210 th->dest = ireq->ir_rmt_port; 3239 th->dest = ireq->ir_rmt_port;
3211 skb->mark = ireq->ir_mark; 3240 skb->mark = ireq->ir_mark;
3212 /* Setting of flags are superfluous here for callers (and ECE is 3241 skb->ip_summed = CHECKSUM_PARTIAL;
3213 * not even correctly set) 3242 th->seq = htonl(tcp_rsk(req)->snt_isn);
3214 */
3215 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
3216 TCPHDR_SYN | TCPHDR_ACK);
3217
3218 th->seq = htonl(TCP_SKB_CB(skb)->seq);
3219 /* XXX data is queued and acked as is. No buffer/window check */ 3243 /* XXX data is queued and acked as is. No buffer/window check */
3220 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); 3244 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
3221 3245
@@ -3302,7 +3326,7 @@ static void tcp_connect_init(struct sock *sk)
3302 if (rcv_wnd == 0) 3326 if (rcv_wnd == 0)
3303 rcv_wnd = dst_metric(dst, RTAX_INITRWND); 3327 rcv_wnd = dst_metric(dst, RTAX_INITRWND);
3304 3328
3305 tcp_select_initial_window(tcp_full_space(sk), 3329 tcp_select_initial_window(sk, tcp_full_space(sk),
3306 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), 3330 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3307 &tp->rcv_wnd, 3331 &tp->rcv_wnd,
3308 &tp->window_clamp, 3332 &tp->window_clamp,
@@ -3341,7 +3365,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3341 3365
3342 tcb->end_seq += skb->len; 3366 tcb->end_seq += skb->len;
3343 __skb_header_release(skb); 3367 __skb_header_release(skb);
3344 __tcp_add_write_queue_tail(sk, skb);
3345 sk->sk_wmem_queued += skb->truesize; 3368 sk->sk_wmem_queued += skb->truesize;
3346 sk_mem_charge(sk, skb->truesize); 3369 sk_mem_charge(sk, skb->truesize);
3347 tp->write_seq = tcb->end_seq; 3370 tp->write_seq = tcb->end_seq;
@@ -3389,6 +3412,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3389 int copied = copy_from_iter(skb_put(syn_data, space), space, 3412 int copied = copy_from_iter(skb_put(syn_data, space), space,
3390 &fo->data->msg_iter); 3413 &fo->data->msg_iter);
3391 if (unlikely(!copied)) { 3414 if (unlikely(!copied)) {
3415 tcp_skb_tsorted_anchor_cleanup(syn_data);
3392 kfree_skb(syn_data); 3416 kfree_skb(syn_data);
3393 goto fallback; 3417 goto fallback;
3394 } 3418 }
@@ -3419,10 +3443,15 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3419 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; 3443 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3420 if (!err) { 3444 if (!err) {
3421 tp->syn_data = (fo->copied > 0); 3445 tp->syn_data = (fo->copied > 0);
3446 tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
3422 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); 3447 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3423 goto done; 3448 goto done;
3424 } 3449 }
3425 3450
3451 /* data was not sent, put it in write_queue */
3452 __skb_queue_tail(&sk->sk_write_queue, syn_data);
3453 tp->packets_out -= tcp_skb_pcount(syn_data);
3454
3426fallback: 3455fallback:
3427 /* Send a regular SYN with Fast Open cookie request option */ 3456 /* Send a regular SYN with Fast Open cookie request option */
3428 if (fo->cookie.len > 0) 3457 if (fo->cookie.len > 0)
@@ -3463,6 +3492,7 @@ int tcp_connect(struct sock *sk)
3463 tp->retrans_stamp = tcp_time_stamp(tp); 3492 tp->retrans_stamp = tcp_time_stamp(tp);
3464 tcp_connect_queue_skb(sk, buff); 3493 tcp_connect_queue_skb(sk, buff);
3465 tcp_ecn_send_syn(sk, buff); 3494 tcp_ecn_send_syn(sk, buff);
3495 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
3466 3496
3467 /* Send off SYN; include data in Fast Open. */ 3497 /* Send off SYN; include data in Fast Open. */
3468 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : 3498 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3475,6 +3505,11 @@ int tcp_connect(struct sock *sk)
3475 */ 3505 */
3476 tp->snd_nxt = tp->write_seq; 3506 tp->snd_nxt = tp->write_seq;
3477 tp->pushed_seq = tp->write_seq; 3507 tp->pushed_seq = tp->write_seq;
3508 buff = tcp_send_head(sk);
3509 if (unlikely(buff)) {
3510 tp->snd_nxt = TCP_SKB_CB(buff)->seq;
3511 tp->pushed_seq = TCP_SKB_CB(buff)->seq;
3512 }
3478 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); 3513 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3479 3514
3480 /* Timer for repeating the SYN until an answer. */ 3515 /* Timer for repeating the SYN until an answer. */
@@ -3652,7 +3687,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
3652 skb->len > mss) { 3687 skb->len > mss) {
3653 seg_size = min(seg_size, mss); 3688 seg_size = min(seg_size, mss);
3654 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; 3689 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3655 if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC)) 3690 if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
3691 skb, seg_size, mss, GFP_ATOMIC))
3656 return -1; 3692 return -1;
3657 } else if (!tcp_skb_pcount(skb)) 3693 } else if (!tcp_skb_pcount(skb))
3658 tcp_set_skb_tso_segs(skb, mss); 3694 tcp_set_skb_tso_segs(skb, mss);
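
Both tcp_fragment() call sites in this patch now pass a queue selector (TCP_FRAG_IN_RTX_QUEUE in __tcp_retransmit_skb() earlier, TCP_FRAG_IN_WRITE_QUEUE here), since the skb may live in either the write list or the rtx rb-tree and the split halves must be re-linked into the right structure. A toy dispatch on the same idea (invented function, not the kernel signature):

/* The split itself is queue-agnostic; only the re-insertion of the
 * two halves differs depending on which queue owns the skb.
 */
#include <stdio.h>

enum tcp_queue {
    TCP_FRAG_IN_WRITE_QUEUE,
    TCP_FRAG_IN_RTX_QUEUE,
};

static int fragment(enum tcp_queue q, unsigned len, unsigned mss)
{
    unsigned head = mss, tail = len - mss;

    printf("split %u -> %u + %u, relink into %s\n", len, head, tail,
           q == TCP_FRAG_IN_RTX_QUEUE ? "rtx rb-tree" : "write list");
    return 0;
}

int main(void)
{
    fragment(TCP_FRAG_IN_WRITE_QUEUE, 2800, 1400);
    fragment(TCP_FRAG_IN_RTX_QUEUE, 2100, 1400);
    return 0;
}
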
@@ -3682,7 +3718,7 @@ void tcp_send_probe0(struct sock *sk)
3682 3718
3683 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); 3719 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3684 3720
3685 if (tp->packets_out || !tcp_send_head(sk)) { 3721 if (tp->packets_out || tcp_write_queue_empty(sk)) {
3686 /* Cancel probe timer, if it is not required. */ 3722 /* Cancel probe timer, if it is not required. */
3687 icsk->icsk_probes_out = 0; 3723 icsk->icsk_probes_out = 0;
3688 icsk->icsk_backoff = 0; 3724 icsk->icsk_backoff = 0;
@@ -3723,6 +3759,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3723 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); 3759 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3724 if (unlikely(tcp_passive_fastopen(sk))) 3760 if (unlikely(tcp_passive_fastopen(sk)))
3725 tcp_sk(sk)->total_retrans++; 3761 tcp_sk(sk)->total_retrans++;
3762 trace_tcp_retransmit_synack(sk, req);
3726 } 3763 }
3727 return res; 3764 return res;
3728} 3765}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 449cd914d58e..d3ea89020c69 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -1,8 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/tcp.h> 2#include <linux/tcp.h>
2#include <net/tcp.h> 3#include <net/tcp.h>
3 4
4int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
5
6static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) 5static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
7{ 6{
8 struct tcp_sock *tp = tcp_sk(sk); 7 struct tcp_sock *tp = tcp_sk(sk);
@@ -45,7 +44,8 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
45static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) 44static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
46{ 45{
47 struct tcp_sock *tp = tcp_sk(sk); 46 struct tcp_sock *tp = tcp_sk(sk);
48 struct sk_buff *skb; 47 u32 min_rtt = tcp_min_rtt(tp);
48 struct sk_buff *skb, *n;
49 u32 reo_wnd; 49 u32 reo_wnd;
50 50
51 *reo_timeout = 0; 51 *reo_timeout = 0;
@@ -55,48 +55,36 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
55 * to queuing or delayed ACKs. 55 * to queuing or delayed ACKs.
56 */ 56 */
57 reo_wnd = 1000; 57 reo_wnd = 1000;
58 if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U) 58 if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
59 reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); 59 reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
60 reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
61 }
60 62
61 tcp_for_write_queue(skb, sk) { 63 list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
64 tcp_tsorted_anchor) {
62 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 65 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
66 s32 remaining;
63 67
64 if (skb == tcp_send_head(sk)) 68 /* Skip ones marked lost but not yet retransmitted */
65 break; 69 if ((scb->sacked & TCPCB_LOST) &&
66 70 !(scb->sacked & TCPCB_SACKED_RETRANS))
67 /* Skip ones already (s)acked */
68 if (!after(scb->end_seq, tp->snd_una) ||
69 scb->sacked & TCPCB_SACKED_ACKED)
70 continue; 71 continue;
71 72
72 if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, 73 if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
73 tp->rack.end_seq, scb->end_seq)) { 74 tp->rack.end_seq, scb->end_seq))
74 /* Step 3 in draft-cheng-tcpm-rack-00.txt: 75 break;
75 * A packet is lost if its elapsed time is beyond
76 * the recent RTT plus the reordering window.
77 */
78 u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
79 skb->skb_mstamp);
80 s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
81
82 if (remaining < 0) {
83 tcp_rack_mark_skb_lost(sk, skb);
84 continue;
85 }
86
87 /* Skip ones marked lost but not yet retransmitted */
88 if ((scb->sacked & TCPCB_LOST) &&
89 !(scb->sacked & TCPCB_SACKED_RETRANS))
90 continue;
91 76
77 /* A packet is lost if it has not been s/acked beyond
78 * the recent RTT plus the reordering window.
79 */
80 remaining = tp->rack.rtt_us + reo_wnd -
81 tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
82 if (remaining < 0) {
83 tcp_rack_mark_skb_lost(sk, skb);
84 list_del_init(&skb->tcp_tsorted_anchor);
85 } else {
92 /* Record maximum wait time (+1 to avoid 0) */ 86 /* Record maximum wait time (+1 to avoid 0) */
93 *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); 87 *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
94
95 } else if (!(scb->sacked & TCPCB_RETRANS)) {
96 /* Original data are sent sequentially so stop early
97 * b/c the rest are all sent after rack_sent
98 */
99 break;
100 } 88 }
101 } 89 }
102} 90}
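
The rewritten tcp_rack_detect_loss() walks packets in send-time order and declares one lost once rtt_us + reo_wnd has elapsed since it was sent; otherwise the remaining wait feeds the reorder timeout. A small arithmetic sketch of that decision, with invented timestamps (illustrative names, not kernel API):

/* RACK-style lost/wait decision: a packet is lost once
 * (now - sent) > rtt + reo_wnd; otherwise report how long to wait.
 * All values in microseconds; the numbers are made up.
 */
#include <stdio.h>

static long rack_remaining(long now, long sent, long rtt, long reo_wnd)
{
    return rtt + reo_wnd - (now - sent);
}

int main(void)
{
    long now = 120000, rtt = 30000, reo_wnd = 7500;  /* min_rtt/4 */
    long sent[] = { 70000, 90000, 118000 };

    for (int i = 0; i < 3; i++) {
        long rem = rack_remaining(now, sent[i], rtt, reo_wnd);

        if (rem < 0)
            printf("pkt sent@%ld: lost (deadline passed %ldus ago)\n",
                   sent[i], -rem);
        else
            printf("pkt sent@%ld: arm reo timer for %ldus\n",
                   sent[i], rem + 1);  /* +1 avoids a zero timeout */
    }
    return 0;
}
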
@@ -175,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk)
175 if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) 163 if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS)
176 tcp_rearm_rto(sk); 164 tcp_rearm_rto(sk);
177} 165}
166
167/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
168 *
169 * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
 170 * by srtt), since a spurious retransmission may have been caused by
 171 * a reordering delay longer than reo_wnd.
172 *
173 * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
174 * no. of successful recoveries (accounts for full DSACK-based loss
175 * recovery undo). After that, reset it to default (min_rtt/4).
176 *
 177 * At most, reo_wnd is incremented once per rtt, so that the new
 178 * DSACK we are reacting to is (approximately) due to a spurious
 179 * retransmission sent after reo_wnd was last updated.
180 *
181 * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
182 * absolute value to account for change in rtt.
183 */
184void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
185{
186 struct tcp_sock *tp = tcp_sk(sk);
187
188 if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
189 !rs->prior_delivered)
190 return;
191
192 /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
193 if (before(rs->prior_delivered, tp->rack.last_delivered))
194 tp->rack.dsack_seen = 0;
195
196 /* Adjust the reo_wnd if update is pending */
197 if (tp->rack.dsack_seen) {
198 tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
199 tp->rack.reo_wnd_steps + 1);
200 tp->rack.dsack_seen = 0;
201 tp->rack.last_delivered = tp->delivered;
202 tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
203 } else if (!tp->rack.reo_wnd_persist) {
204 tp->rack.reo_wnd_steps = 1;
205 }
206}
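
A compact model of the bookkeeping described above: bump the step count on a DSACK at most once per round trip, keep it for 16 recoveries, then fall back to one step of min_rtt/4. The harness below is invented (the kernel gates the once-per-rtt check on rs->prior_delivered); field names mirror the patch:

/* Sketch of the reo_wnd step bookkeeping; event hooks are toy stand-ins
 * for the ACK-processing paths that drive the real state machine.
 */
#include <stdio.h>

#define RECOVERY_THRESH 16

struct rack {
    unsigned reo_wnd_steps;    /* reo_wnd = steps * min_rtt/4 */
    unsigned reo_wnd_persist;  /* recoveries left before reset */
    int dsack_seen;
};

static void on_dsack(struct rack *r)
{
    r->dsack_seen = 1;
}

static void on_round_trip_end(struct rack *r)
{
    if (r->dsack_seen) {
        if (r->reo_wnd_steps < 0xFF)
            r->reo_wnd_steps++;
        r->dsack_seen = 0;
        r->reo_wnd_persist = RECOVERY_THRESH;
    }
}

static void on_recovery_end(struct rack *r)
{
    if (r->reo_wnd_persist)
        r->reo_wnd_persist--;
    else
        r->reo_wnd_steps = 1;  /* back to the min_rtt/4 default */
}

int main(void)
{
    struct rack r = { .reo_wnd_steps = 1 };

    on_dsack(&r);
    on_round_trip_end(&r);
    printf("after DSACK: steps=%u persist=%u\n",
           r.reo_wnd_steps, r.reo_wnd_persist);

    for (int i = 0; i < RECOVERY_THRESH + 1; i++)
        on_recovery_end(&r);
    printf("after %d recoveries: steps=%u\n",
           RECOVERY_THRESH + 1, r.reo_wnd_steps);
    return 0;
}
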
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 655dd8d7f064..16df6dd44b98 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,8 +22,6 @@
22#include <linux/gfp.h> 22#include <linux/gfp.h>
23#include <net/tcp.h> 23#include <net/tcp.h>
24 24
25int sysctl_tcp_thin_linear_timeouts __read_mostly;
26
27/** 25/**
28 * tcp_write_err() - close socket and save error info 26 * tcp_write_err() - close socket and save error info
29 * @sk: The socket the error has appeared on. 27 * @sk: The socket the error has appeared on.
@@ -109,26 +107,23 @@ static int tcp_orphan_retries(struct sock *sk, bool alive)
109 107
110static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) 108static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
111{ 109{
112 struct net *net = sock_net(sk); 110 const struct net *net = sock_net(sk);
111 int mss;
113 112
114 /* Black hole detection */ 113 /* Black hole detection */
115 if (net->ipv4.sysctl_tcp_mtu_probing) { 114 if (!net->ipv4.sysctl_tcp_mtu_probing)
116 if (!icsk->icsk_mtup.enabled) { 115 return;
117 icsk->icsk_mtup.enabled = 1; 116
118 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; 117 if (!icsk->icsk_mtup.enabled) {
119 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 118 icsk->icsk_mtup.enabled = 1;
120 } else { 119 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
121 struct net *net = sock_net(sk); 120 } else {
122 struct tcp_sock *tp = tcp_sk(sk); 121 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
123 int mss; 122 mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
124 123 mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
125 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; 124 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
126 mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
127 mss = max(mss, 68 - tp->tcp_header_len);
128 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
129 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
130 }
131 } 125 }
126 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
132} 127}
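
The restructured tcp_mtu_probing() halves the MSS form of the current search floor and clamps it from above by sysctl_tcp_base_mss and from below by 68 - header_len (the minimum IPv4 MTU minus headers). Worked numbers, all invented:

/* Black-hole probing fallback: halve the search floor and clamp it
 * between (68 - header_len) and sysctl_tcp_base_mss.
 */
#include <stdio.h>

static int min_i(int a, int b) { return a < b ? a : b; }
static int max_i(int a, int b) { return a > b ? a : b; }

int main(void)
{
    int base_mss = 1024;        /* net.ipv4.tcp_base_mss default */
    int header_len = 40;        /* TCP header incl. timestamps */
    int search_low_mss = 1200;  /* MSS form of icsk_mtup.search_low */

    int mss = search_low_mss >> 1;       /* halve: 600 */
    mss = min_i(base_mss, mss);          /* cap from above: still 600 */
    mss = max_i(mss, 68 - header_len);   /* floor at 28 */
    printf("next probe MSS floor: %d\n", mss);
    return 0;
}
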
133 128
134 129
@@ -156,8 +151,13 @@ static bool retransmits_timed_out(struct sock *sk,
156 return false; 151 return false;
157 152
158 start_ts = tcp_sk(sk)->retrans_stamp; 153 start_ts = tcp_sk(sk)->retrans_stamp;
159 if (unlikely(!start_ts)) 154 if (unlikely(!start_ts)) {
160 start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk)); 155 struct sk_buff *head = tcp_rtx_queue_head(sk);
156
157 if (!head)
158 return false;
159 start_ts = tcp_skb_timestamp(head);
160 }
161 161
162 if (likely(timeout == 0)) { 162 if (likely(timeout == 0)) {
163 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); 163 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
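
For reference, the check around this hunk budgets elapsed time for N backed-off retransmits: the RTO doubles from rto_base until it reaches TCP_RTO_MAX, after which it grows linearly, and linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base) marks the crossover. A standalone sketch of that arithmetic (HZ and rto_base values invented):

/* Elapsed-time budget for N backed-off retransmits: doubling phase
 * contributes ((2 << n) - 1) * rto_base; past the ilog2() threshold
 * each further retransmit adds a full TCP_RTO_MAX.
 */
#include <stdio.h>

#define HZ 1000
#define TCP_RTO_MAX (120 * HZ)

static unsigned ilog2_u(unsigned v)
{
    unsigned r = 0;

    while (v >>= 1)
        r++;
    return r;
}

static unsigned long timeout_after(unsigned boundary, unsigned rto_base)
{
    unsigned thresh = ilog2_u(TCP_RTO_MAX / rto_base);

    if (boundary <= thresh)
        return ((2UL << boundary) - 1) * rto_base;
    return ((2UL << thresh) - 1) * rto_base +
           (boundary - thresh) * TCP_RTO_MAX;
}

int main(void)
{
    unsigned rto_base = 200;   /* ~TCP_RTO_MIN in jiffies at HZ=1000 */

    for (unsigned n = 1; n <= 8; n++)
        printf("retries=%u -> budget %lums\n",
               n, timeout_after(n, rto_base));
    return 0;
}
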
@@ -283,15 +283,17 @@ out:
283 * 283 *
284 * Returns: Nothing (void) 284 * Returns: Nothing (void)
285 */ 285 */
286static void tcp_delack_timer(unsigned long data) 286static void tcp_delack_timer(struct timer_list *t)
287{ 287{
288 struct sock *sk = (struct sock *)data; 288 struct inet_connection_sock *icsk =
289 from_timer(icsk, t, icsk_delack_timer);
290 struct sock *sk = &icsk->icsk_inet.sk;
289 291
290 bh_lock_sock(sk); 292 bh_lock_sock(sk);
291 if (!sock_owned_by_user(sk)) { 293 if (!sock_owned_by_user(sk)) {
292 tcp_delack_timer_handler(sk); 294 tcp_delack_timer_handler(sk);
293 } else { 295 } else {
294 inet_csk(sk)->icsk_ack.blocked = 1; 296 icsk->icsk_ack.blocked = 1;
295 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); 297 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 296		/* delegate our work to tcp_release_cb() */	 298		/* delegate our work to tcp_release_cb() */
297 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) 299 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
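
This hunk is part of the tree-wide timer conversion: the callback now receives the timer_list itself and recovers the owning object with from_timer(), which is container_of() underneath. A userspace sketch of the same pattern with toy types (not the kernel API):

/* Recover the enclosing object from a pointer to an embedded member:
 * pointer arithmetic from the member back to its struct, which is
 * what from_timer() does for the converted callbacks above.
 */
#include <stdio.h>
#include <stddef.h>

struct timer {
    void (*fn)(struct timer *t);
};

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct conn {
    int id;
    struct timer delack_timer;  /* embedded, like icsk_delack_timer */
};

static void delack_fire(struct timer *t)
{
    struct conn *c = container_of(t, struct conn, delack_timer);

    printf("delayed-ACK timer fired for conn %d\n", c->id);
}

int main(void)
{
    struct conn c = { .id = 42, .delack_timer = { .fn = delack_fire } };

    c.delack_timer.fn(&c.delack_timer);  /* simulate expiry */
    return 0;
}
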
@@ -304,11 +306,12 @@ static void tcp_delack_timer(unsigned long data)
304static void tcp_probe_timer(struct sock *sk) 306static void tcp_probe_timer(struct sock *sk)
305{ 307{
306 struct inet_connection_sock *icsk = inet_csk(sk); 308 struct inet_connection_sock *icsk = inet_csk(sk);
309 struct sk_buff *skb = tcp_send_head(sk);
307 struct tcp_sock *tp = tcp_sk(sk); 310 struct tcp_sock *tp = tcp_sk(sk);
308 int max_probes; 311 int max_probes;
309 u32 start_ts; 312 u32 start_ts;
310 313
311 if (tp->packets_out || !tcp_send_head(sk)) { 314 if (tp->packets_out || !skb) {
312 icsk->icsk_probes_out = 0; 315 icsk->icsk_probes_out = 0;
313 return; 316 return;
314 } 317 }
@@ -321,9 +324,9 @@ static void tcp_probe_timer(struct sock *sk)
321 * corresponding system limit. We also implement similar policy when 324 * corresponding system limit. We also implement similar policy when
322 * we use RTO to probe window in tcp_retransmit_timer(). 325 * we use RTO to probe window in tcp_retransmit_timer().
323 */ 326 */
324 start_ts = tcp_skb_timestamp(tcp_send_head(sk)); 327 start_ts = tcp_skb_timestamp(skb);
325 if (!start_ts) 328 if (!start_ts)
326 tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp; 329 skb->skb_mstamp = tp->tcp_mstamp;
327 else if (icsk->icsk_user_timeout && 330 else if (icsk->icsk_user_timeout &&
328 (s32)(tcp_time_stamp(tp) - start_ts) > 331 (s32)(tcp_time_stamp(tp) - start_ts) >
329 jiffies_to_msecs(icsk->icsk_user_timeout)) 332 jiffies_to_msecs(icsk->icsk_user_timeout))
@@ -408,7 +411,7 @@ void tcp_retransmit_timer(struct sock *sk)
408 if (!tp->packets_out) 411 if (!tp->packets_out)
409 goto out; 412 goto out;
410 413
411 WARN_ON(tcp_write_queue_empty(sk)); 414 WARN_ON(tcp_rtx_queue_empty(sk));
412 415
413 tp->tlp_high_seq = 0; 416 tp->tlp_high_seq = 0;
414 417
@@ -441,7 +444,7 @@ void tcp_retransmit_timer(struct sock *sk)
441 goto out; 444 goto out;
442 } 445 }
443 tcp_enter_loss(sk); 446 tcp_enter_loss(sk);
444 tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); 447 tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
445 __sk_dst_reset(sk); 448 __sk_dst_reset(sk);
446 goto out_reset_timer; 449 goto out_reset_timer;
447 } 450 }
@@ -473,7 +476,7 @@ void tcp_retransmit_timer(struct sock *sk)
473 476
474 tcp_enter_loss(sk); 477 tcp_enter_loss(sk);
475 478
476 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) { 479 if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
477 /* Retransmission failed because of local congestion, 480 /* Retransmission failed because of local congestion,
478 * do not backoff. 481 * do not backoff.
479 */ 482 */
@@ -514,7 +517,7 @@ out_reset_timer:
514 * linear-timeout retransmissions into a black hole 517 * linear-timeout retransmissions into a black hole
515 */ 518 */
516 if (sk->sk_state == TCP_ESTABLISHED && 519 if (sk->sk_state == TCP_ESTABLISHED &&
517 (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && 520 (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
518 tcp_stream_is_thin(tp) && 521 tcp_stream_is_thin(tp) &&
519 icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { 522 icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
520 icsk->icsk_backoff = 0; 523 icsk->icsk_backoff = 0;
@@ -570,9 +573,11 @@ out:
570 sk_mem_reclaim(sk); 573 sk_mem_reclaim(sk);
571} 574}
572 575
573static void tcp_write_timer(unsigned long data) 576static void tcp_write_timer(struct timer_list *t)
574{ 577{
575 struct sock *sk = (struct sock *)data; 578 struct inet_connection_sock *icsk =
579 from_timer(icsk, t, icsk_retransmit_timer);
580 struct sock *sk = &icsk->icsk_inet.sk;
576 581
577 bh_lock_sock(sk); 582 bh_lock_sock(sk);
578 if (!sock_owned_by_user(sk)) { 583 if (!sock_owned_by_user(sk)) {
@@ -607,9 +612,9 @@ void tcp_set_keepalive(struct sock *sk, int val)
607EXPORT_SYMBOL_GPL(tcp_set_keepalive); 612EXPORT_SYMBOL_GPL(tcp_set_keepalive);
608 613
609 614
610static void tcp_keepalive_timer (unsigned long data) 615static void tcp_keepalive_timer (struct timer_list *t)
611{ 616{
612 struct sock *sk = (struct sock *) data; 617 struct sock *sk = from_timer(sk, t, sk_timer);
613 struct inet_connection_sock *icsk = inet_csk(sk); 618 struct inet_connection_sock *icsk = inet_csk(sk);
614 struct tcp_sock *tp = tcp_sk(sk); 619 struct tcp_sock *tp = tcp_sk(sk);
615 u32 elapsed; 620 u32 elapsed;
@@ -647,7 +652,7 @@ static void tcp_keepalive_timer (unsigned long data)
647 elapsed = keepalive_time_when(tp); 652 elapsed = keepalive_time_when(tp);
648 653
649 /* It is alive without keepalive 8) */ 654 /* It is alive without keepalive 8) */
650 if (tp->packets_out || tcp_send_head(sk)) 655 if (tp->packets_out || !tcp_write_queue_empty(sk))
651 goto resched; 656 goto resched;
652 657
653 elapsed = keepalive_time_elapsed(tp); 658 elapsed = keepalive_time_elapsed(tp);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 218cfcc77650..ee113ff15fd0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
158 158
159static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) 159static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
160{ 160{
161 return min(tp->snd_ssthresh, tp->snd_cwnd-1); 161 return min(tp->snd_ssthresh, tp->snd_cwnd);
162} 162}
163 163
164static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) 164static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 248cfc0ff9ae..4f24d0e37d9c 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * TCP Vegas congestion control interface 3 * TCP Vegas congestion control interface
3 */ 4 */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ef29df8648e4..e4ff25c947c5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
231 } 231 }
232 } 232 }
233 233
234 /* Initial allocation may have already happened via setsockopt */ 234 return reuseport_alloc(sk);
235 if (!rcu_access_pointer(sk->sk_reuseport_cb))
236 return reuseport_alloc(sk);
237 return 0;
238} 235}
239 236
240/** 237/**
@@ -1061,7 +1058,7 @@ back_from_confirm:
1061 /* ... which is an evident application bug. --ANK */ 1058 /* ... which is an evident application bug. --ANK */
1062 release_sock(sk); 1059 release_sock(sk);
1063 1060
1064 net_dbg_ratelimited("cork app bug 2\n"); 1061 net_dbg_ratelimited("socket already corked\n");
1065 err = -EINVAL; 1062 err = -EINVAL;
1066 goto out; 1063 goto out;
1067 } 1064 }
@@ -1144,7 +1141,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
1144 if (unlikely(!up->pending)) { 1141 if (unlikely(!up->pending)) {
1145 release_sock(sk); 1142 release_sock(sk);
1146 1143
1147 net_dbg_ratelimited("udp cork app bug 3\n"); 1144 net_dbg_ratelimited("cork failed\n");
1148 return -EINVAL; 1145 return -EINVAL;
1149 } 1146 }
1150 1147
@@ -1212,8 +1209,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
1212 if (likely(partial)) { 1209 if (likely(partial)) {
1213 up->forward_deficit += size; 1210 up->forward_deficit += size;
1214 size = up->forward_deficit; 1211 size = up->forward_deficit;
1215 if (size < (sk->sk_rcvbuf >> 2) && 1212 if (size < (sk->sk_rcvbuf >> 2))
1216 !skb_queue_empty(&up->reader_queue))
1217 return; 1213 return;
1218 } else { 1214 } else {
1219 size += up->forward_deficit; 1215 size += up->forward_deficit;
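
With the skb_queue_empty() test dropped, udp_rmem_release() batches receive-memory reclaim purely by size: partial releases accumulate in forward_deficit and are flushed only once they exceed a quarter of sk_rcvbuf. A toy model of that accounting (buffer sizes invented):

/* Batched rmem reclaim: small releases pile up in a deficit and are
 * only handed back once they pass sk_rcvbuf/4.
 */
#include <stdio.h>

struct usock {
    int rcvbuf;
    int forward_deficit;
    int reclaimed;             /* what we actually handed back */
};

static void rmem_release(struct usock *s, int size, int partial)
{
    if (partial) {
        s->forward_deficit += size;
        size = s->forward_deficit;
        if (size < (s->rcvbuf >> 2))
            return;            /* not worth flushing yet */
    } else {
        size += s->forward_deficit;
    }
    s->reclaimed += size;
    s->forward_deficit = 0;
}

int main(void)
{
    struct usock s = { .rcvbuf = 4096 };

    for (int i = 0; i < 5; i++) {
        rmem_release(&s, 300, 1);
        printf("after %d datagrams: deficit=%d reclaimed=%d\n",
               i + 1, s.forward_deficit, s.reclaimed);
    }
    return 0;
}
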
@@ -1856,7 +1852,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1856 */ 1852 */
1857 1853
1858 /* if we're overly short, let UDP handle it */ 1854 /* if we're overly short, let UDP handle it */
1859 encap_rcv = ACCESS_ONCE(up->encap_rcv); 1855 encap_rcv = READ_ONCE(up->encap_rcv);
1860 if (encap_rcv) { 1856 if (encap_rcv) {
1861 int ret; 1857 int ret;
1862 1858
@@ -2221,9 +2217,10 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
2221 return NULL; 2217 return NULL;
2222} 2218}
2223 2219
2224void udp_v4_early_demux(struct sk_buff *skb) 2220int udp_v4_early_demux(struct sk_buff *skb)
2225{ 2221{
2226 struct net *net = dev_net(skb->dev); 2222 struct net *net = dev_net(skb->dev);
2223 struct in_device *in_dev = NULL;
2227 const struct iphdr *iph; 2224 const struct iphdr *iph;
2228 const struct udphdr *uh; 2225 const struct udphdr *uh;
2229 struct sock *sk = NULL; 2226 struct sock *sk = NULL;
@@ -2234,25 +2231,21 @@ void udp_v4_early_demux(struct sk_buff *skb)
2234 2231
2235 /* validate the packet */ 2232 /* validate the packet */
2236 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) 2233 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
2237 return; 2234 return 0;
2238 2235
2239 iph = ip_hdr(skb); 2236 iph = ip_hdr(skb);
2240 uh = udp_hdr(skb); 2237 uh = udp_hdr(skb);
2241 2238
2242 if (skb->pkt_type == PACKET_BROADCAST || 2239 if (skb->pkt_type == PACKET_MULTICAST) {
2243 skb->pkt_type == PACKET_MULTICAST) { 2240 in_dev = __in_dev_get_rcu(skb->dev);
2244 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
2245 2241
2246 if (!in_dev) 2242 if (!in_dev)
2247 return; 2243 return 0;
2248 2244
2249 /* we are supposed to accept bcast packets */ 2245 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
2250 if (skb->pkt_type == PACKET_MULTICAST) { 2246 iph->protocol);
2251 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, 2247 if (!ours)
2252 iph->protocol); 2248 return 0;
2253 if (!ours)
2254 return;
2255 }
2256 2249
2257 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, 2250 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
2258 uh->source, iph->saddr, 2251 uh->source, iph->saddr,
@@ -2263,7 +2256,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
2263 } 2256 }
2264 2257
2265 if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) 2258 if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
2266 return; 2259 return 0;
2267 2260
2268 skb->sk = sk; 2261 skb->sk = sk;
2269 skb->destructor = sock_efree; 2262 skb->destructor = sock_efree;
@@ -2272,12 +2265,23 @@ void udp_v4_early_demux(struct sk_buff *skb)
2272 if (dst) 2265 if (dst)
2273 dst = dst_check(dst, 0); 2266 dst = dst_check(dst, 0);
2274 if (dst) { 2267 if (dst) {
2268 u32 itag = 0;
2269
2275 /* set noref for now. 2270 /* set noref for now.
2276 * any place which wants to hold dst has to call 2271 * any place which wants to hold dst has to call
2277 * dst_hold_safe() 2272 * dst_hold_safe()
2278 */ 2273 */
2279 skb_dst_set_noref(skb, dst); 2274 skb_dst_set_noref(skb, dst);
2275
2276 /* for unconnected multicast sockets we need to validate
2277 * the source on each packet
2278 */
2279 if (!inet_sk(sk)->inet_daddr && in_dev)
2280 return ip_mc_validate_source(skb, iph->daddr,
2281 iph->saddr, iph->tos,
2282 skb->dev, in_dev, &itag);
2280 } 2283 }
2284 return 0;
2281} 2285}
2282 2286
2283int udp_rcv(struct sk_buff *skb) 2287int udp_rcv(struct sk_buff *skb)
@@ -2293,7 +2297,7 @@ void udp_destroy_sock(struct sock *sk)
2293 unlock_sock_fast(sk, slow); 2297 unlock_sock_fast(sk, slow);
2294 if (static_key_false(&udp_encap_needed) && up->encap_type) { 2298 if (static_key_false(&udp_encap_needed) && up->encap_type) {
2295 void (*encap_destroy)(struct sock *sk); 2299 void (*encap_destroy)(struct sock *sk);
2296 encap_destroy = ACCESS_ONCE(up->encap_destroy); 2300 encap_destroy = READ_ONCE(up->encap_destroy);
2297 if (encap_destroy) 2301 if (encap_destroy)
2298 encap_destroy(sk); 2302 encap_destroy(sk);
2299 } 2303 }
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index a8cf8c6fb60c..e7d18b140287 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _UDP4_IMPL_H 2#ifndef _UDP4_IMPL_H
2#define _UDP4_IMPL_H 3#define _UDP4_IMPL_H
3#include <net/udp.h> 4#include <net/udp.h>
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 97658bfc1b58..01801b77bd0d 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -120,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
120 * will be using a length value equal to only one MSS sized 120 * will be using a length value equal to only one MSS sized
121 * segment instead of the entire frame. 121 * segment instead of the entire frame.
122 */ 122 */
123 if (gso_partial) { 123 if (gso_partial && skb_is_gso(skb)) {
124 uh->len = htons(skb_shinfo(skb)->gso_size + 124 uh->len = htons(skb_shinfo(skb)->gso_size +
125 SKB_GSO_CB(skb)->data_offset + 125 SKB_GSO_CB(skb)->data_offset +
126 skb->head - (unsigned char *)uh); 126 skb->head - (unsigned char *)uh);
@@ -187,16 +187,57 @@ out_unlock:
187} 187}
188EXPORT_SYMBOL(skb_udp_tunnel_segment); 188EXPORT_SYMBOL(skb_udp_tunnel_segment);
189 189
190static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb, 190static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
191 netdev_features_t features) 191 netdev_features_t features)
192{ 192{
193 struct sk_buff *segs = ERR_PTR(-EINVAL); 193 struct sk_buff *segs = ERR_PTR(-EINVAL);
194 unsigned int mss;
195 __wsum csum;
196 struct udphdr *uh;
197 struct iphdr *iph;
194 198
195 if (skb->encapsulation && 199 if (skb->encapsulation &&
196 (skb_shinfo(skb)->gso_type & 200 (skb_shinfo(skb)->gso_type &
197 (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) 201 (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
198 segs = skb_udp_tunnel_segment(skb, features, false); 202 segs = skb_udp_tunnel_segment(skb, features, false);
203 goto out;
204 }
205
206 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
207 goto out;
208
209 mss = skb_shinfo(skb)->gso_size;
210 if (unlikely(skb->len <= mss))
211 goto out;
212
213 /* Do software UFO. Complete and fill in the UDP checksum as
214 * HW cannot do checksum of UDP packets sent as multiple
215 * IP fragments.
216 */
199 217
218 uh = udp_hdr(skb);
219 iph = ip_hdr(skb);
220
221 uh->check = 0;
222 csum = skb_checksum(skb, 0, skb->len, 0);
223 uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
224 if (uh->check == 0)
225 uh->check = CSUM_MANGLED_0;
226
227 skb->ip_summed = CHECKSUM_UNNECESSARY;
228
229 /* If there is no outer header we can fake a checksum offload
 230	 * because the checksum has already been computed in software
 231	 * prior to segmenting the frame.
232 */
233 if (!skb->encap_hdr_csum)
234 features |= NETIF_F_HW_CSUM;
235
236 /* Fragment the skb. IP headers of the fragments are updated in
237 * inet_gso_segment()
238 */
239 segs = skb_segment(skb, features);
240out:
200 return segs; 241 return segs;
201} 242}
202 243
@@ -330,7 +371,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
330 371
331static const struct net_offload udpv4_offload = { 372static const struct net_offload udpv4_offload = {
332 .callbacks = { 373 .callbacks = {
333 .gso_segment = udp4_tunnel_segment, 374 .gso_segment = udp4_ufo_fragment,
334 .gro_receive = udp4_gro_receive, 375 .gro_receive = udp4_gro_receive,
335 .gro_complete = udp4_gro_complete, 376 .gro_complete = udp4_gro_complete,
336 }, 377 },
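
The restored software-UFO path above computes the UDP checksum before fragmenting, folding a zero result to CSUM_MANGLED_0 (0xffff) because zero means "no checksum" in UDP over IPv4. A self-contained sketch of the one's-complement sum over pseudo-header plus datagram (addresses and ports invented):

/* One's-complement UDP checksum over pseudo-header + datagram, with
 * the 0 -> 0xffff fold-over the hunk applies.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t csum_add(uint32_t sum, const void *buf, size_t len)
{
    const uint8_t *p = buf;

    while (len > 1) {
        sum += (p[0] << 8) | p[1];
        p += 2;
        len -= 2;
    }
    if (len)
        sum += p[0] << 8;      /* pad odd trailing byte */
    return sum;
}

static uint16_t csum_fold(uint32_t sum)
{
    while (sum >> 16)
        sum = (sum & 0xFFFF) + (sum >> 16);
    return (uint16_t)~sum;
}

int main(void)
{
    uint8_t payload[] = "hello";
    uint16_t udp_len = 8 + sizeof(payload);
    /* pseudo-header: saddr, daddr, zero, proto=17, UDP length */
    uint8_t ph[12] = { 192, 0, 2, 1, 192, 0, 2, 2, 0, 17 };
    /* UDP header: sport 12345, dport 53; checksum field stays 0 */
    uint8_t udph[8] = { 0x30, 0x39, 0x00, 0x35 };
    uint32_t sum = 0;
    uint16_t check;

    ph[10] = udp_len >> 8;
    ph[11] = udp_len & 0xFF;
    udph[4] = udp_len >> 8;
    udph[5] = udp_len & 0xFF;

    sum = csum_add(sum, ph, sizeof(ph));
    sum = csum_add(sum, udph, sizeof(udph));
    sum = csum_add(sum, payload, sizeof(payload));

    check = csum_fold(sum);
    if (check == 0)
        check = 0xFFFF;        /* CSUM_MANGLED_0: 0 means "no checksum" */
    printf("uh->check = 0x%04x\n", check);
    return 0;
}
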
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 1fc684111ce6..e50b7fea57ee 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * xfrm4_input.c 3 * xfrm4_input.c
3 * 4 *
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d7bf0b041885..05017e2c849c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * xfrm4_policy.c 3 * xfrm4_policy.c
3 * 4 *
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d6660a8c0ea5..80c40b4981bb 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * xfrm4_state.c 3 * xfrm4_state.c
3 * 4 *
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 10e342363793..e0026fa1261b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux TCP/IP (INET6) layer. 3# Makefile for the Linux TCP/IP (INET6) layer.
3# 4#
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c2e2a78787ec..f49bd7897e95 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -152,11 +152,13 @@ static void ipv6_regen_rndid(struct inet6_dev *idev);
152static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 152static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
153 153
154static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); 154static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
155static int ipv6_count_addresses(struct inet6_dev *idev); 155static int ipv6_count_addresses(const struct inet6_dev *idev);
156static int ipv6_generate_stable_address(struct in6_addr *addr, 156static int ipv6_generate_stable_address(struct in6_addr *addr,
157 u8 dad_count, 157 u8 dad_count,
158 const struct inet6_dev *idev); 158 const struct inet6_dev *idev);
159 159
160#define IN6_ADDR_HSIZE_SHIFT 8
161#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
160/* 162/*
161 * Configured unicast address hash table 163 * Configured unicast address hash table
162 */ 164 */
@@ -186,14 +188,12 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
186static void addrconf_dad_work(struct work_struct *w); 188static void addrconf_dad_work(struct work_struct *w);
187static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); 189static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
188static void addrconf_dad_run(struct inet6_dev *idev); 190static void addrconf_dad_run(struct inet6_dev *idev);
189static void addrconf_rs_timer(unsigned long data); 191static void addrconf_rs_timer(struct timer_list *t);
190static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); 192static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
191static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); 193static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
192 194
193static void inet6_prefix_notify(int event, struct inet6_dev *idev, 195static void inet6_prefix_notify(int event, struct inet6_dev *idev,
194 struct prefix_info *pinfo); 196 struct prefix_info *pinfo);
195static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
196 struct net_device *dev);
197 197
198static struct ipv6_devconf ipv6_devconf __read_mostly = { 198static struct ipv6_devconf ipv6_devconf __read_mostly = {
199 .forwarding = 0, 199 .forwarding = 0,
@@ -231,7 +231,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
231 .proxy_ndp = 0, 231 .proxy_ndp = 0,
232 .accept_source_route = 0, /* we do not accept RH0 by default. */ 232 .accept_source_route = 0, /* we do not accept RH0 by default. */
233 .disable_ipv6 = 0, 233 .disable_ipv6 = 0,
234 .accept_dad = 1, 234 .accept_dad = 0,
235 .suppress_frag_ndisc = 1, 235 .suppress_frag_ndisc = 1,
236 .accept_ra_mtu = 1, 236 .accept_ra_mtu = 1,
237 .stable_secret = { 237 .stable_secret = {
@@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
303 .disable_policy = 0, 303 .disable_policy = 0,
304}; 304};
305 305
306/* Check if a valid qdisc is available */ 306/* Check if link is ready: is it up and is a valid qdisc available */
307static inline bool addrconf_qdisc_ok(const struct net_device *dev) 307static inline bool addrconf_link_ready(const struct net_device *dev)
308{ 308{
309 return !qdisc_tx_is_noop(dev); 309 return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
310} 310}
311 311
312static void addrconf_del_rs_timer(struct inet6_dev *idev) 312static void addrconf_del_rs_timer(struct inet6_dev *idev)
@@ -388,8 +388,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
388 rwlock_init(&ndev->lock); 388 rwlock_init(&ndev->lock);
389 ndev->dev = dev; 389 ndev->dev = dev;
390 INIT_LIST_HEAD(&ndev->addr_list); 390 INIT_LIST_HEAD(&ndev->addr_list);
391 setup_timer(&ndev->rs_timer, addrconf_rs_timer, 391 timer_setup(&ndev->rs_timer, addrconf_rs_timer, 0);
392 (unsigned long)ndev);
393 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); 392 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
394 393
395 if (ndev->cnf.stable_secret.initialized) 394 if (ndev->cnf.stable_secret.initialized)
@@ -451,7 +450,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
451 450
452 ndev->token = in6addr_any; 451 ndev->token = in6addr_any;
453 452
454 if (netif_running(dev) && addrconf_qdisc_ok(dev)) 453 if (netif_running(dev) && addrconf_link_ready(dev))
455 ndev->if_flags |= IF_READY; 454 ndev->if_flags |= IF_READY;
456 455
457 ipv6_mc_init_dev(ndev); 456 ipv6_mc_init_dev(ndev);
@@ -616,23 +615,23 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
616{ 615{
617 struct net *net = sock_net(in_skb->sk); 616 struct net *net = sock_net(in_skb->sk);
618 struct nlattr *tb[NETCONFA_MAX+1]; 617 struct nlattr *tb[NETCONFA_MAX+1];
618 struct inet6_dev *in6_dev = NULL;
619 struct net_device *dev = NULL;
619 struct netconfmsg *ncm; 620 struct netconfmsg *ncm;
620 struct sk_buff *skb; 621 struct sk_buff *skb;
621 struct ipv6_devconf *devconf; 622 struct ipv6_devconf *devconf;
622 struct inet6_dev *in6_dev;
623 struct net_device *dev;
624 int ifindex; 623 int ifindex;
625 int err; 624 int err;
626 625
627 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 626 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
628 devconf_ipv6_policy, extack); 627 devconf_ipv6_policy, extack);
629 if (err < 0) 628 if (err < 0)
630 goto errout; 629 return err;
631 630
632 err = -EINVAL;
633 if (!tb[NETCONFA_IFINDEX]) 631 if (!tb[NETCONFA_IFINDEX])
634 goto errout; 632 return -EINVAL;
635 633
634 err = -EINVAL;
636 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); 635 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
637 switch (ifindex) { 636 switch (ifindex) {
638 case NETCONFA_IFINDEX_ALL: 637 case NETCONFA_IFINDEX_ALL:
@@ -642,10 +641,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
642 devconf = net->ipv6.devconf_dflt; 641 devconf = net->ipv6.devconf_dflt;
643 break; 642 break;
644 default: 643 default:
645 dev = __dev_get_by_index(net, ifindex); 644 dev = dev_get_by_index(net, ifindex);
646 if (!dev) 645 if (!dev)
647 goto errout; 646 return -EINVAL;
648 in6_dev = __in6_dev_get(dev); 647 in6_dev = in6_dev_get(dev);
649 if (!in6_dev) 648 if (!in6_dev)
650 goto errout; 649 goto errout;
651 devconf = &in6_dev->cnf; 650 devconf = &in6_dev->cnf;
@@ -653,7 +652,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
653 } 652 }
654 653
655 err = -ENOBUFS; 654 err = -ENOBUFS;
656 skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC); 655 skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
657 if (!skb) 656 if (!skb)
658 goto errout; 657 goto errout;
659 658
@@ -669,6 +668,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
669 } 668 }
670 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 669 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
671errout: 670errout:
671 if (in6_dev)
672 in6_dev_put(in6_dev);
673 if (dev)
674 dev_put(dev);
672 return err; 675 return err;
673} 676}
674 677
@@ -945,12 +948,50 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
945 break; 948 break;
946 } 949 }
947 950
948 list_add_tail(&ifp->if_list, p); 951 list_add_tail_rcu(&ifp->if_list, p);
949} 952}
950 953
951static u32 inet6_addr_hash(const struct in6_addr *addr) 954static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
952{ 955{
953 return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); 956 u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
957
958 return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
959}
960
961static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
962 struct net_device *dev, unsigned int hash)
963{
964 struct inet6_ifaddr *ifp;
965
966 hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
967 if (!net_eq(dev_net(ifp->idev->dev), net))
968 continue;
969 if (ipv6_addr_equal(&ifp->addr, addr)) {
970 if (!dev || ifp->idev->dev == dev)
971 return true;
972 }
973 }
974 return false;
975}
976
977static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
978{
979 unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
980 int err = 0;
981
982 spin_lock(&addrconf_hash_lock);
983
984 /* Ignore adding duplicate addresses on an interface */
985 if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
986 ADBG("ipv6_add_addr: already assigned\n");
987 err = -EEXIST;
988 } else {
989 hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
990 }
991
992 spin_unlock(&addrconf_hash_lock);
993
994 return err;
954} 995}
955 996
956/* On success it returns ifp with increased reference count */ 997/* On success it returns ifp with increased reference count */
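
inet6_addr_hash() now mixes a per-namespace salt (net_hash_mix()) into the folded address before hash_32() picks one of the 2^IN6_ADDR_HSIZE_SHIFT buckets, so the same address in different netns lands in different chains. A sketch of the bucket selection with stand-in values for the fold and the salt (the multiplier matches the kernel's hash_32()):

/* Bucket selection: fold the address to 32 bits, XOR a per-namespace
 * salt, then take the top bits of a multiplicative hash.
 */
#include <stdio.h>
#include <stdint.h>

#define HSIZE_SHIFT 8                      /* 256 buckets */
#define GOLDEN_RATIO_32 0x61C88647u

static uint32_t hash_32(uint32_t val, unsigned bits)
{
    return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

int main(void)
{
    /* 32-bit fold of an address (stand-in for ipv6_addr_hash()) */
    uint32_t addr_fold = 0x20010db8 ^ 0x00000001;
    uint32_t net_salt = 0x5eedf00d;        /* stand-in net_hash_mix() */

    printf("bucket = %u of %u\n",
           hash_32(addr_fold ^ net_salt, HSIZE_SHIFT),
           1u << HSIZE_SHIFT);
    return 0;
}
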
@@ -958,13 +999,13 @@ static u32 inet6_addr_hash(const struct in6_addr *addr)
958static struct inet6_ifaddr * 999static struct inet6_ifaddr *
959ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, 1000ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
960 const struct in6_addr *peer_addr, int pfxlen, 1001 const struct in6_addr *peer_addr, int pfxlen,
961 int scope, u32 flags, u32 valid_lft, u32 prefered_lft) 1002 int scope, u32 flags, u32 valid_lft, u32 prefered_lft,
1003 bool can_block, struct netlink_ext_ack *extack)
962{ 1004{
1005 gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
963 struct net *net = dev_net(idev->dev); 1006 struct net *net = dev_net(idev->dev);
964 struct inet6_ifaddr *ifa = NULL; 1007 struct inet6_ifaddr *ifa = NULL;
965 struct rt6_info *rt; 1008 struct rt6_info *rt = NULL;
966 struct in6_validator_info i6vi;
967 unsigned int hash;
968 int err = 0; 1009 int err = 0;
969 int addr_type = ipv6_addr_type(addr); 1010 int addr_type = ipv6_addr_type(addr);
970 1011
@@ -974,42 +1015,33 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
974 addr_type & IPV6_ADDR_LOOPBACK)) 1015 addr_type & IPV6_ADDR_LOOPBACK))
975 return ERR_PTR(-EADDRNOTAVAIL); 1016 return ERR_PTR(-EADDRNOTAVAIL);
976 1017
977 rcu_read_lock_bh();
978
979 in6_dev_hold(idev);
980
981 if (idev->dead) { 1018 if (idev->dead) {
982 err = -ENODEV; /*XXX*/ 1019 err = -ENODEV; /*XXX*/
983 goto out2; 1020 goto out;
984 } 1021 }
985 1022
986 if (idev->cnf.disable_ipv6) { 1023 if (idev->cnf.disable_ipv6) {
987 err = -EACCES; 1024 err = -EACCES;
988 goto out2;
989 }
990
991 i6vi.i6vi_addr = *addr;
992 i6vi.i6vi_dev = idev;
993 rcu_read_unlock_bh();
994
995 err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
996
997 rcu_read_lock_bh();
998 err = notifier_to_errno(err);
999 if (err)
1000 goto out2;
1001
1002 spin_lock(&addrconf_hash_lock);
1003
1004 /* Ignore adding duplicate addresses on an interface */
1005 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
1006 ADBG("ipv6_add_addr: already assigned\n");
1007 err = -EEXIST;
1008 goto out; 1025 goto out;
1009 } 1026 }
1010 1027
1011 ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); 1028 /* validator notifier needs to be blocking;
1029 * do not call in atomic context
1030 */
1031 if (can_block) {
1032 struct in6_validator_info i6vi = {
1033 .i6vi_addr = *addr,
1034 .i6vi_dev = idev,
1035 .extack = extack,
1036 };
1037
1038 err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
1039 err = notifier_to_errno(err);
1040 if (err < 0)
1041 goto out;
1042 }
1012 1043
1044 ifa = kzalloc(sizeof(*ifa), gfp_flags);
1013 if (!ifa) { 1045 if (!ifa) {
1014 ADBG("ipv6_add_addr: malloc failed\n"); 1046 ADBG("ipv6_add_addr: malloc failed\n");
1015 err = -ENOBUFS; 1047 err = -ENOBUFS;
@@ -1019,6 +1051,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
1019 rt = addrconf_dst_alloc(idev, addr, false); 1051 rt = addrconf_dst_alloc(idev, addr, false);
1020 if (IS_ERR(rt)) { 1052 if (IS_ERR(rt)) {
1021 err = PTR_ERR(rt); 1053 err = PTR_ERR(rt);
1054 rt = NULL;
1022 goto out; 1055 goto out;
1023 } 1056 }
1024 1057
@@ -1049,16 +1082,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
1049 ifa->rt = rt; 1082 ifa->rt = rt;
1050 1083
1051 ifa->idev = idev; 1084 ifa->idev = idev;
1085 in6_dev_hold(idev);
1086
1052 /* For caller */ 1087 /* For caller */
1053 refcount_set(&ifa->refcnt, 1); 1088 refcount_set(&ifa->refcnt, 1);
1054 1089
1055 /* Add to big hash table */ 1090 rcu_read_lock_bh();
1056 hash = inet6_addr_hash(addr);
1057 1091
1058 hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); 1092 err = ipv6_add_addr_hash(idev->dev, ifa);
1059 spin_unlock(&addrconf_hash_lock); 1093 if (err < 0) {
1094 rcu_read_unlock_bh();
1095 goto out;
1096 }
1060 1097
1061 write_lock(&idev->lock); 1098 write_lock(&idev->lock);
1099
1062 /* Add to inet6_dev unicast addr list. */ 1100 /* Add to inet6_dev unicast addr list. */
1063 ipv6_link_dev_addr(idev, ifa); 1101 ipv6_link_dev_addr(idev, ifa);
1064 1102
@@ -1069,21 +1107,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
1069 1107
1070 in6_ifa_hold(ifa); 1108 in6_ifa_hold(ifa);
1071 write_unlock(&idev->lock); 1109 write_unlock(&idev->lock);
1072out2: 1110
1073 rcu_read_unlock_bh(); 1111 rcu_read_unlock_bh();
1074 1112
1075 if (likely(err == 0)) 1113 inet6addr_notifier_call_chain(NETDEV_UP, ifa);
1076 inet6addr_notifier_call_chain(NETDEV_UP, ifa); 1114out:
1077 else { 1115 if (unlikely(err < 0)) {
1078 kfree(ifa); 1116 if (rt)
1079 in6_dev_put(idev); 1117 ip6_rt_put(rt);
1118 if (ifa) {
1119 if (ifa->idev)
1120 in6_dev_put(ifa->idev);
1121 kfree(ifa);
1122 }
1080 ifa = ERR_PTR(err); 1123 ifa = ERR_PTR(err);
1081 } 1124 }
1082 1125
1083 return ifa; 1126 return ifa;
1084out:
1085 spin_unlock(&addrconf_hash_lock);
1086 goto out2;
1087} 1127}
1088 1128
1089enum cleanup_prefix_rt_t { 1129enum cleanup_prefix_rt_t {
@@ -1204,7 +1244,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
1204 if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) 1244 if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
1205 action = check_cleanup_prefix_route(ifp, &expires); 1245 action = check_cleanup_prefix_route(ifp, &expires);
1206 1246
1207 list_del_init(&ifp->if_list); 1247 list_del_rcu(&ifp->if_list);
1208 __in6_ifa_put(ifp); 1248 __in6_ifa_put(ifp);
1209 1249
1210 write_unlock_bh(&ifp->idev->lock); 1250 write_unlock_bh(&ifp->idev->lock);
@@ -1226,7 +1266,9 @@ out:
1226 in6_ifa_put(ifp); 1266 in6_ifa_put(ifp);
1227} 1267}
1228 1268
1229static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) 1269static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
1270 struct inet6_ifaddr *ift,
1271 bool block)
1230{ 1272{
1231 struct inet6_dev *idev = ifp->idev; 1273 struct inet6_dev *idev = ifp->idev;
1232 struct in6_addr addr, *tmpaddr; 1274 struct in6_addr addr, *tmpaddr;
@@ -1330,7 +1372,7 @@ retry:
1330 1372
1331 ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen, 1373 ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
1332 ipv6_addr_scope(&addr), addr_flags, 1374 ipv6_addr_scope(&addr), addr_flags,
1333 tmp_valid_lft, tmp_prefered_lft); 1375 tmp_valid_lft, tmp_prefered_lft, block, NULL);
1334 if (IS_ERR(ift)) { 1376 if (IS_ERR(ift)) {
1335 in6_ifa_put(ifp); 1377 in6_ifa_put(ifp);
1336 in6_dev_put(idev); 1378 in6_dev_put(idev);
@@ -1399,10 +1441,18 @@ static inline int ipv6_saddr_preferred(int type)
1399 return 0; 1441 return 0;
1400} 1442}
1401 1443
1402static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) 1444static bool ipv6_use_optimistic_addr(struct net *net,
1445 struct inet6_dev *idev)
1403{ 1446{
1404#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1447#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1405 return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; 1448 if (!idev)
1449 return false;
1450 if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
1451 return false;
1452 if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
1453 return false;
1454
1455 return true;
1406#else 1456#else
1407 return false; 1457 return false;
1408#endif 1458#endif
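
The predicate above now honors devconf_all as well as the per-device flags: either scope may turn on optimistic_dad and use_optimistic independently. A toy version of the combined check (invented structs, not the kernel types):

/* Either the all-devices config or the per-device config may enable
 * each knob; both knobs must end up enabled for optimistic use.
 */
#include <stdio.h>
#include <stdbool.h>

struct cnf {
    bool optimistic_dad, use_optimistic;
};

static bool use_optimistic_addr(const struct cnf *all,
                                const struct cnf *dev)
{
    if (!dev)
        return false;
    if (!all->optimistic_dad && !dev->optimistic_dad)
        return false;
    if (!all->use_optimistic && !dev->use_optimistic)
        return false;
    return true;
}

int main(void)
{
    struct cnf all = { .optimistic_dad = true };  /* set globally */
    struct cnf dev = { .use_optimistic = true };  /* set per device */

    printf("optimistic source allowed: %s\n",
           use_optimistic_addr(&all, &dev) ? "yes" : "no");
    return 0;
}
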
@@ -1472,7 +1522,7 @@ static int ipv6_get_saddr_eval(struct net *net,
1472 /* Rule 3: Avoid deprecated and optimistic addresses */ 1522 /* Rule 3: Avoid deprecated and optimistic addresses */
1473 u8 avoid = IFA_F_DEPRECATED; 1523 u8 avoid = IFA_F_DEPRECATED;
1474 1524
1475 if (!ipv6_use_optimistic_addr(score->ifa->idev)) 1525 if (!ipv6_use_optimistic_addr(net, score->ifa->idev))
1476 avoid |= IFA_F_OPTIMISTIC; 1526 avoid |= IFA_F_OPTIMISTIC;
1477 ret = ipv6_saddr_preferred(score->addr_type) || 1527 ret = ipv6_saddr_preferred(score->addr_type) ||
1478 !(score->ifa->flags & avoid); 1528 !(score->ifa->flags & avoid);
@@ -1550,8 +1600,7 @@ static int __ipv6_dev_get_saddr(struct net *net,
1550{ 1600{
1551 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx]; 1601 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
1552 1602
1553 read_lock_bh(&idev->lock); 1603 list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) {
1554 list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
1555 int i; 1604 int i;
1556 1605
1557 /* 1606 /*
@@ -1601,11 +1650,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
1601 } 1650 }
1602 break; 1651 break;
1603 } else if (minihiscore < miniscore) { 1652 } else if (minihiscore < miniscore) {
1604 if (hiscore->ifa)
1605 in6_ifa_put(hiscore->ifa);
1606
1607 in6_ifa_hold(score->ifa);
1608
1609 swap(hiscore, score); 1653 swap(hiscore, score);
1610 hiscore_idx = 1 - hiscore_idx; 1654 hiscore_idx = 1 - hiscore_idx;
1611 1655
@@ -1617,7 +1661,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
1617 } 1661 }
1618 } 1662 }
1619out: 1663out:
1620 read_unlock_bh(&idev->lock);
1621 return hiscore_idx; 1664 return hiscore_idx;
1622} 1665}
1623 1666
@@ -1654,6 +1697,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
1654 int dst_type; 1697 int dst_type;
1655 bool use_oif_addr = false; 1698 bool use_oif_addr = false;
1656 int hiscore_idx = 0; 1699 int hiscore_idx = 0;
1700 int ret = 0;
1657 1701
1658 dst_type = __ipv6_addr_type(daddr); 1702 dst_type = __ipv6_addr_type(daddr);
1659 dst.addr = daddr; 1703 dst.addr = daddr;
@@ -1729,15 +1773,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
1729 } 1773 }
1730 1774
1731out: 1775out:
1732 rcu_read_unlock();
1733
1734 hiscore = &scores[hiscore_idx]; 1776 hiscore = &scores[hiscore_idx];
1735 if (!hiscore->ifa) 1777 if (!hiscore->ifa)
1736 return -EADDRNOTAVAIL; 1778 ret = -EADDRNOTAVAIL;
1779 else
1780 *saddr = hiscore->ifa->addr;
1737 1781
1738 *saddr = hiscore->ifa->addr; 1782 rcu_read_unlock();
1739 in6_ifa_put(hiscore->ifa); 1783 return ret;
1740 return 0;
1741} 1784}
1742EXPORT_SYMBOL(ipv6_dev_get_saddr); 1785EXPORT_SYMBOL(ipv6_dev_get_saddr);
1743 1786
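
With the address list now walked under RCU, ipv6_dev_get_saddr() no longer pins the winning entry with in6_ifa_hold()/in6_ifa_put(); it copies hiscore->ifa->addr into *saddr before rcu_read_unlock(), so no reference outlives the read-side critical section. A hedged userspace sketch of the copy-before-unlock idiom (the stub lock functions stand in for RCU):

    #include <stdio.h>

    struct addr { unsigned char bytes[16]; };

    /* Stand-ins for rcu_read_lock()/rcu_read_unlock(); in the kernel
     * these delimit the region in which 'found' is guaranteed alive. */
    static void read_lock(void)   { }
    static void read_unlock(void) { }

    static int get_saddr(const struct addr *found, struct addr *out)
    {
        int ret = 0;

        read_lock();
        if (!found)
            ret = -1;            /* -EADDRNOTAVAIL in the kernel */
        else
            *out = *found;       /* copy while the entry is pinned */
        read_unlock();           /* after this, 'found' may be freed */
        return ret;
    }

    int main(void)
    {
        struct addr a = { { 0xfe, 0x80 } }, out;
        printf("%d %02x\n", get_saddr(&a, &out), out.bytes[0]);
        return 0;
    }
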
@@ -1777,15 +1820,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
1777 return err; 1820 return err;
1778} 1821}
1779 1822
1780static int ipv6_count_addresses(struct inet6_dev *idev) 1823static int ipv6_count_addresses(const struct inet6_dev *idev)
1781{ 1824{
1825 const struct inet6_ifaddr *ifp;
1782 int cnt = 0; 1826 int cnt = 0;
1783 struct inet6_ifaddr *ifp;
1784 1827
1785 read_lock_bh(&idev->lock); 1828 rcu_read_lock();
1786 list_for_each_entry(ifp, &idev->addr_list, if_list) 1829 list_for_each_entry_rcu(ifp, &idev->addr_list, if_list)
1787 cnt++; 1830 cnt++;
1788 read_unlock_bh(&idev->lock); 1831 rcu_read_unlock();
1789 return cnt; 1832 return cnt;
1790} 1833}
1791 1834
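
ipv6_count_addresses() drops the idev->lock read side entirely: readers iterate with list_for_each_entry_rcu() under rcu_read_lock(), while writers (as in ipv6_del_addr() above, which switched to list_del_rcu()) still take the write lock and defer the free. A compilable sketch of the reader side against a plain linked list (the pthread/RCU machinery is elided; the premise is that entries are only unlinked, never freed, while a walk is in progress):

    #include <stdio.h>

    struct ifaddr {
        struct ifaddr *next;
        int flags;
    };

    /* Reader: walks the list without a lock. Correctness relies on
     * writers unlinking with release semantics and deferring frees
     * until all readers are done, which is what RCU provides. */
    static int count_addresses(const struct ifaddr *head)
    {
        int cnt = 0;

        for (const struct ifaddr *p = head; p; p = p->next)
            cnt++;
        return cnt;
    }

    int main(void)
    {
        struct ifaddr c = { NULL, 0 }, b = { &c, 0 }, a = { &b, 0 };
        printf("%d\n", count_addresses(&a)); /* prints 3 */
        return 0;
    }
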
@@ -1800,11 +1843,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
1800 const struct net_device *dev, int strict, 1843 const struct net_device *dev, int strict,
1801 u32 banned_flags) 1844 u32 banned_flags)
1802{ 1845{
1846 unsigned int hash = inet6_addr_hash(net, addr);
1803 struct inet6_ifaddr *ifp; 1847 struct inet6_ifaddr *ifp;
1804 unsigned int hash = inet6_addr_hash(addr);
1805 u32 ifp_flags; 1848 u32 ifp_flags;
1806 1849
1807 rcu_read_lock_bh(); 1850 rcu_read_lock();
1808 hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { 1851 hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
1809 if (!net_eq(dev_net(ifp->idev->dev), net)) 1852 if (!net_eq(dev_net(ifp->idev->dev), net))
1810 continue; 1853 continue;
@@ -1818,32 +1861,16 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
1818 !(ifp_flags&banned_flags) && 1861 !(ifp_flags&banned_flags) &&
1819 (!dev || ifp->idev->dev == dev || 1862 (!dev || ifp->idev->dev == dev ||
1820 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { 1863 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
1821 rcu_read_unlock_bh(); 1864 rcu_read_unlock();
1822 return 1; 1865 return 1;
1823 } 1866 }
1824 } 1867 }
1825 1868
1826 rcu_read_unlock_bh(); 1869 rcu_read_unlock();
1827 return 0; 1870 return 0;
1828} 1871}
1829EXPORT_SYMBOL(ipv6_chk_addr_and_flags); 1872EXPORT_SYMBOL(ipv6_chk_addr_and_flags);
1830 1873
1831static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1832 struct net_device *dev)
1833{
1834 unsigned int hash = inet6_addr_hash(addr);
1835 struct inet6_ifaddr *ifp;
1836
1837 hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
1838 if (!net_eq(dev_net(ifp->idev->dev), net))
1839 continue;
1840 if (ipv6_addr_equal(&ifp->addr, addr)) {
1841 if (!dev || ifp->idev->dev == dev)
1842 return true;
1843 }
1844 }
1845 return false;
1846}
1847 1874
1848/* Compares an address/prefix_len with addresses on device @dev. 1875/* Compares an address/prefix_len with addresses on device @dev.
1849 * If one is found it returns true. 1876 * If one is found it returns true.
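
inet6_addr_hash() now takes the namespace as well as the address, so the same address in two namespaces lands in different buckets and the per-entry net_eq() filter has less work to do. A minimal sketch of folding a namespace identifier into the hash (the mixing function and names are illustrative; the kernel combines ipv6_addr_hash() with a per-netns salt):

    #include <stdint.h>
    #include <stdio.h>

    #define HASH_BITS 8
    #define HASH_SIZE (1u << HASH_BITS)

    /* Toy avalanche mix; the kernel would use jhash/hash_32(). */
    static uint32_t mix(uint32_t h)
    {
        h ^= h >> 16; h *= 0x7feb352dU;
        h ^= h >> 15; h *= 0x846ca68bU;
        return h ^ (h >> 16);
    }

    static unsigned addr_hash(const void *net, uint32_t addr_fold)
    {
        /* Fold the namespace pointer in so identical addresses in
         * different namespaces hash to different buckets. */
        return mix(addr_fold ^ (uint32_t)(uintptr_t)net) & (HASH_SIZE - 1);
    }

    int main(void)
    {
        int ns1, ns2; /* stand-ins for two struct net instances */
        printf("%u %u\n", addr_hash(&ns1, 0xfe800001),
                          addr_hash(&ns2, 0xfe800001));
        return 0;
    }
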
@@ -1851,20 +1878,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1851bool ipv6_chk_custom_prefix(const struct in6_addr *addr, 1878bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
1852 const unsigned int prefix_len, struct net_device *dev) 1879 const unsigned int prefix_len, struct net_device *dev)
1853{ 1880{
1854 struct inet6_dev *idev; 1881 const struct inet6_ifaddr *ifa;
1855 struct inet6_ifaddr *ifa; 1882 const struct inet6_dev *idev;
1856 bool ret = false; 1883 bool ret = false;
1857 1884
1858 rcu_read_lock(); 1885 rcu_read_lock();
1859 idev = __in6_dev_get(dev); 1886 idev = __in6_dev_get(dev);
1860 if (idev) { 1887 if (idev) {
1861 read_lock_bh(&idev->lock); 1888 list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
1862 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1863 ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); 1889 ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
1864 if (ret) 1890 if (ret)
1865 break; 1891 break;
1866 } 1892 }
1867 read_unlock_bh(&idev->lock);
1868 } 1893 }
1869 rcu_read_unlock(); 1894 rcu_read_unlock();
1870 1895
@@ -1874,22 +1899,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix);
1874 1899
1875int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) 1900int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
1876{ 1901{
1877 struct inet6_dev *idev; 1902 const struct inet6_ifaddr *ifa;
1878 struct inet6_ifaddr *ifa; 1903 const struct inet6_dev *idev;
1879 int onlink; 1904 int onlink;
1880 1905
1881 onlink = 0; 1906 onlink = 0;
1882 rcu_read_lock(); 1907 rcu_read_lock();
1883 idev = __in6_dev_get(dev); 1908 idev = __in6_dev_get(dev);
1884 if (idev) { 1909 if (idev) {
1885 read_lock_bh(&idev->lock); 1910 list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
1886 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1887 onlink = ipv6_prefix_equal(addr, &ifa->addr, 1911 onlink = ipv6_prefix_equal(addr, &ifa->addr,
1888 ifa->prefix_len); 1912 ifa->prefix_len);
1889 if (onlink) 1913 if (onlink)
1890 break; 1914 break;
1891 } 1915 }
1892 read_unlock_bh(&idev->lock);
1893 } 1916 }
1894 rcu_read_unlock(); 1917 rcu_read_unlock();
1895 return onlink; 1918 return onlink;
@@ -1899,11 +1922,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
1899struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, 1922struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
1900 struct net_device *dev, int strict) 1923 struct net_device *dev, int strict)
1901{ 1924{
1925 unsigned int hash = inet6_addr_hash(net, addr);
1902 struct inet6_ifaddr *ifp, *result = NULL; 1926 struct inet6_ifaddr *ifp, *result = NULL;
1903 unsigned int hash = inet6_addr_hash(addr);
1904 1927
1905 rcu_read_lock_bh(); 1928 rcu_read_lock();
1906 hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { 1929 hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
1907 if (!net_eq(dev_net(ifp->idev->dev), net)) 1930 if (!net_eq(dev_net(ifp->idev->dev), net))
1908 continue; 1931 continue;
1909 if (ipv6_addr_equal(&ifp->addr, addr)) { 1932 if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1915,7 +1938,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
1915 } 1938 }
1916 } 1939 }
1917 } 1940 }
1918 rcu_read_unlock_bh(); 1941 rcu_read_unlock();
1919 1942
1920 return result; 1943 return result;
1921} 1944}
@@ -1934,7 +1957,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
1934 if (ifpub) { 1957 if (ifpub) {
1935 in6_ifa_hold(ifpub); 1958 in6_ifa_hold(ifpub);
1936 spin_unlock_bh(&ifp->lock); 1959 spin_unlock_bh(&ifp->lock);
1937 ipv6_create_tempaddr(ifpub, ifp); 1960 ipv6_create_tempaddr(ifpub, ifp, true);
1938 in6_ifa_put(ifpub); 1961 in6_ifa_put(ifpub);
1939 } else { 1962 } else {
1940 spin_unlock_bh(&ifp->lock); 1963 spin_unlock_bh(&ifp->lock);
@@ -1967,7 +1990,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
1967 return err; 1990 return err;
1968} 1991}
1969 1992
1970void addrconf_dad_failure(struct inet6_ifaddr *ifp) 1993void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
1971{ 1994{
1972 struct inet6_dev *idev = ifp->idev; 1995 struct inet6_dev *idev = ifp->idev;
1973 struct net *net = dev_net(ifp->idev->dev); 1996 struct net *net = dev_net(ifp->idev->dev);
@@ -1977,8 +2000,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1977 return; 2000 return;
1978 } 2001 }
1979 2002
1980 net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", 2003 net_info_ratelimited("%s: IPv6 duplicate address %pI6c used by %pM detected!\n",
1981 ifp->idev->dev->name, &ifp->addr); 2004 ifp->idev->dev->name, &ifp->addr, eth_hdr(skb)->h_source);
1982 2005
1983 spin_lock_bh(&ifp->lock); 2006 spin_lock_bh(&ifp->lock);
1984 2007
@@ -2017,7 +2040,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
2017 2040
2018 ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen, 2041 ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
2019 scope, flags, valid_lft, 2042 scope, flags, valid_lft,
2020 preferred_lft); 2043 preferred_lft, false, NULL);
2021 if (IS_ERR(ifp2)) 2044 if (IS_ERR(ifp2))
2022 goto lock_errdad; 2045 goto lock_errdad;
2023 2046
@@ -2313,24 +2336,24 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
2313 if (!table) 2336 if (!table)
2314 return NULL; 2337 return NULL;
2315 2338
2316 read_lock_bh(&table->tb6_lock); 2339 rcu_read_lock();
2317 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); 2340 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
2318 if (!fn) 2341 if (!fn)
2319 goto out; 2342 goto out;
2320 2343
2321 noflags |= RTF_CACHE; 2344 for_each_fib6_node_rt_rcu(fn) {
2322 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2323 if (rt->dst.dev->ifindex != dev->ifindex) 2345 if (rt->dst.dev->ifindex != dev->ifindex)
2324 continue; 2346 continue;
2325 if ((rt->rt6i_flags & flags) != flags) 2347 if ((rt->rt6i_flags & flags) != flags)
2326 continue; 2348 continue;
2327 if ((rt->rt6i_flags & noflags) != 0) 2349 if ((rt->rt6i_flags & noflags) != 0)
2328 continue; 2350 continue;
2329 dst_hold(&rt->dst); 2351 if (!dst_hold_safe(&rt->dst))
2352 rt = NULL;
2330 break; 2353 break;
2331 } 2354 }
2332out: 2355out:
2333 read_unlock_bh(&table->tb6_lock); 2356 rcu_read_unlock();
2334 return rt; 2357 return rt;
2335} 2358}
2336 2359
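
Under RCU the route found by addrconf_get_prefix_route() may already be on its way out, so a plain dst_hold() could resurrect a zero refcount; dst_hold_safe() takes the reference only if the count is still non-zero, and the lookup returns NULL otherwise. A userspace sketch of the inc-not-zero idiom (single-threaded stand-in; the kernel version is an atomic operation):

    #include <stdbool.h>
    #include <stdio.h>

    struct route { int refcnt; };

    /* Succeeds only while at least one reference is still held;
     * a zero count means the object is already being destroyed. */
    static bool hold_safe(struct route *rt)
    {
        if (rt->refcnt == 0)
            return false;
        rt->refcnt++;
        return true;
    }

    int main(void)
    {
        struct route live = { .refcnt = 1 }, dying = { .refcnt = 0 };
        struct route *rt = &dying;

        if (!hold_safe(rt))
            rt = NULL;     /* mirror the patch: discard the match */
        printf("%p %d\n", (void *)rt, hold_safe(&live));
        return 0;
    }
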
@@ -2434,7 +2457,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
2434 * no temporary address currently exists. 2457 * no temporary address currently exists.
2435 */ 2458 */
2436 read_unlock_bh(&idev->lock); 2459 read_unlock_bh(&idev->lock);
2437 ipv6_create_tempaddr(ifp, NULL); 2460 ipv6_create_tempaddr(ifp, NULL, false);
2438 } else { 2461 } else {
2439 read_unlock_bh(&idev->lock); 2462 read_unlock_bh(&idev->lock);
2440 } 2463 }
@@ -2460,7 +2483,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
2460 int max_addresses = in6_dev->cnf.max_addresses; 2483 int max_addresses = in6_dev->cnf.max_addresses;
2461 2484
2462#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 2485#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
2463 if (in6_dev->cnf.optimistic_dad && 2486 if ((net->ipv6.devconf_all->optimistic_dad ||
2487 in6_dev->cnf.optimistic_dad) &&
2464 !net->ipv6.devconf_all->forwarding && sllao) 2488 !net->ipv6.devconf_all->forwarding && sllao)
2465 addr_flags |= IFA_F_OPTIMISTIC; 2489 addr_flags |= IFA_F_OPTIMISTIC;
2466#endif 2490#endif
@@ -2474,7 +2498,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
2474 pinfo->prefix_len, 2498 pinfo->prefix_len,
2475 addr_type&IPV6_ADDR_SCOPE_MASK, 2499 addr_type&IPV6_ADDR_SCOPE_MASK,
2476 addr_flags, valid_lft, 2500 addr_flags, valid_lft,
2477 prefered_lft); 2501 prefered_lft, false, NULL);
2478 2502
2479 if (IS_ERR_OR_NULL(ifp)) 2503 if (IS_ERR_OR_NULL(ifp))
2480 return -1; 2504 return -1;
@@ -2784,7 +2808,8 @@ static int inet6_addr_add(struct net *net, int ifindex,
2784 const struct in6_addr *pfx, 2808 const struct in6_addr *pfx,
2785 const struct in6_addr *peer_pfx, 2809 const struct in6_addr *peer_pfx,
2786 unsigned int plen, __u32 ifa_flags, 2810 unsigned int plen, __u32 ifa_flags,
2787 __u32 prefered_lft, __u32 valid_lft) 2811 __u32 prefered_lft, __u32 valid_lft,
2812 struct netlink_ext_ack *extack)
2788{ 2813{
2789 struct inet6_ifaddr *ifp; 2814 struct inet6_ifaddr *ifp;
2790 struct inet6_dev *idev; 2815 struct inet6_dev *idev;
@@ -2843,7 +2868,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
2843 } 2868 }
2844 2869
2845 ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, 2870 ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
2846 valid_lft, prefered_lft); 2871 valid_lft, prefered_lft, true, extack);
2847 2872
2848 if (!IS_ERR(ifp)) { 2873 if (!IS_ERR(ifp)) {
2849 if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { 2874 if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -2928,7 +2953,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
2928 rtnl_lock(); 2953 rtnl_lock();
2929 err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL, 2954 err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
2930 ireq.ifr6_prefixlen, IFA_F_PERMANENT, 2955 ireq.ifr6_prefixlen, IFA_F_PERMANENT,
2931 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 2956 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL);
2932 rtnl_unlock(); 2957 rtnl_unlock();
2933 return err; 2958 return err;
2934} 2959}
@@ -2958,7 +2983,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
2958 2983
2959 ifp = ipv6_add_addr(idev, addr, NULL, plen, 2984 ifp = ipv6_add_addr(idev, addr, NULL, plen,
2960 scope, IFA_F_PERMANENT, 2985 scope, IFA_F_PERMANENT,
2961 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 2986 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME,
2987 true, NULL);
2962 if (!IS_ERR(ifp)) { 2988 if (!IS_ERR(ifp)) {
2963 spin_lock_bh(&ifp->lock); 2989 spin_lock_bh(&ifp->lock);
2964 ifp->flags &= ~IFA_F_TENTATIVE; 2990 ifp->flags &= ~IFA_F_TENTATIVE;
@@ -3051,13 +3077,14 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
3051 u32 addr_flags = flags | IFA_F_PERMANENT; 3077 u32 addr_flags = flags | IFA_F_PERMANENT;
3052 3078
3053#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 3079#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
3054 if (idev->cnf.optimistic_dad && 3080 if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
3081 idev->cnf.optimistic_dad) &&
3055 !dev_net(idev->dev)->ipv6.devconf_all->forwarding) 3082 !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
3056 addr_flags |= IFA_F_OPTIMISTIC; 3083 addr_flags |= IFA_F_OPTIMISTIC;
3057#endif 3084#endif
3058 3085
3059 ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 3086 ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
3060 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 3087 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
3061 if (!IS_ERR(ifp)) { 3088 if (!IS_ERR(ifp)) {
3062 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); 3089 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
3063 addrconf_dad_start(ifp); 3090 addrconf_dad_start(ifp);
@@ -3287,7 +3314,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
3287 struct rt6_info *rt, *prev; 3314 struct rt6_info *rt, *prev;
3288 3315
3289 rt = addrconf_dst_alloc(idev, &ifp->addr, false); 3316 rt = addrconf_dst_alloc(idev, &ifp->addr, false);
3290 if (unlikely(IS_ERR(rt))) 3317 if (IS_ERR(rt))
3291 return PTR_ERR(rt); 3318 return PTR_ERR(rt);
3292 3319
3293 /* ifp->rt can be accessed outside of rtnl */ 3320 /* ifp->rt can be accessed outside of rtnl */
@@ -3325,6 +3352,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
3325 if ((ifp->flags & IFA_F_PERMANENT) && 3352 if ((ifp->flags & IFA_F_PERMANENT) &&
3326 fixup_permanent_addr(idev, ifp) < 0) { 3353 fixup_permanent_addr(idev, ifp) < 0) {
3327 write_unlock_bh(&idev->lock); 3354 write_unlock_bh(&idev->lock);
3355 in6_ifa_hold(ifp);
3328 ipv6_del_addr(ifp); 3356 ipv6_del_addr(ifp);
3329 write_lock_bh(&idev->lock); 3357 write_lock_bh(&idev->lock);
3330 3358
@@ -3393,7 +3421,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3393 /* restore routes for permanent addresses */ 3421 /* restore routes for permanent addresses */
3394 addrconf_permanent_addr(dev); 3422 addrconf_permanent_addr(dev);
3395 3423
3396 if (!addrconf_qdisc_ok(dev)) { 3424 if (!addrconf_link_ready(dev)) {
3397 /* device is not ready yet. */ 3425 /* device is not ready yet. */
3398 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", 3426 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
3399 dev->name); 3427 dev->name);
@@ -3408,7 +3436,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3408 run_pending = 1; 3436 run_pending = 1;
3409 } 3437 }
3410 } else if (event == NETDEV_CHANGE) { 3438 } else if (event == NETDEV_CHANGE) {
3411 if (!addrconf_qdisc_ok(dev)) { 3439 if (!addrconf_link_ready(dev)) {
3412 /* device is still not ready. */ 3440 /* device is still not ready. */
3413 break; 3441 break;
3414 } 3442 }
@@ -3552,7 +3580,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3552 struct net *net = dev_net(dev); 3580 struct net *net = dev_net(dev);
3553 struct inet6_dev *idev; 3581 struct inet6_dev *idev;
3554 struct inet6_ifaddr *ifa, *tmp; 3582 struct inet6_ifaddr *ifa, *tmp;
3555 struct list_head del_list;
3556 int _keep_addr; 3583 int _keep_addr;
3557 bool keep_addr; 3584 bool keep_addr;
3558 int state, i; 3585 int state, i;
@@ -3644,7 +3671,6 @@ restart:
3644 */ 3671 */
3645 keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); 3672 keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
3646 3673
3647 INIT_LIST_HEAD(&del_list);
3648 list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { 3674 list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
3649 struct rt6_info *rt = NULL; 3675 struct rt6_info *rt = NULL;
3650 bool keep; 3676 bool keep;
@@ -3653,8 +3679,6 @@ restart:
3653 3679
3654 keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && 3680 keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
3655 !addr_is_local(&ifa->addr); 3681 !addr_is_local(&ifa->addr);
3656 if (!keep)
3657 list_move(&ifa->if_list, &del_list);
3658 3682
3659 write_unlock_bh(&idev->lock); 3683 write_unlock_bh(&idev->lock);
3660 spin_lock_bh(&ifa->lock); 3684 spin_lock_bh(&ifa->lock);
@@ -3688,19 +3712,14 @@ restart:
3688 } 3712 }
3689 3713
3690 write_lock_bh(&idev->lock); 3714 write_lock_bh(&idev->lock);
3715 if (!keep) {
3716 list_del_rcu(&ifa->if_list);
3717 in6_ifa_put(ifa);
3718 }
3691 } 3719 }
3692 3720
3693 write_unlock_bh(&idev->lock); 3721 write_unlock_bh(&idev->lock);
3694 3722
3695 /* now clean up addresses to be removed */
3696 while (!list_empty(&del_list)) {
3697 ifa = list_first_entry(&del_list,
3698 struct inet6_ifaddr, if_list);
3699 list_del(&ifa->if_list);
3700
3701 in6_ifa_put(ifa);
3702 }
3703
3704 /* Step 5: Discard anycast and multicast list */ 3723 /* Step 5: Discard anycast and multicast list */
3705 if (how) { 3724 if (how) {
3706 ipv6_ac_destroy_dev(idev); 3725 ipv6_ac_destroy_dev(idev);
@@ -3721,9 +3740,9 @@ restart:
3721 return 0; 3740 return 0;
3722} 3741}
3723 3742
3724static void addrconf_rs_timer(unsigned long data) 3743static void addrconf_rs_timer(struct timer_list *t)
3725{ 3744{
3726 struct inet6_dev *idev = (struct inet6_dev *)data; 3745 struct inet6_dev *idev = from_timer(idev, t, rs_timer);
3727 struct net_device *dev = idev->dev; 3746 struct net_device *dev = idev->dev;
3728 struct in6_addr lladdr; 3747 struct in6_addr lladdr;
3729 3748
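
addrconf_rs_timer() is converted to the timer_list API: the callback receives the struct timer_list itself and recovers its container with from_timer(), instead of smuggling the device pointer through an unsigned long. from_timer() is just container_of() on the embedded timer member. A compilable sketch of the recovery step (types reduced to the essentials; requires GNU C typeof, as the kernel macro does):

    #include <stddef.h>
    #include <stdio.h>

    struct timer_list { void (*fn)(struct timer_list *); };

    #define from_timer(var, t, field) \
        ((typeof(var))((char *)(t) - offsetof(typeof(*var), field)))

    struct inet6_dev_sketch {
        const char *name;
        struct timer_list rs_timer;   /* timer embedded in its owner */
    };

    static void rs_timer_cb(struct timer_list *t)
    {
        /* Recover the owning structure from the embedded member. */
        struct inet6_dev_sketch *idev = from_timer(idev, t, rs_timer);

        printf("timer fired for %s\n", idev->name);
    }

    int main(void)
    {
        struct inet6_dev_sketch idev = { "eth0", { rs_timer_cb } };

        idev.rs_timer.fn(&idev.rs_timer);   /* simulate expiry */
        return 0;
    }
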
@@ -3810,7 +3829,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
3810 goto out; 3829 goto out;
3811 3830
3812 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || 3831 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
3813 idev->cnf.accept_dad < 1 || 3832 (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 &&
3833 idev->cnf.accept_dad < 1) ||
3814 !(ifp->flags&IFA_F_TENTATIVE) || 3834 !(ifp->flags&IFA_F_TENTATIVE) ||
3815 ifp->flags & IFA_F_NODAD) { 3835 ifp->flags & IFA_F_NODAD) {
3816 bump_id = ifp->flags & IFA_F_TENTATIVE; 3836 bump_id = ifp->flags & IFA_F_TENTATIVE;
@@ -3841,7 +3861,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
3841 */ 3861 */
3842 if (ifp->flags & IFA_F_OPTIMISTIC) { 3862 if (ifp->flags & IFA_F_OPTIMISTIC) {
3843 ip6_ins_rt(ifp->rt); 3863 ip6_ins_rt(ifp->rt);
3844 if (ipv6_use_optimistic_addr(idev)) { 3864 if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
3845 /* Because optimistic nodes can use this address, 3865 /* Because optimistic nodes can use this address,
3846 * notify listeners. If DAD fails, RTM_DELADDR is sent. 3866 * notify listeners. If DAD fails, RTM_DELADDR is sent.
3847 */ 3867 */
@@ -3897,7 +3917,9 @@ static void addrconf_dad_work(struct work_struct *w)
3897 action = DAD_ABORT; 3917 action = DAD_ABORT;
3898 ifp->state = INET6_IFADDR_STATE_POSTDAD; 3918 ifp->state = INET6_IFADDR_STATE_POSTDAD;
3899 3919
3900 if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 && 3920 if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
3921 idev->cnf.accept_dad > 1) &&
3922 !idev->cnf.disable_ipv6 &&
3901 !(ifp->flags & IFA_F_STABLE_PRIVACY)) { 3923 !(ifp->flags & IFA_F_STABLE_PRIVACY)) {
3902 struct in6_addr addr; 3924 struct in6_addr addr;
3903 3925
@@ -4079,9 +4101,9 @@ struct if6_iter_state {
4079 4101
4080static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) 4102static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
4081{ 4103{
4082 struct inet6_ifaddr *ifa = NULL;
4083 struct if6_iter_state *state = seq->private; 4104 struct if6_iter_state *state = seq->private;
4084 struct net *net = seq_file_net(seq); 4105 struct net *net = seq_file_net(seq);
4106 struct inet6_ifaddr *ifa = NULL;
4085 int p = 0; 4107 int p = 0;
4086 4108
4087 /* initial bucket if pos is 0 */ 4109 /* initial bucket if pos is 0 */
@@ -4091,7 +4113,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
4091 } 4113 }
4092 4114
4093 for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { 4115 for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
4094 hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], 4116 hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
4095 addr_lst) { 4117 addr_lst) {
4096 if (!net_eq(dev_net(ifa->idev->dev), net)) 4118 if (!net_eq(dev_net(ifa->idev->dev), net))
4097 continue; 4119 continue;
@@ -4117,7 +4139,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
4117 struct if6_iter_state *state = seq->private; 4139 struct if6_iter_state *state = seq->private;
4118 struct net *net = seq_file_net(seq); 4140 struct net *net = seq_file_net(seq);
4119 4141
4120 hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) { 4142 hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
4121 if (!net_eq(dev_net(ifa->idev->dev), net)) 4143 if (!net_eq(dev_net(ifa->idev->dev), net))
4122 continue; 4144 continue;
4123 state->offset++; 4145 state->offset++;
@@ -4126,7 +4148,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
4126 4148
4127 while (++state->bucket < IN6_ADDR_HSIZE) { 4149 while (++state->bucket < IN6_ADDR_HSIZE) {
4128 state->offset = 0; 4150 state->offset = 0;
4129 hlist_for_each_entry_rcu_bh(ifa, 4151 hlist_for_each_entry_rcu(ifa,
4130 &inet6_addr_lst[state->bucket], addr_lst) { 4152 &inet6_addr_lst[state->bucket], addr_lst) {
4131 if (!net_eq(dev_net(ifa->idev->dev), net)) 4153 if (!net_eq(dev_net(ifa->idev->dev), net))
4132 continue; 4154 continue;
@@ -4139,9 +4161,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
4139} 4161}
4140 4162
4141static void *if6_seq_start(struct seq_file *seq, loff_t *pos) 4163static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
4142 __acquires(rcu_bh) 4164 __acquires(rcu)
4143{ 4165{
4144 rcu_read_lock_bh(); 4166 rcu_read_lock();
4145 return if6_get_first(seq, *pos); 4167 return if6_get_first(seq, *pos);
4146} 4168}
4147 4169
@@ -4155,9 +4177,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4155} 4177}
4156 4178
4157static void if6_seq_stop(struct seq_file *seq, void *v) 4179static void if6_seq_stop(struct seq_file *seq, void *v)
4158 __releases(rcu_bh) 4180 __releases(rcu)
4159{ 4181{
4160 rcu_read_unlock_bh(); 4182 rcu_read_unlock();
4161} 4183}
4162 4184
4163static int if6_seq_show(struct seq_file *seq, void *v) 4185static int if6_seq_show(struct seq_file *seq, void *v)
@@ -4226,12 +4248,12 @@ void if6_proc_exit(void)
4226/* Check if address is a home address configured on any interface. */ 4248/* Check if address is a home address configured on any interface. */
4227int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) 4249int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
4228{ 4250{
4229 int ret = 0; 4251 unsigned int hash = inet6_addr_hash(net, addr);
4230 struct inet6_ifaddr *ifp = NULL; 4252 struct inet6_ifaddr *ifp = NULL;
4231 unsigned int hash = inet6_addr_hash(addr); 4253 int ret = 0;
4232 4254
4233 rcu_read_lock_bh(); 4255 rcu_read_lock();
4234 hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { 4256 hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
4235 if (!net_eq(dev_net(ifp->idev->dev), net)) 4257 if (!net_eq(dev_net(ifp->idev->dev), net))
4236 continue; 4258 continue;
4237 if (ipv6_addr_equal(&ifp->addr, addr) && 4259 if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -4240,7 +4262,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
4240 break; 4262 break;
4241 } 4263 }
4242 } 4264 }
4243 rcu_read_unlock_bh(); 4265 rcu_read_unlock();
4244 return ret; 4266 return ret;
4245} 4267}
4246#endif 4268#endif
@@ -4330,7 +4352,7 @@ restart:
4330 spin_lock(&ifpub->lock); 4352 spin_lock(&ifpub->lock);
4331 ifpub->regen_count = 0; 4353 ifpub->regen_count = 0;
4332 spin_unlock(&ifpub->lock); 4354 spin_unlock(&ifpub->lock);
4333 ipv6_create_tempaddr(ifpub, ifp); 4355 ipv6_create_tempaddr(ifpub, ifp, true);
4334 in6_ifa_put(ifpub); 4356 in6_ifa_put(ifpub);
4335 in6_ifa_put(ifp); 4357 in6_ifa_put(ifp);
4336 goto restart; 4358 goto restart;
@@ -4566,7 +4588,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
4566 */ 4588 */
4567 return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, 4589 return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
4568 ifm->ifa_prefixlen, ifa_flags, 4590 ifm->ifa_prefixlen, ifa_flags,
4569 preferred_lft, valid_lft); 4591 preferred_lft, valid_lft, extack);
4570 } 4592 }
4571 4593
4572 if (nlh->nlmsg_flags & NLM_F_EXCL || 4594 if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -4893,17 +4915,15 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4893 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, 4915 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
4894 extack); 4916 extack);
4895 if (err < 0) 4917 if (err < 0)
4896 goto errout; 4918 return err;
4897 4919
4898 addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); 4920 addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
4899 if (!addr) { 4921 if (!addr)
4900 err = -EINVAL; 4922 return -EINVAL;
4901 goto errout;
4902 }
4903 4923
4904 ifm = nlmsg_data(nlh); 4924 ifm = nlmsg_data(nlh);
4905 if (ifm->ifa_index) 4925 if (ifm->ifa_index)
4906 dev = __dev_get_by_index(net, ifm->ifa_index); 4926 dev = dev_get_by_index(net, ifm->ifa_index);
4907 4927
4908 ifa = ipv6_get_ifaddr(net, addr, dev, 1); 4928 ifa = ipv6_get_ifaddr(net, addr, dev, 1);
4909 if (!ifa) { 4929 if (!ifa) {
@@ -4929,6 +4949,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4929errout_ifa: 4949errout_ifa:
4930 in6_ifa_put(ifa); 4950 in6_ifa_put(ifa);
4931errout: 4951errout:
4952 if (dev)
4953 dev_put(dev);
4932 return err; 4954 return err;
4933} 4955}
4934 4956
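
Paired with the RTNL_FLAG_DOIT_UNLOCKED registration of RTM_GETADDR in addrconf_init() below, inet6_rtm_getaddr() can no longer count on the RTNL lock to keep the device alive, so it moves from __dev_get_by_index() to the reference-taking dev_get_by_index() and must dev_put() on every exit path. A small sketch of the get/put discipline (names with trailing underscores are illustrative, not the kernel functions):

    #include <stdio.h>

    struct net_device { int refcnt; };

    /* Lookup plus reference in one step, needed once no outer lock
     * pins the device for the duration of the handler. */
    static struct net_device *dev_get_(struct net_device *dev)
    {
        if (dev)
            dev->refcnt++;
        return dev;
    }

    static void dev_put_(struct net_device *dev)
    {
        if (dev)
            dev->refcnt--;
    }

    static int handler(struct net_device *dev_or_null)
    {
        struct net_device *dev = dev_get_(dev_or_null);
        int err = dev ? 0 : -1;

        /* ... use dev ... */
        dev_put_(dev);          /* every exit path must drop it */
        return err;
    }

    int main(void)
    {
        struct net_device d = { 0 };
        printf("%d refcnt=%d\n", handler(&d), d.refcnt); /* 0 refcnt=0 */
        return 0;
    }
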
@@ -4940,9 +4962,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
4940 4962
4941 /* Don't send DELADDR notification for TENTATIVE address, 4963 /* Don't send DELADDR notification for TENTATIVE address,
4942 * since NEWADDR notification is sent only after removing 4964 * since NEWADDR notification is sent only after removing
4943 * TENTATIVE flag. 4965 * TENTATIVE flag, if DAD has not failed.
4944 */ 4966 */
4945 if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR) 4967 if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
4968 event == RTM_DELADDR)
4946 return; 4969 return;
4947 4970
4948 skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); 4971 skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
@@ -5035,6 +5058,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
5035 array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; 5058 array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
5036 array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; 5059 array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
5037 array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; 5060 array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
5061 array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
5038} 5062}
5039 5063
5040static inline size_t inet6_ifla6_size(void) 5064static inline size_t inet6_ifla6_size(void)
@@ -5884,10 +5908,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
5884 spin_lock(&ifa->lock); 5908 spin_lock(&ifa->lock);
5885 if (ifa->rt) { 5909 if (ifa->rt) {
5886 struct rt6_info *rt = ifa->rt; 5910 struct rt6_info *rt = ifa->rt;
5887 struct fib6_table *table = rt->rt6i_table;
5888 int cpu; 5911 int cpu;
5889 5912
5890 read_lock(&table->tb6_lock); 5913 rcu_read_lock();
5891 addrconf_set_nopolicy(ifa->rt, val); 5914 addrconf_set_nopolicy(ifa->rt, val);
5892 if (rt->rt6i_pcpu) { 5915 if (rt->rt6i_pcpu) {
5893 for_each_possible_cpu(cpu) { 5916 for_each_possible_cpu(cpu) {
@@ -5897,7 +5920,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
5897 addrconf_set_nopolicy(*rtp, val); 5920 addrconf_set_nopolicy(*rtp, val);
5898 } 5921 }
5899 } 5922 }
5900 read_unlock(&table->tb6_lock); 5923 rcu_read_unlock();
5901 } 5924 }
5902 spin_unlock(&ifa->lock); 5925 spin_unlock(&ifa->lock);
5903 } 5926 }
@@ -5963,6 +5986,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
5963} 5986}
5964 5987
5965static int minus_one = -1; 5988static int minus_one = -1;
5989static const int zero = 0;
5966static const int one = 1; 5990static const int one = 1;
5967static const int two_five_five = 255; 5991static const int two_five_five = 255;
5968 5992
@@ -6334,6 +6358,15 @@ static const struct ctl_table addrconf_sysctl[] = {
6334 .proc_handler = addrconf_sysctl_disable_policy, 6358 .proc_handler = addrconf_sysctl_disable_policy,
6335 }, 6359 },
6336 { 6360 {
6361 .procname = "ndisc_tclass",
6362 .data = &ipv6_devconf.ndisc_tclass,
6363 .maxlen = sizeof(int),
6364 .mode = 0644,
6365 .proc_handler = proc_dointvec_minmax,
6366 .extra1 = (void *)&zero,
6367 .extra2 = (void *)&two_five_five,
6368 },
6369 {
6337 /* sentinel */ 6370 /* sentinel */
6338 } 6371 }
6339}; 6372};
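
The new ndisc_tclass entry reuses proc_dointvec_minmax with extra1/extra2 pointing at the static 0 and 255, so writes outside the one-byte traffic-class range are rejected rather than clamped. A small sketch of that validate-on-write behaviour (the signature is illustrative, not the procfs API):

    #include <stdio.h>

    static const int zero = 0, two_five_five = 255;

    /* Mirrors proc_dointvec_minmax(): reject, don't clamp, writes
     * outside [*min, *max]. */
    static int set_bounded(int *target, int val, const int *min, const int *max)
    {
        if ((min && val < *min) || (max && val > *max))
            return -1;   /* -EINVAL in the kernel */
        *target = val;
        return 0;
    }

    int main(void)
    {
        int ndisc_tclass = 0;

        printf("%d\n", set_bounded(&ndisc_tclass, 32, &zero, &two_five_five));
        printf("%d\n", set_bounded(&ndisc_tclass, 300, &zero, &two_five_five));
        printf("tclass=%d\n", ndisc_tclass);  /* 0, -1, tclass=32 */
        return 0;
    }
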
@@ -6571,13 +6604,13 @@ int __init addrconf_init(void)
6571 __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); 6604 __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
6572 __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); 6605 __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
6573 __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, 6606 __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
6574 inet6_dump_ifaddr, 0); 6607 inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
6575 __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, 6608 __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
6576 inet6_dump_ifmcaddr, 0); 6609 inet6_dump_ifmcaddr, 0);
6577 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, 6610 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
6578 inet6_dump_ifacaddr, 0); 6611 inet6_dump_ifacaddr, 0);
6579 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, 6612 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
6580 inet6_netconf_dump_devconf, 0); 6613 inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
6581 6614
6582 ipv6_addr_label_rtnl_register(); 6615 ipv6_addr_label_rtnl_register();
6583 6616
@@ -6604,9 +6637,9 @@ void addrconf_cleanup(void)
6604 unregister_pernet_subsys(&addrconf_ops); 6637 unregister_pernet_subsys(&addrconf_ops);
6605 ipv6_addr_label_cleanup(); 6638 ipv6_addr_label_cleanup();
6606 6639
6607 rtnl_lock(); 6640 rtnl_af_unregister(&inet6_ops);
6608 6641
6609 __rtnl_af_unregister(&inet6_ops); 6642 rtnl_lock();
6610 6643
6611 /* clean dev list */ 6644 /* clean dev list */
6612 for_each_netdev(&init_net, dev) { 6645 for_each_netdev(&init_net, dev) {
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 9e3488d50b15..32b564dfd02a 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -88,7 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr)
88EXPORT_SYMBOL(__ipv6_addr_type); 88EXPORT_SYMBOL(__ipv6_addr_type);
89 89
90static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); 90static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
91static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain); 91static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain);
92 92
93int register_inet6addr_notifier(struct notifier_block *nb) 93int register_inet6addr_notifier(struct notifier_block *nb)
94{ 94{
@@ -110,19 +110,20 @@ EXPORT_SYMBOL(inet6addr_notifier_call_chain);
110 110
111int register_inet6addr_validator_notifier(struct notifier_block *nb) 111int register_inet6addr_validator_notifier(struct notifier_block *nb)
112{ 112{
113 return atomic_notifier_chain_register(&inet6addr_validator_chain, nb); 113 return blocking_notifier_chain_register(&inet6addr_validator_chain, nb);
114} 114}
115EXPORT_SYMBOL(register_inet6addr_validator_notifier); 115EXPORT_SYMBOL(register_inet6addr_validator_notifier);
116 116
117int unregister_inet6addr_validator_notifier(struct notifier_block *nb) 117int unregister_inet6addr_validator_notifier(struct notifier_block *nb)
118{ 118{
119 return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb); 119 return blocking_notifier_chain_unregister(&inet6addr_validator_chain,
120 nb);
120} 121}
121EXPORT_SYMBOL(unregister_inet6addr_validator_notifier); 122EXPORT_SYMBOL(unregister_inet6addr_validator_notifier);
122 123
123int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) 124int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
124{ 125{
125 return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v); 126 return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v);
126} 127}
127EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); 128EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
128 129
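
The validator chain moves from an atomic to a blocking notifier head because its callbacks may need to sleep; blocking chains serialize registration and invocation with a sleepable lock instead of requiring atomic-context callbacks. A reduced sketch of a blocking-style chain using a mutex (the kernel uses an rw_semaphore and notifier_block lists; compile with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    struct notifier {
        int (*call)(unsigned long event, void *data);
        struct notifier *next;
    };

    static struct notifier *chain;
    static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;

    static void chain_register(struct notifier *nb)
    {
        pthread_mutex_lock(&chain_lock);
        nb->next = chain;
        chain = nb;
        pthread_mutex_unlock(&chain_lock);
    }

    /* Callbacks run with a sleepable lock held and may themselves
     * sleep; that is the property the patch buys by moving off the
     * atomic chain. */
    static int chain_call(unsigned long event, void *data)
    {
        int ret = 0;

        pthread_mutex_lock(&chain_lock);
        for (struct notifier *nb = chain; nb && !ret; nb = nb->next)
            ret = nb->call(event, data);
        pthread_mutex_unlock(&chain_lock);
        return ret;
    }

    static int validate(unsigned long event, void *data)
    {
        printf("event %lu\n", event);
        return 0;
    }

    int main(void)
    {
        struct notifier nb = { validate, NULL };

        chain_register(&nb);
        return chain_call(1, NULL);
    }
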
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b055bc79f56d..00e1f8ee08f8 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * IPv6 Address Label subsystem 3 * IPv6 Address Label subsystem
3 * for the IPv6 "Default" Source Address Selection 4 * for the IPv6 "Default" Source Address Selection
@@ -18,7 +19,6 @@
18#include <linux/if_addrlabel.h> 19#include <linux/if_addrlabel.h>
19#include <linux/netlink.h> 20#include <linux/netlink.h>
20#include <linux/rtnetlink.h> 21#include <linux/rtnetlink.h>
21#include <linux/refcount.h>
22 22
23#if 0 23#if 0
24#define ADDRLABEL(x...) printk(x) 24#define ADDRLABEL(x...) printk(x)
@@ -30,30 +30,15 @@
30 * Policy Table 30 * Policy Table
31 */ 31 */
32struct ip6addrlbl_entry { 32struct ip6addrlbl_entry {
33 possible_net_t lbl_net;
34 struct in6_addr prefix; 33 struct in6_addr prefix;
35 int prefixlen; 34 int prefixlen;
36 int ifindex; 35 int ifindex;
37 int addrtype; 36 int addrtype;
38 u32 label; 37 u32 label;
39 struct hlist_node list; 38 struct hlist_node list;
40 refcount_t refcnt;
41 struct rcu_head rcu; 39 struct rcu_head rcu;
42}; 40};
43 41
44static struct ip6addrlbl_table
45{
46 struct hlist_head head;
47 spinlock_t lock;
48 u32 seq;
49} ip6addrlbl_table;
50
51static inline
52struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
53{
54 return read_pnet(&lbl->lbl_net);
55}
56
57/* 42/*
58 * Default policy table (RFC6724 + extensions) 43 * Default policy table (RFC6724 + extensions)
59 * 44 *
@@ -125,36 +110,11 @@ static const __net_initconst struct ip6addrlbl_init_table
125 } 110 }
126}; 111};
127 112
128/* Object management */
129static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
130{
131 kfree(p);
132}
133
134static void ip6addrlbl_free_rcu(struct rcu_head *h)
135{
136 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
137}
138
139static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
140{
141 return refcount_inc_not_zero(&p->refcnt);
142}
143
144static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
145{
146 if (refcount_dec_and_test(&p->refcnt))
147 call_rcu(&p->rcu, ip6addrlbl_free_rcu);
148}
149
150/* Find label */ 113/* Find label */
151static bool __ip6addrlbl_match(struct net *net, 114static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p,
152 const struct ip6addrlbl_entry *p,
153 const struct in6_addr *addr, 115 const struct in6_addr *addr,
154 int addrtype, int ifindex) 116 int addrtype, int ifindex)
155{ 117{
156 if (!net_eq(ip6addrlbl_net(p), net))
157 return false;
158 if (p->ifindex && p->ifindex != ifindex) 118 if (p->ifindex && p->ifindex != ifindex)
159 return false; 119 return false;
160 if (p->addrtype && p->addrtype != addrtype) 120 if (p->addrtype && p->addrtype != addrtype)
@@ -169,8 +129,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
169 int type, int ifindex) 129 int type, int ifindex)
170{ 130{
171 struct ip6addrlbl_entry *p; 131 struct ip6addrlbl_entry *p;
172 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 132
173 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 133 hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
134 if (__ip6addrlbl_match(p, addr, type, ifindex))
174 return p; 135 return p;
175 } 136 }
176 return NULL; 137 return NULL;
@@ -196,8 +157,7 @@ u32 ipv6_addr_label(struct net *net,
196} 157}
197 158
198/* allocate one entry */ 159/* allocate one entry */
199static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 160static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
200 const struct in6_addr *prefix,
201 int prefixlen, int ifindex, 161 int prefixlen, int ifindex,
202 u32 label) 162 u32 label)
203{ 163{
@@ -236,24 +196,22 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
236 newp->addrtype = addrtype; 196 newp->addrtype = addrtype;
237 newp->label = label; 197 newp->label = label;
238 INIT_HLIST_NODE(&newp->list); 198 INIT_HLIST_NODE(&newp->list);
239 write_pnet(&newp->lbl_net, net);
240 refcount_set(&newp->refcnt, 1);
241 return newp; 199 return newp;
242} 200}
243 201
244/* add a label */ 202/* add a label */
245static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 203static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
204 int replace)
246{ 205{
247 struct hlist_node *n;
248 struct ip6addrlbl_entry *last = NULL, *p = NULL; 206 struct ip6addrlbl_entry *last = NULL, *p = NULL;
207 struct hlist_node *n;
249 int ret = 0; 208 int ret = 0;
250 209
251 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 210 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
252 replace); 211 replace);
253 212
254 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 213 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
255 if (p->prefixlen == newp->prefixlen && 214 if (p->prefixlen == newp->prefixlen &&
256 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
257 p->ifindex == newp->ifindex && 215 p->ifindex == newp->ifindex &&
258 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 216 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
259 if (!replace) { 217 if (!replace) {
@@ -261,7 +219,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
261 goto out; 219 goto out;
262 } 220 }
263 hlist_replace_rcu(&p->list, &newp->list); 221 hlist_replace_rcu(&p->list, &newp->list);
264 ip6addrlbl_put(p); 222 kfree_rcu(p, rcu);
265 goto out; 223 goto out;
266 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 224 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
267 (p->prefixlen < newp->prefixlen)) { 225 (p->prefixlen < newp->prefixlen)) {
@@ -273,10 +231,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
273 if (last) 231 if (last)
274 hlist_add_behind_rcu(&newp->list, &last->list); 232 hlist_add_behind_rcu(&newp->list, &last->list);
275 else 233 else
276 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 234 hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
277out: 235out:
278 if (!ret) 236 if (!ret)
279 ip6addrlbl_table.seq++; 237 net->ipv6.ip6addrlbl_table.seq++;
280 return ret; 238 return ret;
281} 239}
282 240
@@ -292,14 +250,14 @@ static int ip6addrlbl_add(struct net *net,
292 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 250 __func__, prefix, prefixlen, ifindex, (unsigned int)label,
293 replace); 251 replace);
294 252
295 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 253 newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
296 if (IS_ERR(newp)) 254 if (IS_ERR(newp))
297 return PTR_ERR(newp); 255 return PTR_ERR(newp);
298 spin_lock(&ip6addrlbl_table.lock); 256 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
299 ret = __ip6addrlbl_add(newp, replace); 257 ret = __ip6addrlbl_add(net, newp, replace);
300 spin_unlock(&ip6addrlbl_table.lock); 258 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
301 if (ret) 259 if (ret)
302 ip6addrlbl_free(newp); 260 kfree(newp);
303 return ret; 261 return ret;
304} 262}
305 263
@@ -315,13 +273,12 @@ static int __ip6addrlbl_del(struct net *net,
315 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 273 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
316 __func__, prefix, prefixlen, ifindex); 274 __func__, prefix, prefixlen, ifindex);
317 275
318 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 276 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
319 if (p->prefixlen == prefixlen && 277 if (p->prefixlen == prefixlen &&
320 net_eq(ip6addrlbl_net(p), net) &&
321 p->ifindex == ifindex && 278 p->ifindex == ifindex &&
322 ipv6_addr_equal(&p->prefix, prefix)) { 279 ipv6_addr_equal(&p->prefix, prefix)) {
323 hlist_del_rcu(&p->list); 280 hlist_del_rcu(&p->list);
324 ip6addrlbl_put(p); 281 kfree_rcu(p, rcu);
325 ret = 0; 282 ret = 0;
326 break; 283 break;
327 } 284 }
@@ -340,9 +297,9 @@ static int ip6addrlbl_del(struct net *net,
340 __func__, prefix, prefixlen, ifindex); 297 __func__, prefix, prefixlen, ifindex);
341 298
342 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 299 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
343 spin_lock(&ip6addrlbl_table.lock); 300 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
344 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 301 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
345 spin_unlock(&ip6addrlbl_table.lock); 302 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
346 return ret; 303 return ret;
347} 304}
348 305
@@ -354,6 +311,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
354 311
355 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 312 ADDRLABEL(KERN_DEBUG "%s\n", __func__);
356 313
314 spin_lock_init(&net->ipv6.ip6addrlbl_table.lock);
315 INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
316
357 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 317 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
358 int ret = ip6addrlbl_add(net, 318 int ret = ip6addrlbl_add(net,
359 ip6addrlbl_init_table[i].prefix, 319 ip6addrlbl_init_table[i].prefix,
@@ -373,14 +333,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
373 struct hlist_node *n; 333 struct hlist_node *n;
374 334
375 /* Remove all labels belonging to the exiting net */ 335 /* Remove all labels belonging to the exiting net */
376 spin_lock(&ip6addrlbl_table.lock); 336 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
377 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 337 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
378 if (net_eq(ip6addrlbl_net(p), net)) { 338 hlist_del_rcu(&p->list);
379 hlist_del_rcu(&p->list); 339 kfree_rcu(p, rcu);
380 ip6addrlbl_put(p);
381 }
382 } 340 }
383 spin_unlock(&ip6addrlbl_table.lock); 341 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
384} 342}
385 343
386static struct pernet_operations ipv6_addr_label_ops = { 344static struct pernet_operations ipv6_addr_label_ops = {
@@ -390,8 +348,6 @@ static struct pernet_operations ipv6_addr_label_ops = {
390 348
391int __init ipv6_addr_label_init(void) 349int __init ipv6_addr_label_init(void)
392{ 350{
393 spin_lock_init(&ip6addrlbl_table.lock);
394
395 return register_pernet_subsys(&ipv6_addr_label_ops); 351 return register_pernet_subsys(&ipv6_addr_label_ops);
396} 352}
397 353
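
Moving ip6addrlbl_table into struct net removes both the net_eq() filtering and the per-entry refcount: each namespace owns its own list head and lock, entries live and die with the namespace, and a plain kfree_rcu() replaces the hold/put pair. A compilable sketch of the ownership change (structures trimmed to the essentials; RCU deferral elided):

    #include <stdio.h>
    #include <stdlib.h>

    struct lbl_entry {
        unsigned label;
        struct lbl_entry *next;
    };

    /* Each 'net' embeds its own table: no shared global list, so
     * lookups never need to filter entries by namespace. */
    struct net_sketch {
        struct { struct lbl_entry *head; } ip6addrlbl_table;
    };

    static void net_init(struct net_sketch *net)
    {
        net->ip6addrlbl_table.head = NULL;
    }

    static void net_exit(struct net_sketch *net)
    {
        struct lbl_entry *p = net->ip6addrlbl_table.head, *n;

        for (; p; p = n) {
            n = p->next;
            free(p);   /* kfree_rcu() in the kernel */
        }
    }

    int main(void)
    {
        struct net_sketch net;
        struct lbl_entry *e = malloc(sizeof(*e));

        net_init(&net);
        e->label = 1;
        e->next = net.ip6addrlbl_table.head;
        net.ip6addrlbl_table.head = e;
        printf("label=%u\n", net.ip6addrlbl_table.head->label);
        net_exit(&net);
        return 0;
    }
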
@@ -510,11 +466,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
510 int err; 466 int err;
511 467
512 rcu_read_lock(); 468 rcu_read_lock();
513 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 469 hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
514 if (idx >= s_idx && 470 if (idx >= s_idx) {
515 net_eq(ip6addrlbl_net(p), net)) {
516 err = ip6addrlbl_fill(skb, p, 471 err = ip6addrlbl_fill(skb, p,
517 ip6addrlbl_table.seq, 472 net->ipv6.ip6addrlbl_table.seq,
518 NETLINK_CB(cb->skb).portid, 473 NETLINK_CB(cb->skb).portid,
519 cb->nlh->nlmsg_seq, 474 cb->nlh->nlmsg_seq,
520 RTM_NEWADDRLABEL, 475 RTM_NEWADDRLABEL,
@@ -567,38 +522,28 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
567 return -EINVAL; 522 return -EINVAL;
568 addr = nla_data(tb[IFAL_ADDRESS]); 523 addr = nla_data(tb[IFAL_ADDRESS]);
569 524
570 rcu_read_lock();
571 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
572 if (p && !ip6addrlbl_hold(p))
573 p = NULL;
574 lseq = ip6addrlbl_table.seq;
575 rcu_read_unlock();
576
577 if (!p) {
578 err = -ESRCH;
579 goto out;
580 }
581
582 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); 525 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
583 if (!skb) { 526 if (!skb)
584 ip6addrlbl_put(p);
585 return -ENOBUFS; 527 return -ENOBUFS;
586 }
587 528
588 err = ip6addrlbl_fill(skb, p, lseq, 529 err = -ESRCH;
589 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
590 RTM_NEWADDRLABEL, 0);
591 530
592 ip6addrlbl_put(p); 531 rcu_read_lock();
532 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
533 lseq = net->ipv6.ip6addrlbl_table.seq;
534 if (p)
535 err = ip6addrlbl_fill(skb, p, lseq,
536 NETLINK_CB(in_skb).portid,
537 nlh->nlmsg_seq,
538 RTM_NEWADDRLABEL, 0);
539 rcu_read_unlock();
593 540
594 if (err < 0) { 541 if (err < 0) {
595 WARN_ON(err == -EMSGSIZE); 542 WARN_ON(err == -EMSGSIZE);
596 kfree_skb(skb); 543 kfree_skb(skb);
597 goto out; 544 } else {
545 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
598 } 546 }
599
600 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
601out:
602 return err; 547 return err;
603} 548}
604 549
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fe5262fd6aa5..c26f71234b9c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -810,6 +810,10 @@ static int __net_init inet6_net_init(struct net *net)
810 net->ipv6.sysctl.idgen_retries = 3; 810 net->ipv6.sysctl.idgen_retries = 3;
811 net->ipv6.sysctl.idgen_delay = 1 * HZ; 811 net->ipv6.sysctl.idgen_delay = 1 * HZ;
812 net->ipv6.sysctl.flowlabel_state_ranges = 0; 812 net->ipv6.sysctl.flowlabel_state_ranges = 0;
813 net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
814 net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
815 net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
816 net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
813 atomic_set(&net->ipv6.fib6_sernum, 1); 817 atomic_set(&net->ipv6.fib6_sernum, 1);
814 818
815 err = ipv6_init_mibs(net); 819 err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7802b72196f3..78c974391567 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -271,6 +271,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
271 case NEXTHDR_DEST: 271 case NEXTHDR_DEST:
272 if (dir == XFRM_POLICY_OUT) 272 if (dir == XFRM_POLICY_OUT)
273 ipv6_rearrange_destopt(iph, exthdr.opth); 273 ipv6_rearrange_destopt(iph, exthdr.opth);
274 /* fall through */
274 case NEXTHDR_HOP: 275 case NEXTHDR_HOP:
275 if (!zero_out_mutable_opts(exthdr.opth)) { 276 if (!zero_out_mutable_opts(exthdr.opth)) {
276 net_dbg_ratelimited("overrun %sopts\n", 277 net_dbg_ratelimited("overrun %sopts\n",
@@ -443,7 +444,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
443 if (err == -EINPROGRESS) 444 if (err == -EINPROGRESS)
444 goto out; 445 goto out;
445 446
446 if (err == -EBUSY) 447 if (err == -ENOSPC)
447 err = NET_XMIT_DROP; 448 err = NET_XMIT_DROP;
448 goto out_free; 449 goto out_free;
449 } 450 }
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 89910e2c10f4..a902ff8f59be 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -396,7 +396,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
396 case -EINPROGRESS: 396 case -EINPROGRESS:
397 goto error; 397 goto error;
398 398
399 case -EBUSY: 399 case -ENOSPC:
400 err = NET_XMIT_DROP; 400 err = NET_XMIT_DROP;
401 break; 401 break;
402 402
@@ -483,8 +483,8 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
483 goto out; 483 goto out;
484 } 484 }
485 485
486 if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) 486 ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
487 BUG(); 487 BUG_ON(ret);
488 488
489 ret = -EINVAL; 489 ret = -EINVAL;
490 padlen = nexthdr[0]; 490 padlen = nexthdr[0];
@@ -559,14 +559,14 @@ static void esp_input_restore_header(struct sk_buff *skb)
559static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) 559static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
560{ 560{
561 struct xfrm_state *x = xfrm_input_state(skb); 561 struct xfrm_state *x = xfrm_input_state(skb);
562 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
563 562
564 /* For ESN we move the header forward by 4 bytes to 563 /* For ESN we move the header forward by 4 bytes to
565 * accommodate the high bits. We will move it back after 564 * accommodate the high bits. We will move it back after
566 * decryption. 565 * decryption.
567 */ 566 */
568 if ((x->props.flags & XFRM_STATE_ESN)) { 567 if ((x->props.flags & XFRM_STATE_ESN)) {
569 esph = skb_push(skb, 4); 568 struct ip_esp_hdr *esph = skb_push(skb, 4);
569
570 *seqhi = esph->spi; 570 *seqhi = esph->spi;
571 esph->spi = esph->seq_no; 571 esph->spi = esph->seq_no;
572 esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; 572 esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 95516138e861..83bd75713535 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -74,8 +74,20 @@ struct tlvtype_proc {
74 74
75/* An unknown option is detected, decide what to do */ 75/* An unknown option is detected, decide what to do */
76 76
77static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) 77static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
78 bool disallow_unknowns)
78{ 79{
80 if (disallow_unknowns) {
81 /* If unknown TLVs are disallowed by configuration
82 * then always silently drop packet. Note this also
83 * means no ICMP parameter problem is sent which
84 * could be a good property to mitigate a reflection DoS
85 * attack.
86 */
87
88 goto drop;
89 }
90
79 switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { 91 switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
80 case 0: /* ignore */ 92 case 0: /* ignore */
81 return true; 93 return true;
@@ -89,25 +101,36 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
89 */ 101 */
90 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) 102 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
91 break; 103 break;
104 /* fall through */
92 case 2: /* send ICMP PARM PROB regardless and drop packet */ 105 case 2: /* send ICMP PARM PROB regardless and drop packet */
93 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); 106 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
94 return false; 107 return false;
95 } 108 }
96 109
110drop:
97 kfree_skb(skb); 111 kfree_skb(skb);
98 return false; 112 return false;
99} 113}
100 114
101/* Parse tlv encoded option header (hop-by-hop or destination) */ 115/* Parse tlv encoded option header (hop-by-hop or destination) */
102 116
103static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) 117static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
118 struct sk_buff *skb,
119 int max_count)
104{ 120{
105 const struct tlvtype_proc *curr; 121 int len = (skb_transport_header(skb)[1] + 1) << 3;
106 const unsigned char *nh = skb_network_header(skb); 122 const unsigned char *nh = skb_network_header(skb);
107 int off = skb_network_header_len(skb); 123 int off = skb_network_header_len(skb);
108 int len = (skb_transport_header(skb)[1] + 1) << 3; 124 const struct tlvtype_proc *curr;
125 bool disallow_unknowns = false;
126 int tlv_count = 0;
109 int padlen = 0; 127 int padlen = 0;
110 128
129 if (unlikely(max_count < 0)) {
130 disallow_unknowns = true;
131 max_count = -max_count;
132 }
133
111 if (skb_transport_offset(skb) + len > skb_headlen(skb)) 134 if (skb_transport_offset(skb) + len > skb_headlen(skb))
112 goto bad; 135 goto bad;
113 136
@@ -148,6 +171,11 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
148 default: /* Other TLV code so scan list */ 171 default: /* Other TLV code so scan list */
149 if (optlen > len) 172 if (optlen > len)
150 goto bad; 173 goto bad;
174
175 tlv_count++;
176 if (tlv_count > max_count)
177 goto bad;
178
151 for (curr = procs; curr->type >= 0; curr++) { 179 for (curr = procs; curr->type >= 0; curr++) {
152 if (curr->type == nh[off]) { 180 if (curr->type == nh[off]) {
153 /* type specific length/alignment 181 /* type specific length/alignment
@@ -158,10 +186,10 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
158 break; 186 break;
159 } 187 }
160 } 188 }
161 if (curr->type < 0) { 189 if (curr->type < 0 &&
162 if (ip6_tlvopt_unknown(skb, off) == 0) 190 !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
163 return false; 191 return false;
164 } 192
165 padlen = 0; 193 padlen = 0;
166 break; 194 break;
167 } 195 }
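
ip6_parse_tlv() now enforces a per-header TLV budget, and a negative max_count doubles as a flag meaning "this many options, and silently drop anything unknown"; a single sysctl integer thus encodes two policies. A userspace sketch of the sign-encoded limit (parsing reduced to counting; names are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* Returns false (drop) when the option budget is exceeded, or
     * when an unknown option arrives while unknowns are disallowed. */
    static bool parse_tlvs(const int *opts, int n, int max_count)
    {
        bool disallow_unknowns = false;
        int tlv_count = 0;

        if (max_count < 0) {          /* the sign encodes the policy */
            disallow_unknowns = true;
            max_count = -max_count;
        }

        for (int i = 0; i < n; i++) {
            if (++tlv_count > max_count)
                return false;         /* budget exceeded: drop */
            if (opts[i] < 0 && disallow_unknowns)
                return false;         /* unknown TLV: silent drop */
        }
        return true;
    }

    int main(void)
    {
        int opts[] = { 1, 2, -1 };    /* -1 marks an unknown type */

        printf("%d\n", parse_tlvs(opts, 3, 8));   /* 1: allowed */
        printf("%d\n", parse_tlvs(opts, 3, -8));  /* 0: unknowns off */
        printf("%d\n", parse_tlvs(opts, 3, 2));   /* 0: over budget */
        return 0;
    }
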
@@ -186,7 +214,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
186 struct ipv6_destopt_hao *hao; 214 struct ipv6_destopt_hao *hao;
187 struct inet6_skb_parm *opt = IP6CB(skb); 215 struct inet6_skb_parm *opt = IP6CB(skb);
188 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 216 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
189 struct in6_addr tmp_addr;
190 int ret; 217 int ret;
191 218
192 if (opt->dsthao) { 219 if (opt->dsthao) {
@@ -228,9 +255,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
228 if (skb->ip_summed == CHECKSUM_COMPLETE) 255 if (skb->ip_summed == CHECKSUM_COMPLETE)
229 skb->ip_summed = CHECKSUM_NONE; 256 skb->ip_summed = CHECKSUM_NONE;
230 257
231 tmp_addr = ipv6h->saddr; 258 swap(ipv6h->saddr, hao->addr);
232 ipv6h->saddr = hao->addr;
233 hao->addr = tmp_addr;
234 259
235 if (skb->tstamp == 0) 260 if (skb->tstamp == 0)
236 __net_timestamp(skb); 261 __net_timestamp(skb);
@@ -260,23 +285,31 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
260 __u16 dstbuf; 285 __u16 dstbuf;
261#endif 286#endif
262 struct dst_entry *dst = skb_dst(skb); 287 struct dst_entry *dst = skb_dst(skb);
288 struct net *net = dev_net(skb->dev);
289 int extlen;
263 290
264 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || 291 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
265 !pskb_may_pull(skb, (skb_transport_offset(skb) + 292 !pskb_may_pull(skb, (skb_transport_offset(skb) +
266 ((skb_transport_header(skb)[1] + 1) << 3)))) { 293 ((skb_transport_header(skb)[1] + 1) << 3)))) {
267 __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 294 __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
268 IPSTATS_MIB_INHDRERRORS); 295 IPSTATS_MIB_INHDRERRORS);
296fail_and_free:
269 kfree_skb(skb); 297 kfree_skb(skb);
270 return -1; 298 return -1;
271 } 299 }
272 300
301 extlen = (skb_transport_header(skb)[1] + 1) << 3;
302 if (extlen > net->ipv6.sysctl.max_dst_opts_len)
303 goto fail_and_free;
304
273 opt->lastopt = opt->dst1 = skb_network_header_len(skb); 305 opt->lastopt = opt->dst1 = skb_network_header_len(skb);
274#if IS_ENABLED(CONFIG_IPV6_MIP6) 306#if IS_ENABLED(CONFIG_IPV6_MIP6)
275 dstbuf = opt->dst1; 307 dstbuf = opt->dst1;
276#endif 308#endif
277 309
278 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { 310 if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
279 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; 311 init_net.ipv6.sysctl.max_dst_opts_cnt)) {
312 skb->transport_header += extlen;
280 opt = IP6CB(skb); 313 opt = IP6CB(skb);
281#if IS_ENABLED(CONFIG_IPV6_MIP6) 314#if IS_ENABLED(CONFIG_IPV6_MIP6)
282 opt->nhoff = dstbuf; 315 opt->nhoff = dstbuf;
@@ -805,6 +838,8 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = {
805int ipv6_parse_hopopts(struct sk_buff *skb) 838int ipv6_parse_hopopts(struct sk_buff *skb)
806{ 839{
807 struct inet6_skb_parm *opt = IP6CB(skb); 840 struct inet6_skb_parm *opt = IP6CB(skb);
841 struct net *net = dev_net(skb->dev);
842 int extlen;
808 843
809 /* 844 /*
810 * skb_network_header(skb) is equal to skb->data, and 845 * skb_network_header(skb) is equal to skb->data, and
@@ -815,13 +850,19 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
815 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || 850 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
816 !pskb_may_pull(skb, (sizeof(struct ipv6hdr) + 851 !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
817 ((skb_transport_header(skb)[1] + 1) << 3)))) { 852 ((skb_transport_header(skb)[1] + 1) << 3)))) {
853fail_and_free:
818 kfree_skb(skb); 854 kfree_skb(skb);
819 return -1; 855 return -1;
820 } 856 }
821 857
858 extlen = (skb_transport_header(skb)[1] + 1) << 3;
859 if (extlen > net->ipv6.sysctl.max_hbh_opts_len)
860 goto fail_and_free;
861
822 opt->flags |= IP6SKB_HOPBYHOP; 862 opt->flags |= IP6SKB_HOPBYHOP;
823 if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { 863 if (ip6_parse_tlv(tlvprochopopt_lst, skb,
824 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; 864 init_net.ipv6.sysctl.max_hbh_opts_cnt)) {
865 skb->transport_header += extlen;
825 opt = IP6CB(skb); 866 opt = IP6CB(skb);
826 opt->nhoff = sizeof(struct ipv6hdr); 867 opt->nhoff = sizeof(struct ipv6hdr);
827 return 1; 868 return 1;
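
Both receive paths now bound the extension header twice: total length against a per-netns sysctl limit before parsing, and TLV count inside ip6_parse_tlv(). The (hdr[1] + 1) << 3 expression decodes the RFC 8200 "Hdr Ext Len" byte, which counts 8-octet units excluding the first eight octets, so extlen ranges from 8 to 2048 bytes. A standalone sketch of the gate (ext_hdr_len_ok and limit are illustrative; the kernel reads its limits from net->ipv6.sysctl):

#include <stdbool.h>

static bool ext_hdr_len_ok(const unsigned char *hdr, int limit)
{
	int extlen = (hdr[1] + 1) << 3;	/* 8..2048 bytes */

	return extlen <= limit;		/* else drop, as fail_and_free does */
}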
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 305e2ed730bf..11025f8d124b 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
99 break; 99 break;
100 hdrlen = 8; 100 hdrlen = 8;
101 } else if (nexthdr == NEXTHDR_AUTH) 101 } else if (nexthdr == NEXTHDR_AUTH)
102 hdrlen = (hp->hdrlen+2)<<2; 102 hdrlen = ipv6_authlen(hp);
103 else 103 else
104 hdrlen = ipv6_optlen(hp); 104 hdrlen = ipv6_optlen(hp);
105 105
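
The ipv6_authlen() helper replaces the open-coded AH length formula: AH encodes its length in 32-bit words minus two (RFC 4302), while every other extension header uses 8-octet units minus one (RFC 8200). A self-contained sketch of the two formulas (struct and names abbreviated, not the kernel definitions):

struct opt_hdr {
	unsigned char nexthdr;
	unsigned char hdrlen;
};

static inline int authlen(const struct opt_hdr *hp)
{
	return (hp->hdrlen + 2) << 2;	/* RFC 4302: 32-bit words - 2 */
}

static inline int optlen(const struct opt_hdr *hp)
{
	return (hp->hdrlen + 1) << 3;	/* RFC 8200: 8-octet units - 1 */
}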
@@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
187{ 187{
188 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); 188 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
189 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 189 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
190 unsigned int len;
191 bool found; 190 bool found;
192 191
193 if (fragoff) 192 if (fragoff)
@@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
204 start = *offset + sizeof(struct ipv6hdr); 203 start = *offset + sizeof(struct ipv6hdr);
205 nexthdr = ip6->nexthdr; 204 nexthdr = ip6->nexthdr;
206 } 205 }
207 len = skb->len - start;
208 206
209 do { 207 do {
210 struct ipv6_opt_hdr _hdr, *hp; 208 struct ipv6_opt_hdr _hdr, *hp;
@@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
273 271
274 if (!found) { 272 if (!found) {
275 nexthdr = hp->nexthdr; 273 nexthdr = hp->nexthdr;
276 len -= hdrlen;
277 start += hdrlen; 274 start += hdrlen;
278 } 275 }
279 } while (!found); 276 } while (!found);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5acb54405b10..6ae5dd3f4d0d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -250,16 +250,15 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
250 return (*op & 0xC0) == 0x80; 250 return (*op & 0xC0) == 0x80;
251} 251}
252 252
253int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, 253void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
254 struct icmp6hdr *thdr, int len) 254 struct icmp6hdr *thdr, int len)
255{ 255{
256 struct sk_buff *skb; 256 struct sk_buff *skb;
257 struct icmp6hdr *icmp6h; 257 struct icmp6hdr *icmp6h;
258 int err = 0;
259 258
260 skb = skb_peek(&sk->sk_write_queue); 259 skb = skb_peek(&sk->sk_write_queue);
261 if (!skb) 260 if (!skb)
262 goto out; 261 return;
263 262
264 icmp6h = icmp6_hdr(skb); 263 icmp6h = icmp6_hdr(skb);
265 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr)); 264 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
@@ -287,8 +286,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
287 tmp_csum); 286 tmp_csum);
288 } 287 }
289 ip6_push_pending_frames(sk); 288 ip6_push_pending_frames(sk);
290out:
291 return err;
292} 289}
293 290
294struct icmpv6_msg { 291struct icmpv6_msg {
@@ -438,7 +435,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
438 int iif = 0; 435 int iif = 0;
439 int addr_type = 0; 436 int addr_type = 0;
440 int len; 437 int len;
441 int err = 0;
442 u32 mark = IP6_REPLY_MARK(net, skb->mark); 438 u32 mark = IP6_REPLY_MARK(net, skb->mark);
443 439
444 if ((u8 *)hdr < skb->head || 440 if ((u8 *)hdr < skb->head ||
@@ -575,17 +571,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
575 rcu_read_lock(); 571 rcu_read_lock();
576 idev = __in6_dev_get(skb->dev); 572 idev = __in6_dev_get(skb->dev);
577 573
578 err = ip6_append_data(sk, icmpv6_getfrag, &msg, 574 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
579 len + sizeof(struct icmp6hdr), 575 len + sizeof(struct icmp6hdr),
580 sizeof(struct icmp6hdr), 576 sizeof(struct icmp6hdr),
581 &ipc6, &fl6, (struct rt6_info *)dst, 577 &ipc6, &fl6, (struct rt6_info *)dst,
582 MSG_DONTWAIT, &sockc_unused); 578 MSG_DONTWAIT, &sockc_unused)) {
583 if (err) {
584 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); 579 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
585 ip6_flush_pending_frames(sk); 580 ip6_flush_pending_frames(sk);
586 } else { 581 } else {
587 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, 582 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
588 len + sizeof(struct icmp6hdr)); 583 len + sizeof(struct icmp6hdr));
589 } 584 }
590 rcu_read_unlock(); 585 rcu_read_unlock();
591out_dst_release: 586out_dst_release:
@@ -682,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
682 struct icmpv6_msg msg; 677 struct icmpv6_msg msg;
683 struct dst_entry *dst; 678 struct dst_entry *dst;
684 struct ipcm6_cookie ipc6; 679 struct ipcm6_cookie ipc6;
685 int err = 0;
686 u32 mark = IP6_REPLY_MARK(net, skb->mark); 680 u32 mark = IP6_REPLY_MARK(net, skb->mark);
687 struct sockcm_cookie sockc_unused = {0}; 681 struct sockcm_cookie sockc_unused = {0};
688 682
@@ -719,8 +713,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
719 else if (!fl6.flowi6_oif) 713 else if (!fl6.flowi6_oif)
720 fl6.flowi6_oif = np->ucast_oif; 714 fl6.flowi6_oif = np->ucast_oif;
721 715
722 err = ip6_dst_lookup(net, sk, &dst, &fl6); 716 if (ip6_dst_lookup(net, sk, &dst, &fl6))
723 if (err)
724 goto out; 717 goto out;
725 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); 718 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
726 if (IS_ERR(dst)) 719 if (IS_ERR(dst))
@@ -737,17 +730,16 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
737 ipc6.dontfrag = np->dontfrag; 730 ipc6.dontfrag = np->dontfrag;
738 ipc6.opt = NULL; 731 ipc6.opt = NULL;
739 732
740 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), 733 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
741 sizeof(struct icmp6hdr), &ipc6, &fl6, 734 skb->len + sizeof(struct icmp6hdr),
742 (struct rt6_info *)dst, MSG_DONTWAIT, 735 sizeof(struct icmp6hdr), &ipc6, &fl6,
743 &sockc_unused); 736 (struct rt6_info *)dst, MSG_DONTWAIT,
744 737 &sockc_unused)) {
745 if (err) {
746 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); 738 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
747 ip6_flush_pending_frames(sk); 739 ip6_flush_pending_frames(sk);
748 } else { 740 } else {
749 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, 741 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
750 skb->len + sizeof(struct icmp6hdr)); 742 skb->len + sizeof(struct icmp6hdr));
751 } 743 }
752 dst_release(dst); 744 dst_release(dst);
753out: 745out:
@@ -872,10 +864,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
872 goto discard_it; 864 goto discard_it;
873 hdr = icmp6_hdr(skb); 865 hdr = icmp6_hdr(skb);
874 866
875 /* 867 /* to notify */
876 * Drop through to notify 868 /* fall through */
877 */
878
879 case ICMPV6_DEST_UNREACH: 869 case ICMPV6_DEST_UNREACH:
880 case ICMPV6_TIME_EXCEED: 870 case ICMPV6_TIME_EXCEED:
881 case ICMPV6_PARAMPROB: 871 case ICMPV6_PARAMPROB:
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index e0170f62bc39..3c7a11b62334 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -55,17 +55,6 @@ struct ila_identifier {
55 }; 55 };
56}; 56};
57 57
58enum {
59 ILA_ATYPE_IID = 0,
60 ILA_ATYPE_LUID,
61 ILA_ATYPE_VIRT_V4,
62 ILA_ATYPE_VIRT_UNI_V6,
63 ILA_ATYPE_VIRT_MULTI_V6,
64 ILA_ATYPE_RSVD_1,
65 ILA_ATYPE_RSVD_2,
66 ILA_ATYPE_RSVD_3,
67};
68
69#define CSUM_NEUTRAL_FLAG htonl(0x10000000) 58#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
70 59
71struct ila_addr { 60struct ila_addr {
@@ -93,6 +82,7 @@ struct ila_params {
93 struct ila_locator locator_match; 82 struct ila_locator locator_match;
94 __wsum csum_diff; 83 __wsum csum_diff;
95 u8 csum_mode; 84 u8 csum_mode;
85 u8 ident_type;
96}; 86};
97 87
98static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) 88static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
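
The enum moves to uapi/linux/ila.h so userspace can name the identifier types, and ila_params grows an ident_type so the type no longer has to be read out of the address itself. For a formatted identifier the type sits in the top three bits of the first identifier byte, with the checksum-neutral C-bit just below it (hence CSUM_NEUTRAL_FLAG's 0x10 in the top byte). A hypothetical user-space view of that layout (not the kernel's bitfield struct):

static inline unsigned int ila_ident_type(const unsigned char id[8])
{
	return id[0] >> 5;		/* top 3 bits: ILA_ATYPE_* */
}

static inline unsigned int ila_csum_neutral(const unsigned char id[8])
{
	return (id[0] >> 4) & 1;	/* C-bit, cf. CSUM_NEUTRAL_FLAG */
}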
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index aba0998ddbfb..8c88ecf29b93 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -13,30 +13,37 @@
13#include <uapi/linux/ila.h> 13#include <uapi/linux/ila.h>
14#include "ila.h" 14#include "ila.h"
15 15
16static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) 16void ila_init_saved_csum(struct ila_params *p)
17{ 17{
18 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); 18 if (!p->locator_match.v64)
19 return;
19 20
21 p->csum_diff = compute_csum_diff8(
22 (__be32 *)&p->locator,
23 (__be32 *)&p->locator_match);
24}
25
26static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p)
27{
20 if (p->locator_match.v64) 28 if (p->locator_match.v64)
21 return p->csum_diff; 29 return p->csum_diff;
22 else 30 else
23 return compute_csum_diff8((__be32 *)&iaddr->loc, 31 return compute_csum_diff8((__be32 *)&p->locator,
24 (__be32 *)&p->locator); 32 (__be32 *)&iaddr->loc);
25} 33}
26 34
27static void ila_csum_do_neutral(struct ila_addr *iaddr, 35static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
28 struct ila_params *p) 36{
37 return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p);
38}
39
40static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr,
41 struct ila_params *p)
29{ 42{
30 __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; 43 __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
31 __wsum diff, fval; 44 __wsum diff, fval;
32 45
33 /* Check if checksum adjust value has been cached */ 46 diff = get_csum_diff_iaddr(iaddr, p);
34 if (p->locator_match.v64) {
35 diff = p->csum_diff;
36 } else {
37 diff = compute_csum_diff8((__be32 *)&p->locator,
38 (__be32 *)iaddr);
39 }
40 47
41 fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? 48 fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
42 CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG); 49 CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
@@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
53 iaddr->ident.csum_neutral ^= 1; 60 iaddr->ident.csum_neutral ^= 1;
54} 61}
55 62
56static void ila_csum_adjust_transport(struct sk_buff *skb, 63static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr,
57 struct ila_params *p) 64 struct ila_params *p)
58{ 65{
66 __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
59 __wsum diff; 67 __wsum diff;
60 struct ipv6hdr *ip6h = ipv6_hdr(skb); 68
61 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); 69 diff = get_csum_diff_iaddr(iaddr, p);
70
71 *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
72}
73
74static void ila_csum_adjust_transport(struct sk_buff *skb,
75 struct ila_params *p)
76{
62 size_t nhoff = sizeof(struct ipv6hdr); 77 size_t nhoff = sizeof(struct ipv6hdr);
78 struct ipv6hdr *ip6h = ipv6_hdr(skb);
79 __wsum diff;
63 80
64 switch (ip6h->nexthdr) { 81 switch (ip6h->nexthdr) {
65 case NEXTHDR_TCP: 82 case NEXTHDR_TCP:
@@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
98 } 115 }
99 break; 116 break;
100 } 117 }
101
102 /* Now change destination address */
103 iaddr->loc = p->locator;
104} 118}
105 119
106void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, 120void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
107 bool set_csum_neutral) 121 bool sir2ila)
108{ 122{
109 struct ipv6hdr *ip6h = ipv6_hdr(skb); 123 struct ipv6hdr *ip6h = ipv6_hdr(skb);
110 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); 124 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
111 125
112 /* First deal with the transport checksum */ 126 switch (p->csum_mode) {
113 if (ila_csum_neutral_set(iaddr->ident)) { 127 case ILA_CSUM_ADJUST_TRANSPORT:
114 /* C-bit is set in the locator indicating that this 128 ila_csum_adjust_transport(skb, p);
115 * is a locator being translated to a SIR address. 129 break;
116 * Perform (receiver) checksum-neutral translation. 130 case ILA_CSUM_NEUTRAL_MAP:
117 */ 131 if (sir2ila) {
118 if (!set_csum_neutral) 132 if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) {
119 ila_csum_do_neutral(iaddr, p); 133 /* Checksum flag should never be
120 } else { 134 * set in a formatted SIR address.
121 switch (p->csum_mode) { 135 */
122 case ILA_CSUM_ADJUST_TRANSPORT: 136 break;
123 ila_csum_adjust_transport(skb, p); 137 }
124 break; 138 } else if (!ila_csum_neutral_set(iaddr->ident)) {
125 case ILA_CSUM_NEUTRAL_MAP: 139 /* ILA to SIR translation and C-bit isn't
126 ila_csum_do_neutral(iaddr, p); 140 * set, so we're good.
127 break; 141 */
128 case ILA_CSUM_NO_ACTION:
129 break; 142 break;
130 } 143 }
144 ila_csum_do_neutral_fmt(iaddr, p);
145 break;
146 case ILA_CSUM_NEUTRAL_MAP_AUTO:
147 ila_csum_do_neutral_nofmt(iaddr, p);
148 break;
149 case ILA_CSUM_NO_ACTION:
150 break;
131 } 151 }
132 152
133 /* Now change destination address */ 153 /* Now change destination address */
134 iaddr->loc = p->locator; 154 iaddr->loc = p->locator;
135} 155}
136 156
137void ila_init_saved_csum(struct ila_params *p)
138{
139 if (!p->locator_match.v64)
140 return;
141
142 p->csum_diff = compute_csum_diff8(
143 (__be32 *)&p->locator,
144 (__be32 *)&p->locator_match);
145}
146
147static int __init ila_init(void) 157static int __init ila_init(void)
148{ 158{
149 int ret; 159 int ret;
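
All three checksum paths now funnel through get_csum_diff_iaddr(), which either returns the difference precomputed by ila_init_saved_csum() or derives it from the locator being replaced. The underlying arithmetic is the usual ones'-complement trick: swapping an 8-byte locator shifts any checksum covering it by a constant equal to the new words minus the old. A plain-C sketch of that difference (csum_add32/csum_diff8 are illustrative, not the kernel's csum helpers):

#include <stdint.h>

static uint32_t csum_add32(uint32_t sum, uint32_t v)
{
	sum += v;
	return sum + (sum < v);		/* end-around carry */
}

static uint32_t csum_diff8(const uint32_t from[2], const uint32_t to[2])
{
	uint32_t sum = 0;

	sum = csum_add32(sum, ~from[0]);	/* subtract old words */
	sum = csum_add32(sum, ~from[1]);
	sum = csum_add32(sum, to[0]);		/* add new words */
	sum = csum_add32(sum, to[1]);
	return sum;
}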
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 0c02a09bc351..3d56a2fb6f86 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/errno.h> 2#include <linux/errno.h>
2#include <linux/ip.h> 3#include <linux/ip.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
@@ -19,6 +20,7 @@ struct ila_lwt {
19 struct ila_params p; 20 struct ila_params p;
20 struct dst_cache dst_cache; 21 struct dst_cache dst_cache;
21 u32 connected : 1; 22 u32 connected : 1;
23 u32 lwt_output : 1;
22}; 24};
23 25
24static inline struct ila_lwt *ila_lwt_lwtunnel( 26static inline struct ila_lwt *ila_lwt_lwtunnel(
@@ -44,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
44 if (skb->protocol != htons(ETH_P_IPV6)) 46 if (skb->protocol != htons(ETH_P_IPV6))
45 goto drop; 47 goto drop;
46 48
47 ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate), 49 if (ilwt->lwt_output)
48 true); 50 ila_update_ipv6_locator(skb,
51 ila_params_lwtunnel(orig_dst->lwtstate),
52 true);
49 53
50 if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) { 54 if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
51 /* Already have a next hop address in route, no need for 55 /* Already have a next hop address in route, no need for
@@ -97,11 +101,15 @@ drop:
97static int ila_input(struct sk_buff *skb) 101static int ila_input(struct sk_buff *skb)
98{ 102{
99 struct dst_entry *dst = skb_dst(skb); 103 struct dst_entry *dst = skb_dst(skb);
104 struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate);
100 105
101 if (skb->protocol != htons(ETH_P_IPV6)) 106 if (skb->protocol != htons(ETH_P_IPV6))
102 goto drop; 107 goto drop;
103 108
104 ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false); 109 if (!ilwt->lwt_output)
110 ila_update_ipv6_locator(skb,
111 ila_params_lwtunnel(dst->lwtstate),
112 false);
105 113
106 return dst->lwtstate->orig_input(skb); 114 return dst->lwtstate->orig_input(skb);
107 115
@@ -113,6 +121,8 @@ drop:
113static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { 121static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
114 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, 122 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
115 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, 123 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
124 [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
125 [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, },
116}; 126};
117 127
118static int ila_build_state(struct nlattr *nla, 128static int ila_build_state(struct nlattr *nla,
@@ -126,33 +136,84 @@ static int ila_build_state(struct nlattr *nla,
126 struct lwtunnel_state *newts; 136 struct lwtunnel_state *newts;
127 const struct fib6_config *cfg6 = cfg; 137 const struct fib6_config *cfg6 = cfg;
128 struct ila_addr *iaddr; 138 struct ila_addr *iaddr;
139 u8 ident_type = ILA_ATYPE_USE_FORMAT;
140 u8 hook_type = ILA_HOOK_ROUTE_OUTPUT;
141 u8 csum_mode = ILA_CSUM_NO_ACTION;
142 bool lwt_output = true;
143 u8 eff_ident_type;
129 int ret; 144 int ret;
130 145
131 if (family != AF_INET6) 146 if (family != AF_INET6)
132 return -EINVAL; 147 return -EINVAL;
133 148
134 if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { 149 ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
135 /* Need to have full locator and at least type field 150 if (ret < 0)
136 * included in destination 151 return ret;
137 */ 152
153 if (!tb[ILA_ATTR_LOCATOR])
138 return -EINVAL; 154 return -EINVAL;
139 }
140 155
141 iaddr = (struct ila_addr *)&cfg6->fc_dst; 156 iaddr = (struct ila_addr *)&cfg6->fc_dst;
142 157
143 if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) { 158 if (tb[ILA_ATTR_IDENT_TYPE])
144 /* Don't allow translation for a non-ILA address or checksum 159 ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]);
145 * neutral flag to be set. 160
161 if (ident_type == ILA_ATYPE_USE_FORMAT) {
162 /* Infer identifier type from type field in formatted
163 * identifier.
146 */ 164 */
165
166 if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
167 /* Need to have full locator and at least type field
168 * included in destination
169 */
170 return -EINVAL;
171 }
172
173 eff_ident_type = iaddr->ident.type;
174 } else {
175 eff_ident_type = ident_type;
176 }
177
178 switch (eff_ident_type) {
179 case ILA_ATYPE_IID:
180 /* Don't allow ILA for IID type */
181 return -EINVAL;
182 case ILA_ATYPE_LUID:
183 break;
184 case ILA_ATYPE_VIRT_V4:
185 case ILA_ATYPE_VIRT_UNI_V6:
186 case ILA_ATYPE_VIRT_MULTI_V6:
187 case ILA_ATYPE_NONLOCAL_ADDR:
188 /* These ILA formats are not supported yet. */
189 default:
147 return -EINVAL; 190 return -EINVAL;
148 } 191 }
149 192
150 ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); 193 if (tb[ILA_ATTR_HOOK_TYPE])
151 if (ret < 0) 194 hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]);
152 return ret; 195
196 switch (hook_type) {
197 case ILA_HOOK_ROUTE_OUTPUT:
198 lwt_output = true;
199 break;
200 case ILA_HOOK_ROUTE_INPUT:
201 lwt_output = false;
202 break;
203 default:
204 return -EINVAL;
205 }
153 206
154 if (!tb[ILA_ATTR_LOCATOR]) 207 if (tb[ILA_ATTR_CSUM_MODE])
208 csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
209
210 if (csum_mode == ILA_CSUM_NEUTRAL_MAP &&
211 ila_csum_neutral_set(iaddr->ident)) {
212 /* Don't allow translation if checksum neutral bit is
213 * configured and it's set in the SIR address.
214 */
155 return -EINVAL; 215 return -EINVAL;
216 }
156 217
157 newts = lwtunnel_state_alloc(sizeof(*ilwt)); 218 newts = lwtunnel_state_alloc(sizeof(*ilwt));
158 if (!newts) 219 if (!newts)
@@ -165,19 +226,18 @@ static int ila_build_state(struct nlattr *nla,
165 return ret; 226 return ret;
166 } 227 }
167 228
229 ilwt->lwt_output = !!lwt_output;
230
168 p = ila_params_lwtunnel(newts); 231 p = ila_params_lwtunnel(newts);
169 232
233 p->csum_mode = csum_mode;
234 p->ident_type = ident_type;
170 p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); 235 p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
171 236
172 /* Precompute checksum difference for translation since we 237 /* Precompute checksum difference for translation since we
173 * know both the old locator and the new one. 238 * know both the old locator and the new one.
174 */ 239 */
175 p->locator_match = iaddr->loc; 240 p->locator_match = iaddr->loc;
176 p->csum_diff = compute_csum_diff8(
177 (__be32 *)&p->locator_match, (__be32 *)&p->locator);
178
179 if (tb[ILA_ATTR_CSUM_MODE])
180 p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
181 241
182 ila_init_saved_csum(p); 242 ila_init_saved_csum(p);
183 243
@@ -202,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb,
202 struct lwtunnel_state *lwtstate) 262 struct lwtunnel_state *lwtstate)
203{ 263{
204 struct ila_params *p = ila_params_lwtunnel(lwtstate); 264 struct ila_params *p = ila_params_lwtunnel(lwtstate);
265 struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate);
205 266
206 if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64, 267 if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
207 ILA_ATTR_PAD)) 268 ILA_ATTR_PAD))
208 goto nla_put_failure; 269 goto nla_put_failure;
270
209 if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode)) 271 if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
210 goto nla_put_failure; 272 goto nla_put_failure;
211 273
274 if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type))
275 goto nla_put_failure;
276
277 if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE,
278 ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT :
279 ILA_HOOK_ROUTE_INPUT))
280 goto nla_put_failure;
281
212 return 0; 282 return 0;
213 283
214nla_put_failure: 284nla_put_failure:
@@ -219,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
219{ 289{
220 return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */ 290 return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
221 nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */ 291 nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
292 nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */
293 nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */
222 0; 294 0;
223} 295}
224 296
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 5bd419c1abc8..6eb5e68f112a 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/jhash.h> 2#include <linux/jhash.h>
2#include <linux/netfilter.h> 3#include <linux/netfilter.h>
3#include <linux/rcupdate.h> 4#include <linux/rcupdate.h>
@@ -120,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
120 [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, 121 [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
121 [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, 122 [ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
122 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, 123 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
124 [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
123}; 125};
124 126
125static int parse_nl_config(struct genl_info *info, 127static int parse_nl_config(struct genl_info *info,
@@ -137,6 +139,14 @@ static int parse_nl_config(struct genl_info *info,
137 139
138 if (info->attrs[ILA_ATTR_CSUM_MODE]) 140 if (info->attrs[ILA_ATTR_CSUM_MODE])
139 xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); 141 xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
142 else
143 xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
144
145 if (info->attrs[ILA_ATTR_IDENT_TYPE])
146 xp->ip.ident_type = nla_get_u8(
147 info->attrs[ILA_ATTR_IDENT_TYPE]);
148 else
149 xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
140 150
141 if (info->attrs[ILA_ATTR_IFINDEX]) 151 if (info->attrs[ILA_ATTR_IFINDEX])
142 xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); 152 xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -197,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg)
197 } 207 }
198} 208}
199 209
200static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral); 210static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
201 211
202static unsigned int 212static unsigned int
203ila_nf_input(void *priv, 213ila_nf_input(void *priv,
@@ -395,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
395 (__force u64)ila->xp.ip.locator_match.v64, 405 (__force u64)ila->xp.ip.locator_match.v64,
396 ILA_ATTR_PAD) || 406 ILA_ATTR_PAD) ||
397 nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || 407 nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
398 nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) 408 nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) ||
409 nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type))
399 return -1; 410 return -1;
400 411
401 return 0; 412 return 0;
@@ -606,7 +617,7 @@ static struct pernet_operations ila_net_ops = {
606 .size = sizeof(struct ila_net), 617 .size = sizeof(struct ila_net),
607}; 618};
608 619
609static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) 620static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
610{ 621{
611 struct ila_map *ila; 622 struct ila_map *ila;
612 struct ipv6hdr *ip6h = ipv6_hdr(skb); 623 struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -616,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
616 627
617 /* Assumes skb contains a valid IPv6 header that is pulled */ 628 /* Assumes skb contains a valid IPv6 header that is pulled */
618 629
619 if (!ila_addr_is_ila(iaddr)) { 630 /* No check here that ILA type in the mapping matches what is in the
620 /* Type indicates this is not an ILA address */ 631 * address. We assume that whatever the sender gave us can be translated.
621 return 0; 632 * The checksum mode, however, is relevant.
622 } 633 */
623 634
624 rcu_read_lock(); 635 rcu_read_lock();
625 636
626 ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); 637 ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
627 if (ila) 638 if (ila)
628 ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral); 639 ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila);
629 640
630 rcu_read_unlock(); 641 rcu_read_unlock();
631 642
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index c0cbcb259f5a..ec43d18b5ff9 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <net/ip.h> 2#include <net/ip.h>
2#include <net/udp.h> 3#include <net/udp.h>
3#include <net/udplite.h> 4#include <net/udplite.h>
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e5308d7cbd75..f5285f4e1d08 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -38,14 +38,6 @@
38#include <net/ip6_fib.h> 38#include <net/ip6_fib.h>
39#include <net/ip6_route.h> 39#include <net/ip6_route.h>
40 40
41#define RT6_DEBUG 2
42
43#if RT6_DEBUG >= 3
44#define RT6_TRACE(x...) pr_debug(x)
45#else
46#define RT6_TRACE(x...) do { ; } while (0)
47#endif
48
49static struct kmem_cache *fib6_node_kmem __read_mostly; 41static struct kmem_cache *fib6_node_kmem __read_mostly;
50 42
51struct fib6_cleaner { 43struct fib6_cleaner {
@@ -62,9 +54,12 @@ struct fib6_cleaner {
62#define FWS_INIT FWS_L 54#define FWS_INIT FWS_L
63#endif 55#endif
64 56
65static void fib6_prune_clones(struct net *net, struct fib6_node *fn); 57static struct rt6_info *fib6_find_prefix(struct net *net,
66static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); 58 struct fib6_table *table,
67static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); 59 struct fib6_node *fn);
60static struct fib6_node *fib6_repair_tree(struct net *net,
61 struct fib6_table *table,
62 struct fib6_node *fn);
68static int fib6_walk(struct net *net, struct fib6_walker *w); 63static int fib6_walk(struct net *net, struct fib6_walker *w);
69static int fib6_walk_continue(struct fib6_walker *w); 64static int fib6_walk_continue(struct fib6_walker *w);
70 65
@@ -75,7 +70,7 @@ static int fib6_walk_continue(struct fib6_walker *w);
75 * result of redirects, path MTU changes, etc. 70 * result of redirects, path MTU changes, etc.
76 */ 71 */
77 72
78static void fib6_gc_timer_cb(unsigned long arg); 73static void fib6_gc_timer_cb(struct timer_list *t);
79 74
80#define FOR_WALKERS(net, w) \ 75#define FOR_WALKERS(net, w) \
81 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh) 76 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
@@ -110,6 +105,20 @@ enum {
110 FIB6_NO_SERNUM_CHANGE = 0, 105 FIB6_NO_SERNUM_CHANGE = 0,
111}; 106};
112 107
108void fib6_update_sernum(struct rt6_info *rt)
109{
110 struct fib6_table *table = rt->rt6i_table;
111 struct net *net = dev_net(rt->dst.dev);
112 struct fib6_node *fn;
113
114 spin_lock_bh(&table->tb6_lock);
115 fn = rcu_dereference_protected(rt->rt6i_node,
116 lockdep_is_held(&table->tb6_lock));
117 if (fn)
118 fn->fn_sernum = fib6_new_sernum(net);
119 spin_unlock_bh(&table->tb6_lock);
120}
121
113/* 122/*
114 * Auxiliary address test functions for the radix tree. 123 * Auxiliary address test functions for the radix tree.
115 * 124 *
@@ -140,18 +149,21 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
140 addr[fn_bit >> 5]; 149 addr[fn_bit >> 5];
141} 150}
142 151
143static struct fib6_node *node_alloc(void) 152static struct fib6_node *node_alloc(struct net *net)
144{ 153{
145 struct fib6_node *fn; 154 struct fib6_node *fn;
146 155
147 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); 156 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
157 if (fn)
158 net->ipv6.rt6_stats->fib_nodes++;
148 159
149 return fn; 160 return fn;
150} 161}
151 162
152static void node_free_immediate(struct fib6_node *fn) 163static void node_free_immediate(struct net *net, struct fib6_node *fn)
153{ 164{
154 kmem_cache_free(fib6_node_kmem, fn); 165 kmem_cache_free(fib6_node_kmem, fn);
166 net->ipv6.rt6_stats->fib_nodes--;
155} 167}
156 168
157static void node_free_rcu(struct rcu_head *head) 169static void node_free_rcu(struct rcu_head *head)
@@ -161,9 +173,10 @@ static void node_free_rcu(struct rcu_head *head)
161 kmem_cache_free(fib6_node_kmem, fn); 173 kmem_cache_free(fib6_node_kmem, fn);
162} 174}
163 175
164static void node_free(struct fib6_node *fn) 176static void node_free(struct net *net, struct fib6_node *fn)
165{ 177{
166 call_rcu(&fn->rcu, node_free_rcu); 178 call_rcu(&fn->rcu, node_free_rcu);
179 net->ipv6.rt6_stats->fib_nodes--;
167} 180}
168 181
169void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) 182void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
@@ -185,9 +198,6 @@ void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
185 *ppcpu_rt = NULL; 198 *ppcpu_rt = NULL;
186 } 199 }
187 } 200 }
188
189 free_percpu(non_pcpu_rt->rt6i_pcpu);
190 non_pcpu_rt->rt6i_pcpu = NULL;
191} 201}
192EXPORT_SYMBOL_GPL(rt6_free_pcpu); 202EXPORT_SYMBOL_GPL(rt6_free_pcpu);
193 203
@@ -205,8 +215,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb)
205 * Initialize table lock at a single place to give lockdep a key, 215 * Initialize table lock at a single place to give lockdep a key,
206 * tables aren't visible prior to being linked to the list. 216 * tables aren't visible prior to being linked to the list.
207 */ 217 */
208 rwlock_init(&tb->tb6_lock); 218 spin_lock_init(&tb->tb6_lock);
209
210 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); 219 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
211 220
212 /* 221 /*
@@ -225,7 +234,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
225 table = kzalloc(sizeof(*table), GFP_ATOMIC); 234 table = kzalloc(sizeof(*table), GFP_ATOMIC);
226 if (table) { 235 if (table) {
227 table->tb6_id = id; 236 table->tb6_id = id;
228 table->tb6_root.leaf = net->ipv6.ip6_null_entry; 237 rcu_assign_pointer(table->tb6_root.leaf,
238 net->ipv6.ip6_null_entry);
229 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 239 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
230 inet_peer_base_init(&table->tb6_peers); 240 inet_peer_base_init(&table->tb6_peers);
231 } 241 }
@@ -322,11 +332,8 @@ unsigned int fib6_tables_seq_read(struct net *net)
322 struct hlist_head *head = &net->ipv6.fib_table_hash[h]; 332 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
323 struct fib6_table *tb; 333 struct fib6_table *tb;
324 334
325 hlist_for_each_entry_rcu(tb, head, tb6_hlist) { 335 hlist_for_each_entry_rcu(tb, head, tb6_hlist)
326 read_lock_bh(&tb->tb6_lock);
327 fib_seq += tb->fib_seq; 336 fib_seq += tb->fib_seq;
328 read_unlock_bh(&tb->tb6_lock);
329 }
330 } 337 }
331 rcu_read_unlock(); 338 rcu_read_unlock();
332 339
@@ -346,9 +353,11 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
346 353
347static int call_fib6_entry_notifiers(struct net *net, 354static int call_fib6_entry_notifiers(struct net *net,
348 enum fib_event_type event_type, 355 enum fib_event_type event_type,
349 struct rt6_info *rt) 356 struct rt6_info *rt,
357 struct netlink_ext_ack *extack)
350{ 358{
351 struct fib6_entry_notifier_info info = { 359 struct fib6_entry_notifier_info info = {
360 .info.extack = extack,
352 .rt = rt, 361 .rt = rt,
353 }; 362 };
354 363
@@ -372,7 +381,7 @@ static int fib6_node_dump(struct fib6_walker *w)
372{ 381{
373 struct rt6_info *rt; 382 struct rt6_info *rt;
374 383
375 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) 384 for_each_fib6_walker_rt(w)
376 fib6_rt_dump(rt, w->args); 385 fib6_rt_dump(rt, w->args);
377 w->leaf = NULL; 386 w->leaf = NULL;
378 return 0; 387 return 0;
@@ -382,9 +391,9 @@ static void fib6_table_dump(struct net *net, struct fib6_table *tb,
382 struct fib6_walker *w) 391 struct fib6_walker *w)
383{ 392{
384 w->root = &tb->tb6_root; 393 w->root = &tb->tb6_root;
385 read_lock_bh(&tb->tb6_lock); 394 spin_lock_bh(&tb->tb6_lock);
386 fib6_walk(net, w); 395 fib6_walk(net, w);
387 read_unlock_bh(&tb->tb6_lock); 396 spin_unlock_bh(&tb->tb6_lock);
388} 397}
389 398
390/* Called with rcu_read_lock() */ 399/* Called with rcu_read_lock() */
@@ -421,7 +430,7 @@ static int fib6_dump_node(struct fib6_walker *w)
421 int res; 430 int res;
422 struct rt6_info *rt; 431 struct rt6_info *rt;
423 432
424 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { 433 for_each_fib6_walker_rt(w) {
425 res = rt6_dump_route(rt, w->args); 434 res = rt6_dump_route(rt, w->args);
426 if (res < 0) { 435 if (res < 0) {
427 /* Frame is full, suspend walking */ 436 /* Frame is full, suspend walking */
@@ -480,9 +489,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
480 w->count = 0; 489 w->count = 0;
481 w->skip = 0; 490 w->skip = 0;
482 491
483 read_lock_bh(&table->tb6_lock); 492 spin_lock_bh(&table->tb6_lock);
484 res = fib6_walk(net, w); 493 res = fib6_walk(net, w);
485 read_unlock_bh(&table->tb6_lock); 494 spin_unlock_bh(&table->tb6_lock);
486 if (res > 0) { 495 if (res > 0) {
487 cb->args[4] = 1; 496 cb->args[4] = 1;
488 cb->args[5] = w->root->fn_sernum; 497 cb->args[5] = w->root->fn_sernum;
@@ -497,9 +506,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
497 } else 506 } else
498 w->skip = 0; 507 w->skip = 0;
499 508
500 read_lock_bh(&table->tb6_lock); 509 spin_lock_bh(&table->tb6_lock);
501 res = fib6_walk_continue(w); 510 res = fib6_walk_continue(w);
502 read_unlock_bh(&table->tb6_lock); 511 spin_unlock_bh(&table->tb6_lock);
503 if (res <= 0) { 512 if (res <= 0) {
504 fib6_walker_unlink(net, w); 513 fib6_walker_unlink(net, w);
505 cb->args[4] = 0; 514 cb->args[4] = 0;
@@ -580,11 +589,13 @@ out:
580 * node. 589 * node.
581 */ 590 */
582 591
583static struct fib6_node *fib6_add_1(struct fib6_node *root, 592static struct fib6_node *fib6_add_1(struct net *net,
584 struct in6_addr *addr, int plen, 593 struct fib6_table *table,
585 int offset, int allow_create, 594 struct fib6_node *root,
586 int replace_required, int sernum, 595 struct in6_addr *addr, int plen,
587 struct netlink_ext_ack *extack) 596 int offset, int allow_create,
597 int replace_required,
598 struct netlink_ext_ack *extack)
588{ 599{
589 struct fib6_node *fn, *in, *ln; 600 struct fib6_node *fn, *in, *ln;
590 struct fib6_node *pn = NULL; 601 struct fib6_node *pn = NULL;
@@ -599,7 +610,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
599 fn = root; 610 fn = root;
600 611
601 do { 612 do {
602 key = (struct rt6key *)((u8 *)fn->leaf + offset); 613 struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
614 lockdep_is_held(&table->tb6_lock));
615 key = (struct rt6key *)((u8 *)leaf + offset);
603 616
604 /* 617 /*
605 * Prefix match 618 * Prefix match
@@ -625,12 +638,10 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
625 if (plen == fn->fn_bit) { 638 if (plen == fn->fn_bit) {
626 /* clean up an intermediate node */ 639 /* clean up an intermediate node */
627 if (!(fn->fn_flags & RTN_RTINFO)) { 640 if (!(fn->fn_flags & RTN_RTINFO)) {
628 rt6_release(fn->leaf); 641 RCU_INIT_POINTER(fn->leaf, NULL);
629 fn->leaf = NULL; 642 rt6_release(leaf);
630 } 643 }
631 644
632 fn->fn_sernum = sernum;
633
634 return fn; 645 return fn;
635 } 646 }
636 647
@@ -639,10 +650,13 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
639 */ 650 */
640 651
641 /* Try to walk down on tree. */ 652 /* Try to walk down on tree. */
642 fn->fn_sernum = sernum;
643 dir = addr_bit_set(addr, fn->fn_bit); 653 dir = addr_bit_set(addr, fn->fn_bit);
644 pn = fn; 654 pn = fn;
645 fn = dir ? fn->right : fn->left; 655 fn = dir ?
656 rcu_dereference_protected(fn->right,
657 lockdep_is_held(&table->tb6_lock)) :
658 rcu_dereference_protected(fn->left,
659 lockdep_is_held(&table->tb6_lock));
646 } while (fn); 660 } while (fn);
647 661
648 if (!allow_create) { 662 if (!allow_create) {
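
This is the pattern the whole conversion applies: tb6_lock becomes a spinlock, tree pointers become __rcu, lockless readers use rcu_dereference() under rcu_read_lock(), and writers document their lock with rcu_dereference_protected() so sparse and lockdep can check both sides. An abbreviated kernel-context fragment of the same shape (toy_node is illustrative; this compiles in-tree, not standalone):

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct toy_node {
	struct toy_node __rcu	*left;
	struct toy_node __rcu	*right;
};

/* writer side: the tree's spinlock is held */
static struct toy_node *toy_child_locked(struct toy_node *fn, int dir,
					 spinlock_t *lock)
{
	return dir ? rcu_dereference_protected(fn->right,
					       lockdep_is_held(lock)) :
		     rcu_dereference_protected(fn->left,
					       lockdep_is_held(lock));
}

/* reader side: only rcu_read_lock() is held */
static struct toy_node *toy_child(struct toy_node *fn, int dir)
{
	return dir ? rcu_dereference(fn->right) : rcu_dereference(fn->left);
}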
@@ -668,19 +682,17 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
668 * Create new leaf node without children. 682 * Create new leaf node without children.
669 */ 683 */
670 684
671 ln = node_alloc(); 685 ln = node_alloc(net);
672 686
673 if (!ln) 687 if (!ln)
674 return ERR_PTR(-ENOMEM); 688 return ERR_PTR(-ENOMEM);
675 ln->fn_bit = plen; 689 ln->fn_bit = plen;
676 690 RCU_INIT_POINTER(ln->parent, pn);
677 ln->parent = pn;
678 ln->fn_sernum = sernum;
679 691
680 if (dir) 692 if (dir)
681 pn->right = ln; 693 rcu_assign_pointer(pn->right, ln);
682 else 694 else
683 pn->left = ln; 695 rcu_assign_pointer(pn->left, ln);
684 696
685 return ln; 697 return ln;
686 698
@@ -694,7 +706,8 @@ insert_above:
694 * and the current 706 * and the current
695 */ 707 */
696 708
697 pn = fn->parent; 709 pn = rcu_dereference_protected(fn->parent,
710 lockdep_is_held(&table->tb6_lock));
698 711
699 /* find 1st bit in difference between the 2 addrs. 712 /* find 1st bit in difference between the 2 addrs.
700 713
@@ -710,14 +723,14 @@ insert_above:
710 * (new leaf node)[ln] (old node)[fn] 723 * (new leaf node)[ln] (old node)[fn]
711 */ 724 */
712 if (plen > bit) { 725 if (plen > bit) {
713 in = node_alloc(); 726 in = node_alloc(net);
714 ln = node_alloc(); 727 ln = node_alloc(net);
715 728
716 if (!in || !ln) { 729 if (!in || !ln) {
717 if (in) 730 if (in)
718 node_free_immediate(in); 731 node_free_immediate(net, in);
719 if (ln) 732 if (ln)
720 node_free_immediate(ln); 733 node_free_immediate(net, ln);
721 return ERR_PTR(-ENOMEM); 734 return ERR_PTR(-ENOMEM);
722 } 735 }
723 736
@@ -731,31 +744,28 @@ insert_above:
731 744
732 in->fn_bit = bit; 745 in->fn_bit = bit;
733 746
734 in->parent = pn; 747 RCU_INIT_POINTER(in->parent, pn);
735 in->leaf = fn->leaf; 748 in->leaf = fn->leaf;
736 atomic_inc(&in->leaf->rt6i_ref); 749 atomic_inc(&rcu_dereference_protected(in->leaf,
737 750 lockdep_is_held(&table->tb6_lock))->rt6i_ref);
738 in->fn_sernum = sernum;
739 751
740 /* update parent pointer */ 752 /* update parent pointer */
741 if (dir) 753 if (dir)
742 pn->right = in; 754 rcu_assign_pointer(pn->right, in);
743 else 755 else
744 pn->left = in; 756 rcu_assign_pointer(pn->left, in);
745 757
746 ln->fn_bit = plen; 758 ln->fn_bit = plen;
747 759
748 ln->parent = in; 760 RCU_INIT_POINTER(ln->parent, in);
749 fn->parent = in; 761 rcu_assign_pointer(fn->parent, in);
750
751 ln->fn_sernum = sernum;
752 762
753 if (addr_bit_set(addr, bit)) { 763 if (addr_bit_set(addr, bit)) {
754 in->right = ln; 764 rcu_assign_pointer(in->right, ln);
755 in->left = fn; 765 rcu_assign_pointer(in->left, fn);
756 } else { 766 } else {
757 in->left = ln; 767 rcu_assign_pointer(in->left, ln);
758 in->right = fn; 768 rcu_assign_pointer(in->right, fn);
759 } 769 }
760 } else { /* plen <= bit */ 770 } else { /* plen <= bit */
761 771
@@ -765,28 +775,26 @@ insert_above:
765 * (old node)[fn] NULL 775 * (old node)[fn] NULL
766 */ 776 */
767 777
768 ln = node_alloc(); 778 ln = node_alloc(net);
769 779
770 if (!ln) 780 if (!ln)
771 return ERR_PTR(-ENOMEM); 781 return ERR_PTR(-ENOMEM);
772 782
773 ln->fn_bit = plen; 783 ln->fn_bit = plen;
774 784
775 ln->parent = pn; 785 RCU_INIT_POINTER(ln->parent, pn);
776
777 ln->fn_sernum = sernum;
778
779 if (dir)
780 pn->right = ln;
781 else
782 pn->left = ln;
783 786
784 if (addr_bit_set(&key->addr, plen)) 787 if (addr_bit_set(&key->addr, plen))
785 ln->right = fn; 788 RCU_INIT_POINTER(ln->right, fn);
786 else 789 else
787 ln->left = fn; 790 RCU_INIT_POINTER(ln->left, fn);
791
792 rcu_assign_pointer(fn->parent, ln);
788 793
789 fn->parent = ln; 794 if (dir)
795 rcu_assign_pointer(pn->right, ln);
796 else
797 rcu_assign_pointer(pn->left, ln);
790 } 798 }
791 return ln; 799 return ln;
792} 800}
@@ -832,6 +840,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
832static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, 840static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
833 struct net *net) 841 struct net *net)
834{ 842{
843 struct fib6_table *table = rt->rt6i_table;
844
835 if (atomic_read(&rt->rt6i_ref) != 1) { 845 if (atomic_read(&rt->rt6i_ref) != 1) {
836 /* This route is used as dummy address holder in some split 846 /* This route is used as dummy address holder in some split
837 * nodes. It is not leaked, but it still holds other resources, 847 * nodes. It is not leaked, but it still holds other resources,
@@ -840,12 +850,17 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
840 * to still alive ones. 850 * to still alive ones.
841 */ 851 */
842 while (fn) { 852 while (fn) {
843 if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { 853 struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
844 fn->leaf = fib6_find_prefix(net, fn); 854 lockdep_is_held(&table->tb6_lock));
845 atomic_inc(&fn->leaf->rt6i_ref); 855 struct rt6_info *new_leaf;
856 if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
857 new_leaf = fib6_find_prefix(net, table, fn);
858 atomic_inc(&new_leaf->rt6i_ref);
859 rcu_assign_pointer(fn->leaf, new_leaf);
846 rt6_release(rt); 860 rt6_release(rt);
847 } 861 }
848 fn = fn->parent; 862 fn = rcu_dereference_protected(fn->parent,
863 lockdep_is_held(&table->tb6_lock));
849 } 864 }
850 } 865 }
851} 866}
@@ -855,11 +870,14 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
855 */ 870 */
856 871
857static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, 872static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
858 struct nl_info *info, struct mx6_config *mxc) 873 struct nl_info *info, struct mx6_config *mxc,
874 struct netlink_ext_ack *extack)
859{ 875{
876 struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
877 lockdep_is_held(&rt->rt6i_table->tb6_lock));
860 struct rt6_info *iter = NULL; 878 struct rt6_info *iter = NULL;
861 struct rt6_info **ins; 879 struct rt6_info __rcu **ins;
862 struct rt6_info **fallback_ins = NULL; 880 struct rt6_info __rcu **fallback_ins = NULL;
863 int replace = (info->nlh && 881 int replace = (info->nlh &&
864 (info->nlh->nlmsg_flags & NLM_F_REPLACE)); 882 (info->nlh->nlmsg_flags & NLM_F_REPLACE));
865 int add = (!info->nlh || 883 int add = (!info->nlh ||
@@ -874,7 +892,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
874 892
875 ins = &fn->leaf; 893 ins = &fn->leaf;
876 894
877 for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { 895 for (iter = leaf; iter;
896 iter = rcu_dereference_protected(iter->dst.rt6_next,
897 lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
878 /* 898 /*
879 * Search for duplicates 899 * Search for duplicates
880 */ 900 */
@@ -936,7 +956,8 @@ next_iter:
936 if (fallback_ins && !found) { 956 if (fallback_ins && !found) {
937 /* No ECMP-able route found, replace first non-ECMP one */ 957 /* No ECMP-able route found, replace first non-ECMP one */
938 ins = fallback_ins; 958 ins = fallback_ins;
939 iter = *ins; 959 iter = rcu_dereference_protected(*ins,
960 lockdep_is_held(&rt->rt6i_table->tb6_lock));
940 found++; 961 found++;
941 } 962 }
942 963
@@ -950,7 +971,7 @@ next_iter:
950 struct rt6_info *sibling, *temp_sibling; 971 struct rt6_info *sibling, *temp_sibling;
951 972
952 /* Find the first route that has the same metric */ 973
953 sibling = fn->leaf; 974 sibling = leaf;
954 while (sibling) { 975 while (sibling) {
955 if (sibling->rt6i_metric == rt->rt6i_metric && 976 if (sibling->rt6i_metric == rt->rt6i_metric &&
956 rt6_qualify_for_ecmp(sibling)) { 977 rt6_qualify_for_ecmp(sibling)) {
@@ -958,7 +979,8 @@ next_iter:
958 &sibling->rt6i_siblings); 979 &sibling->rt6i_siblings);
959 break; 980 break;
960 } 981 }
961 sibling = sibling->dst.rt6_next; 982 sibling = rcu_dereference_protected(sibling->dst.rt6_next,
983 lockdep_is_held(&rt->rt6i_table->tb6_lock));
962 } 984 }
963 /* For each sibling in the list, increment the counter of 985 /* For each sibling in the list, increment the counter of
964 * siblings. BUG() if counters do not match, list of siblings 986
@@ -987,12 +1009,12 @@ add:
987 if (err) 1009 if (err)
988 return err; 1010 return err;
989 1011
990 rt->dst.rt6_next = iter; 1012 rcu_assign_pointer(rt->dst.rt6_next, iter);
991 *ins = rt;
992 rcu_assign_pointer(rt->rt6i_node, fn);
993 atomic_inc(&rt->rt6i_ref); 1013 atomic_inc(&rt->rt6i_ref);
1014 rcu_assign_pointer(rt->rt6i_node, fn);
1015 rcu_assign_pointer(*ins, rt);
994 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, 1016 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
995 rt); 1017 rt, extack);
996 if (!info->skip_notify) 1018 if (!info->skip_notify)
997 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); 1019 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
998 info->nl_net->ipv6.rt6_stats->fib_rt_entries++; 1020 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1016,12 +1038,12 @@ add:
1016 if (err) 1038 if (err)
1017 return err; 1039 return err;
1018 1040
1019 *ins = rt; 1041 atomic_inc(&rt->rt6i_ref);
1020 rcu_assign_pointer(rt->rt6i_node, fn); 1042 rcu_assign_pointer(rt->rt6i_node, fn);
1021 rt->dst.rt6_next = iter->dst.rt6_next; 1043 rt->dst.rt6_next = iter->dst.rt6_next;
1022 atomic_inc(&rt->rt6i_ref); 1044 rcu_assign_pointer(*ins, rt);
1023 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, 1045 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
1024 rt); 1046 rt, extack);
1025 if (!info->skip_notify) 1047 if (!info->skip_notify)
1026 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); 1048 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
1027 if (!(fn->fn_flags & RTN_RTINFO)) { 1049 if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -1031,14 +1053,15 @@ add:
1031 nsiblings = iter->rt6i_nsiblings; 1053 nsiblings = iter->rt6i_nsiblings;
1032 iter->rt6i_node = NULL; 1054 iter->rt6i_node = NULL;
1033 fib6_purge_rt(iter, fn, info->nl_net); 1055 fib6_purge_rt(iter, fn, info->nl_net);
1034 if (fn->rr_ptr == iter) 1056 if (rcu_access_pointer(fn->rr_ptr) == iter)
1035 fn->rr_ptr = NULL; 1057 fn->rr_ptr = NULL;
1036 rt6_release(iter); 1058 rt6_release(iter);
1037 1059
1038 if (nsiblings) { 1060 if (nsiblings) {
1039 /* Replacing an ECMP route, remove all siblings */ 1061 /* Replacing an ECMP route, remove all siblings */
1040 ins = &rt->dst.rt6_next; 1062 ins = &rt->dst.rt6_next;
1041 iter = *ins; 1063 iter = rcu_dereference_protected(*ins,
1064 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1042 while (iter) { 1065 while (iter) {
1043 if (iter->rt6i_metric > rt->rt6i_metric) 1066 if (iter->rt6i_metric > rt->rt6i_metric)
1044 break; 1067 break;
@@ -1046,14 +1069,16 @@ add:
1046 *ins = iter->dst.rt6_next; 1069 *ins = iter->dst.rt6_next;
1047 iter->rt6i_node = NULL; 1070 iter->rt6i_node = NULL;
1048 fib6_purge_rt(iter, fn, info->nl_net); 1071 fib6_purge_rt(iter, fn, info->nl_net);
1049 if (fn->rr_ptr == iter) 1072 if (rcu_access_pointer(fn->rr_ptr) == iter)
1050 fn->rr_ptr = NULL; 1073 fn->rr_ptr = NULL;
1051 rt6_release(iter); 1074 rt6_release(iter);
1052 nsiblings--; 1075 nsiblings--;
1076 info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
1053 } else { 1077 } else {
1054 ins = &iter->dst.rt6_next; 1078 ins = &iter->dst.rt6_next;
1055 } 1079 }
1056 iter = *ins; 1080 iter = rcu_dereference_protected(*ins,
1081 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1057 } 1082 }
1058 WARN_ON(nsiblings != 0); 1083 WARN_ON(nsiblings != 0);
1059 } 1084 }
@@ -1077,16 +1102,33 @@ void fib6_force_start_gc(struct net *net)
1077 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 1102 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1078} 1103}
1079 1104
1105static void fib6_update_sernum_upto_root(struct rt6_info *rt,
1106 int sernum)
1107{
1108 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
1109 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1110
1111 /* paired with smp_rmb() in rt6_get_cookie_safe() */
1112 smp_wmb();
1113 while (fn) {
1114 fn->fn_sernum = sernum;
1115 fn = rcu_dereference_protected(fn->parent,
1116 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1117 }
1118}
1119
1080/* 1120/*
1081 * Add routing information to the routing tree. 1121 * Add routing information to the routing tree.
1082 * <destination addr>/<source addr> 1122 * <destination addr>/<source addr>
1083 * with source addr info in sub-trees 1123 * with source addr info in sub-trees
1124 * Need to own table->tb6_lock
1084 */ 1125 */
1085 1126
1086int fib6_add(struct fib6_node *root, struct rt6_info *rt, 1127int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1087 struct nl_info *info, struct mx6_config *mxc, 1128 struct nl_info *info, struct mx6_config *mxc,
1088 struct netlink_ext_ack *extack) 1129 struct netlink_ext_ack *extack)
1089{ 1130{
1131 struct fib6_table *table = rt->rt6i_table;
1090 struct fib6_node *fn, *pn = NULL; 1132 struct fib6_node *fn, *pn = NULL;
1091 int err = -ENOMEM; 1133 int err = -ENOMEM;
1092 int allow_create = 1; 1134 int allow_create = 1;
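
fib6_update_sernum_upto_root() publishes the new serial number only after the route is fully linked, relying on the smp_wmb()/smp_rmb() pairing named in its comment. The generic shape of that pairing, as an illustrative kernel-style fragment (not the fib6 code itself):

#include <linux/compiler.h>
#include <asm/barrier.h>

static int data, seq;

static void writer(int v, int s)
{
	WRITE_ONCE(data, v);
	smp_wmb();		/* order data before seq; pairs with smp_rmb() */
	WRITE_ONCE(seq, s);
}

static int reader(int *out)
{
	int s = READ_ONCE(seq);

	smp_rmb();		/* pairs with the writer's smp_wmb() */
	*out = READ_ONCE(data);	/* at least as new as the observed seq */
	return s;
}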
@@ -1095,6 +1137,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1095 1137
1096 if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) 1138 if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
1097 return -EINVAL; 1139 return -EINVAL;
1140 if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
1141 return -EINVAL;
1098 1142
1099 if (info->nlh) { 1143 if (info->nlh) {
1100 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) 1144 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1105,9 +1149,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1105 if (!allow_create && !replace_required) 1149 if (!allow_create && !replace_required)
1106 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); 1150 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
1107 1151
1108 fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, 1152 fn = fib6_add_1(info->nl_net, table, root,
1153 &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
1109 offsetof(struct rt6_info, rt6i_dst), allow_create, 1154 offsetof(struct rt6_info, rt6i_dst), allow_create,
1110 replace_required, sernum, extack); 1155 replace_required, extack);
1111 if (IS_ERR(fn)) { 1156 if (IS_ERR(fn)) {
1112 err = PTR_ERR(fn); 1157 err = PTR_ERR(fn);
1113 fn = NULL; 1158 fn = NULL;
@@ -1120,7 +1165,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1120 if (rt->rt6i_src.plen) { 1165 if (rt->rt6i_src.plen) {
1121 struct fib6_node *sn; 1166 struct fib6_node *sn;
1122 1167
1123 if (!fn->subtree) { 1168 if (!rcu_access_pointer(fn->subtree)) {
1124 struct fib6_node *sfn; 1169 struct fib6_node *sfn;
1125 1170
1126 /* 1171 /*
@@ -1134,42 +1179,40 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1134 */ 1179 */
1135 1180
1136 /* Create subtree root node */ 1181 /* Create subtree root node */
1137 sfn = node_alloc(); 1182 sfn = node_alloc(info->nl_net);
1138 if (!sfn) 1183 if (!sfn)
1139 goto failure; 1184 goto failure;
1140 1185
1141 sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
1142 atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); 1186 atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
1187 rcu_assign_pointer(sfn->leaf,
1188 info->nl_net->ipv6.ip6_null_entry);
1143 sfn->fn_flags = RTN_ROOT; 1189 sfn->fn_flags = RTN_ROOT;
1144 sfn->fn_sernum = sernum;
1145 1190
1146 /* Now add the first leaf node to new subtree */ 1191 /* Now add the first leaf node to new subtree */
1147 1192
1148 sn = fib6_add_1(sfn, &rt->rt6i_src.addr, 1193 sn = fib6_add_1(info->nl_net, table, sfn,
1149 rt->rt6i_src.plen, 1194 &rt->rt6i_src.addr, rt->rt6i_src.plen,
1150 offsetof(struct rt6_info, rt6i_src), 1195 offsetof(struct rt6_info, rt6i_src),
1151 allow_create, replace_required, sernum, 1196 allow_create, replace_required, extack);
1152 extack);
1153 1197
1154 if (IS_ERR(sn)) { 1198 if (IS_ERR(sn)) {
1155 /* If it fails, discard the just-allocated 1199
1156 root, and then (at the failure label) the stale 1200
1157 node in the main tree. 1201
1158 */ 1202 */
1159 node_free_immediate(sfn); 1203 node_free_immediate(info->nl_net, sfn);
1160 err = PTR_ERR(sn); 1204 err = PTR_ERR(sn);
1161 goto failure; 1205 goto failure;
1162 } 1206 }
1163 1207
1164 /* Now link new subtree to main tree */ 1208 /* Now link new subtree to main tree */
1165 sfn->parent = fn; 1209 rcu_assign_pointer(sfn->parent, fn);
1166 fn->subtree = sfn; 1210 rcu_assign_pointer(fn->subtree, sfn);
1167 } else { 1211 } else {
1168 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, 1212 sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
1169 rt->rt6i_src.plen, 1213 &rt->rt6i_src.addr, rt->rt6i_src.plen,
1170 offsetof(struct rt6_info, rt6i_src), 1214 offsetof(struct rt6_info, rt6i_src),
1171 allow_create, replace_required, sernum, 1215 allow_create, replace_required, extack);
1172 extack);
1173 1216
1174 if (IS_ERR(sn)) { 1217 if (IS_ERR(sn)) {
1175 err = PTR_ERR(sn); 1218 err = PTR_ERR(sn);
@@ -1177,19 +1220,18 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1177 } 1220 }
1178 } 1221 }
1179 1222
1180 if (!fn->leaf) { 1223 if (!rcu_access_pointer(fn->leaf)) {
1181 fn->leaf = rt;
1182 atomic_inc(&rt->rt6i_ref); 1224 atomic_inc(&rt->rt6i_ref);
1225 rcu_assign_pointer(fn->leaf, rt);
1183 } 1226 }
1184 fn = sn; 1227 fn = sn;
1185 } 1228 }
1186#endif 1229#endif
1187 1230
1188 err = fib6_add_rt2node(fn, rt, info, mxc); 1231 err = fib6_add_rt2node(fn, rt, info, mxc, extack);
1189 if (!err) { 1232 if (!err) {
1233 fib6_update_sernum_upto_root(rt, sernum);
1190 fib6_start_gc(info->nl_net, rt); 1234 fib6_start_gc(info->nl_net, rt);
1191 if (!(rt->rt6i_flags & RTF_CACHE))
1192 fib6_prune_clones(info->nl_net, pn);
1193 } 1235 }
1194 1236
1195out: 1237out:
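
[note] The hunks above convert fib6_add()'s tree writes from plain stores to rcu_assign_pointer() and the corresponding reads to rcu_dereference()/rcu_access_pointer(), so lockless readers can traverse the trie while a writer holds tb6_lock. Below is a minimal userspace sketch of that publish/subscribe ordering, assuming C11 atomics as a stand-in; acquire is a conservative substitute for the kernel's dependency ordering, and the real primitives add type checks and lockdep hooks on top.

#include <stdatomic.h>
#include <stdio.h>

struct node { int plen; };

static _Atomic(struct node *) subtree;

/* like rcu_assign_pointer(): initialize the object first, then publish it
 * with a release store so readers never see half-built fields */
static void publish(struct node *n)
{
        atomic_store_explicit(&subtree, n, memory_order_release);
}

/* like rcu_dereference(): the load is ordered before any reads made
 * through the returned pointer */
static struct node *subscribe(void)
{
        return atomic_load_explicit(&subtree, memory_order_acquire);
}

int main(void)
{
        static struct node n = { .plen = 64 };

        publish(&n);
        struct node *p = subscribe();
        if (p)
                printf("plen=%d\n", p->plen);
        return 0;
}
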
@@ -1199,19 +1241,23 @@ out:
1199 * If fib6_add_1 has cleared the old leaf pointer in the 1241 * If fib6_add_1 has cleared the old leaf pointer in the
1200 * super-tree leaf node we have to find a new one for it. 1242 * super-tree leaf node we have to find a new one for it.
1201 */ 1243 */
1202 if (pn != fn && pn->leaf == rt) { 1244 struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
1203 pn->leaf = NULL; 1245 lockdep_is_held(&table->tb6_lock));
1246 if (pn != fn && pn_leaf == rt) {
1247 pn_leaf = NULL;
1248 RCU_INIT_POINTER(pn->leaf, NULL);
1204 atomic_dec(&rt->rt6i_ref); 1249 atomic_dec(&rt->rt6i_ref);
1205 } 1250 }
1206 if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { 1251 if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
1207 pn->leaf = fib6_find_prefix(info->nl_net, pn); 1252 pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
1208#if RT6_DEBUG >= 2 1253#if RT6_DEBUG >= 2
1209 if (!pn->leaf) { 1254 if (!pn_leaf) {
1210 WARN_ON(pn->leaf == NULL); 1255 WARN_ON(!pn_leaf);
1211 pn->leaf = info->nl_net->ipv6.ip6_null_entry; 1256 pn_leaf = info->nl_net->ipv6.ip6_null_entry;
1212 } 1257 }
1213#endif 1258#endif
1214 atomic_inc(&pn->leaf->rt6i_ref); 1259 atomic_inc(&pn_leaf->rt6i_ref);
1260 rcu_assign_pointer(pn->leaf, pn_leaf);
1215 } 1261 }
1216#endif 1262#endif
1217 goto failure; 1263 goto failure;
@@ -1226,7 +1272,7 @@ failure:
1226 * fn->leaf. 1272 * fn->leaf.
1227 */ 1273 */
1228 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 1274 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
1229 fib6_repair_tree(info->nl_net, fn); 1275 fib6_repair_tree(info->nl_net, table, fn);
1230 /* Always release dst as dst->__refcnt is guaranteed 1276 /* Always release dst as dst->__refcnt is guaranteed
1231 * to be taken before entering this function 1277 * to be taken before entering this function
1232 */ 1278 */
@@ -1264,7 +1310,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
1264 1310
1265 dir = addr_bit_set(args->addr, fn->fn_bit); 1311 dir = addr_bit_set(args->addr, fn->fn_bit);
1266 1312
1267 next = dir ? fn->right : fn->left; 1313 next = dir ? rcu_dereference(fn->right) :
1314 rcu_dereference(fn->left);
1268 1315
1269 if (next) { 1316 if (next) {
1270 fn = next; 1317 fn = next;
@@ -1274,18 +1321,22 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
1274 } 1321 }
1275 1322
1276 while (fn) { 1323 while (fn) {
1277 if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { 1324 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1325
1326 if (subtree || fn->fn_flags & RTN_RTINFO) {
1327 struct rt6_info *leaf = rcu_dereference(fn->leaf);
1278 struct rt6key *key; 1328 struct rt6key *key;
1279 1329
1280 key = (struct rt6key *) ((u8 *) fn->leaf + 1330 if (!leaf)
1281 args->offset); 1331 goto backtrack;
1332
1333 key = (struct rt6key *) ((u8 *)leaf + args->offset);
1282 1334
1283 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { 1335 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
1284#ifdef CONFIG_IPV6_SUBTREES 1336#ifdef CONFIG_IPV6_SUBTREES
1285 if (fn->subtree) { 1337 if (subtree) {
1286 struct fib6_node *sfn; 1338 struct fib6_node *sfn;
1287 sfn = fib6_lookup_1(fn->subtree, 1339 sfn = fib6_lookup_1(subtree, args + 1);
1288 args + 1);
1289 if (!sfn) 1340 if (!sfn)
1290 goto backtrack; 1341 goto backtrack;
1291 fn = sfn; 1342 fn = sfn;
@@ -1295,18 +1346,18 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
1295 return fn; 1346 return fn;
1296 } 1347 }
1297 } 1348 }
1298#ifdef CONFIG_IPV6_SUBTREES
1299backtrack: 1349backtrack:
1300#endif
1301 if (fn->fn_flags & RTN_ROOT) 1350 if (fn->fn_flags & RTN_ROOT)
1302 break; 1351 break;
1303 1352
1304 fn = fn->parent; 1353 fn = rcu_dereference(fn->parent);
1305 } 1354 }
1306 1355
1307 return NULL; 1356 return NULL;
1308} 1357}
1309 1358
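
[note] fib6_lookup_1() descends the radix trie one address bit at a time: addr_bit_set() tests bit fn_bit of the destination and picks the right child for 1, left for 0, and the backtrack loop then climbs until it finds a covering prefix or hits RTN_ROOT. A small sketch of the descent step, using hypothetical minimal node and byte-array address types rather than struct in6_addr:

#include <stdio.h>

struct node {
        struct node *left, *right;
        int fn_bit;             /* which address bit splits at this node */
};

/* same bit numbering as the kernel's addr_bit_set(): bit 0 is the most
 * significant bit of the first byte */
static int addr_bit_set(const unsigned char *addr, int fn_bit)
{
        return (addr[fn_bit >> 3] >> (7 - (fn_bit & 7))) & 1;
}

static struct node *descend(struct node *fn, const unsigned char *addr)
{
        while (fn) {
                struct node *next = addr_bit_set(addr, fn->fn_bit) ?
                                    fn->right : fn->left;
                if (!next)
                        return fn;      /* deepest candidate; backtrack next */
                fn = next;
        }
        return NULL;
}

int main(void)
{
        struct node leaf = { .fn_bit = 64 };
        struct node root = { .left = &leaf, .fn_bit = 0 };
        unsigned char dst[16] = { 0 };  /* bit 0 clear, so go left */

        printf("stopped at fn_bit=%d\n", descend(&root, dst)->fn_bit);
        return 0;
}
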
1359/* called with rcu_read_lock() held
1360 */
1310struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, 1361struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
1311 const struct in6_addr *saddr) 1362 const struct in6_addr *saddr)
1312{ 1363{
@@ -1337,54 +1388,87 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
1337/* 1388/*
1338 * Get node with specified destination prefix (and source prefix, 1389 * Get node with specified destination prefix (and source prefix,
1339 * if subtrees are used) 1390 * if subtrees are used)
1391 * exact_match == true means we try to find fn with exact match of
1392 * the passed in prefix addr
1393 * exact_match == false means we try to find fn with longest prefix
1394 * match of the passed in prefix addr. This is useful for finding fn
1395 * for cached route as it will be stored in the exception table under
1396 * the node with longest prefix length.
1340 */ 1397 */
1341 1398
1342 1399
1343static struct fib6_node *fib6_locate_1(struct fib6_node *root, 1400static struct fib6_node *fib6_locate_1(struct fib6_node *root,
1344 const struct in6_addr *addr, 1401 const struct in6_addr *addr,
1345 int plen, int offset) 1402 int plen, int offset,
1403 bool exact_match)
1346{ 1404{
1347 struct fib6_node *fn; 1405 struct fib6_node *fn, *prev = NULL;
1348 1406
1349 for (fn = root; fn ; ) { 1407 for (fn = root; fn ; ) {
1350 struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); 1408 struct rt6_info *leaf = rcu_dereference(fn->leaf);
1409 struct rt6key *key;
1410
1411 /* This node is being deleted */
1412 if (!leaf) {
1413 if (plen <= fn->fn_bit)
1414 goto out;
1415 else
1416 goto next;
1417 }
1418
1419 key = (struct rt6key *)((u8 *)leaf + offset);
1351 1420
1352 /* 1421 /*
1353 * Prefix match 1422 * Prefix match
1354 */ 1423 */
1355 if (plen < fn->fn_bit || 1424 if (plen < fn->fn_bit ||
1356 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) 1425 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
1357 return NULL; 1426 goto out;
1358 1427
1359 if (plen == fn->fn_bit) 1428 if (plen == fn->fn_bit)
1360 return fn; 1429 return fn;
1361 1430
1431 prev = fn;
1432
1433next:
1362 /* 1434 /*
1363 * We have more bits to go 1435 * We have more bits to go
1364 */ 1436 */
1365 if (addr_bit_set(addr, fn->fn_bit)) 1437 if (addr_bit_set(addr, fn->fn_bit))
1366 fn = fn->right; 1438 fn = rcu_dereference(fn->right);
1367 else 1439 else
1368 fn = fn->left; 1440 fn = rcu_dereference(fn->left);
1369 } 1441 }
1370 return NULL; 1442out:
1443 if (exact_match)
1444 return NULL;
1445 else
1446 return prev;
1371} 1447}
1372 1448
1373struct fib6_node *fib6_locate(struct fib6_node *root, 1449struct fib6_node *fib6_locate(struct fib6_node *root,
1374 const struct in6_addr *daddr, int dst_len, 1450 const struct in6_addr *daddr, int dst_len,
1375 const struct in6_addr *saddr, int src_len) 1451 const struct in6_addr *saddr, int src_len,
1452 bool exact_match)
1376{ 1453{
1377 struct fib6_node *fn; 1454 struct fib6_node *fn;
1378 1455
1379 fn = fib6_locate_1(root, daddr, dst_len, 1456 fn = fib6_locate_1(root, daddr, dst_len,
1380 offsetof(struct rt6_info, rt6i_dst)); 1457 offsetof(struct rt6_info, rt6i_dst),
1458 exact_match);
1381 1459
1382#ifdef CONFIG_IPV6_SUBTREES 1460#ifdef CONFIG_IPV6_SUBTREES
1383 if (src_len) { 1461 if (src_len) {
1384 WARN_ON(saddr == NULL); 1462 WARN_ON(saddr == NULL);
1385 if (fn && fn->subtree) 1463 if (fn) {
1386 fn = fib6_locate_1(fn->subtree, saddr, src_len, 1464 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1387 offsetof(struct rt6_info, rt6i_src)); 1465
1466 if (subtree) {
1467 fn = fib6_locate_1(subtree, saddr, src_len,
1468 offsetof(struct rt6_info, rt6i_src),
1469 exact_match);
1470 }
1471 }
1388 } 1472 }
1389#endif 1473#endif
1390 1474
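
[note] The new exact_match flag turns fib6_locate_1() from a pure exact lookup into an optional longest-prefix match: every node whose prefix covers the key is remembered in prev, and when no node of the exact length exists the caller falls back to that deepest covering node, which is where cached routes now live in the exception table. A sketch of the same loop over byte-string prefixes, with hypothetical types and prefix lengths restricted to whole bytes for brevity:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct node {
        struct node *left, *right;
        const unsigned char *key;       /* prefix bytes */
        int plen;                       /* prefix length, multiple of 8 here */
};

static bool prefix_equal(const unsigned char *a, const unsigned char *b,
                         int plen)
{
        return memcmp(a, b, plen / 8) == 0;
}

static struct node *locate(struct node *fn, const unsigned char *addr,
                           int plen, bool exact_match)
{
        struct node *prev = NULL;

        while (fn) {
                if (plen < fn->plen ||
                    !prefix_equal(fn->key, addr, fn->plen))
                        break;
                if (plen == fn->plen)
                        return fn;              /* exact hit */
                prev = fn;                      /* covering node on the path */
                fn = (addr[fn->plen / 8] & 0x80) ? fn->right : fn->left;
        }
        /* exact_match == false: fall back to the longest covering prefix */
        return exact_match ? NULL : prev;
}

int main(void)
{
        static const unsigned char p16[] = { 0x20, 0x01 };
        static const unsigned char dst[] = { 0x20, 0x01, 0x0d, 0xb8 };
        struct node root = { .key = p16, .plen = 16 };

        struct node *fn = locate(&root, dst, 32, false);
        printf("fell back to /%d\n", fn ? fn->plen : -1);
        return 0;
}
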
@@ -1400,16 +1484,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
1400 * 1484 *
1401 */ 1485 */
1402 1486
1403static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) 1487static struct rt6_info *fib6_find_prefix(struct net *net,
1488 struct fib6_table *table,
1489 struct fib6_node *fn)
1404{ 1490{
1491 struct fib6_node *child_left, *child_right;
1492
1405 if (fn->fn_flags & RTN_ROOT) 1493 if (fn->fn_flags & RTN_ROOT)
1406 return net->ipv6.ip6_null_entry; 1494 return net->ipv6.ip6_null_entry;
1407 1495
1408 while (fn) { 1496 while (fn) {
1409 if (fn->left) 1497 child_left = rcu_dereference_protected(fn->left,
1410 return fn->left->leaf; 1498 lockdep_is_held(&table->tb6_lock));
1411 if (fn->right) 1499 child_right = rcu_dereference_protected(fn->right,
1412 return fn->right->leaf; 1500 lockdep_is_held(&table->tb6_lock));
1501 if (child_left)
1502 return rcu_dereference_protected(child_left->leaf,
1503 lockdep_is_held(&table->tb6_lock));
1504 if (child_right)
1505 return rcu_dereference_protected(child_right->leaf,
1506 lockdep_is_held(&table->tb6_lock));
1413 1507
1414 fn = FIB6_SUBTREE(fn); 1508 fn = FIB6_SUBTREE(fn);
1415 } 1509 }
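
[note] fib6_find_prefix() now takes the table so its child and leaf loads can be annotated with rcu_dereference_protected(..., lockdep_is_held(&table->tb6_lock)): a documented, checkable claim that the writer lock is held, compiling down to a plain load. A userspace sketch of the idea, assuming a mutex plus a flag and an assert in place of lockdep:

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tb6_lock = PTHREAD_MUTEX_INITIALIZER;
static int lock_held;           /* stand-in for lockdep_is_held() */

struct node { struct node *left; int plen; };

/* like rcu_dereference_protected(): assert the claimed protection holds,
 * then do an ordinary, non-barriered load */
static struct node *deref_protected(struct node **pp, int held)
{
        assert(held);
        return *pp;
}

int main(void)
{
        struct node leaf = { .plen = 48 };
        struct node root = { .left = &leaf };
        struct node *child;

        pthread_mutex_lock(&tb6_lock);
        lock_held = 1;
        child = deref_protected(&root.left, lock_held);
        printf("child plen=%d\n", child->plen);
        lock_held = 0;
        pthread_mutex_unlock(&tb6_lock);
        return 0;
}
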
@@ -1419,31 +1513,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
1419/* 1513/*
1420 * Called to trim the tree of intermediate nodes when possible. "fn" 1514 * Called to trim the tree of intermediate nodes when possible. "fn"
1421 * is the node we want to try and remove. 1515 * is the node we want to try and remove.
1516 * Need to own table->tb6_lock
1422 */ 1517 */
1423 1518
1424static struct fib6_node *fib6_repair_tree(struct net *net, 1519static struct fib6_node *fib6_repair_tree(struct net *net,
1425 struct fib6_node *fn) 1520 struct fib6_table *table,
1521 struct fib6_node *fn)
1426{ 1522{
1427 int children; 1523 int children;
1428 int nstate; 1524 int nstate;
1429 struct fib6_node *child, *pn; 1525 struct fib6_node *child;
1430 struct fib6_walker *w; 1526 struct fib6_walker *w;
1431 int iter = 0; 1527 int iter = 0;
1432 1528
1433 for (;;) { 1529 for (;;) {
1530 struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
1531 lockdep_is_held(&table->tb6_lock));
1532 struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
1533 lockdep_is_held(&table->tb6_lock));
1534 struct fib6_node *pn = rcu_dereference_protected(fn->parent,
1535 lockdep_is_held(&table->tb6_lock));
1536 struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
1537 lockdep_is_held(&table->tb6_lock));
1538 struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
1539 lockdep_is_held(&table->tb6_lock));
1540 struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
1541 lockdep_is_held(&table->tb6_lock));
1542 struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
1543 lockdep_is_held(&table->tb6_lock));
1544 struct rt6_info *new_fn_leaf;
1545
1434 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); 1546 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
1435 iter++; 1547 iter++;
1436 1548
1437 WARN_ON(fn->fn_flags & RTN_RTINFO); 1549 WARN_ON(fn->fn_flags & RTN_RTINFO);
1438 WARN_ON(fn->fn_flags & RTN_TL_ROOT); 1550 WARN_ON(fn->fn_flags & RTN_TL_ROOT);
1439 WARN_ON(fn->leaf); 1551 WARN_ON(fn_leaf);
1440 1552
1441 children = 0; 1553 children = 0;
1442 child = NULL; 1554 child = NULL;
1443 if (fn->right) 1555 if (fn_r)
1444 child = fn->right, children |= 1; 1556 child = fn_r, children |= 1;
1445 if (fn->left) 1557 if (fn_l)
1446 child = fn->left, children |= 2; 1558 child = fn_l, children |= 2;
1447 1559
1448 if (children == 3 || FIB6_SUBTREE(fn) 1560 if (children == 3 || FIB6_SUBTREE(fn)
1449#ifdef CONFIG_IPV6_SUBTREES 1561#ifdef CONFIG_IPV6_SUBTREES
@@ -1451,36 +1563,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1451 || (children && fn->fn_flags & RTN_ROOT) 1563 || (children && fn->fn_flags & RTN_ROOT)
1452#endif 1564#endif
1453 ) { 1565 ) {
1454 fn->leaf = fib6_find_prefix(net, fn); 1566 new_fn_leaf = fib6_find_prefix(net, table, fn);
1455#if RT6_DEBUG >= 2 1567#if RT6_DEBUG >= 2
1456 if (!fn->leaf) { 1568 if (!new_fn_leaf) {
1457 WARN_ON(!fn->leaf); 1569 WARN_ON(!new_fn_leaf);
1458 fn->leaf = net->ipv6.ip6_null_entry; 1570 new_fn_leaf = net->ipv6.ip6_null_entry;
1459 } 1571 }
1460#endif 1572#endif
1461 atomic_inc(&fn->leaf->rt6i_ref); 1573 atomic_inc(&new_fn_leaf->rt6i_ref);
1462 return fn->parent; 1574 rcu_assign_pointer(fn->leaf, new_fn_leaf);
1575 return pn;
1463 } 1576 }
1464 1577
1465 pn = fn->parent;
1466#ifdef CONFIG_IPV6_SUBTREES 1578#ifdef CONFIG_IPV6_SUBTREES
1467 if (FIB6_SUBTREE(pn) == fn) { 1579 if (FIB6_SUBTREE(pn) == fn) {
1468 WARN_ON(!(fn->fn_flags & RTN_ROOT)); 1580 WARN_ON(!(fn->fn_flags & RTN_ROOT));
1469 FIB6_SUBTREE(pn) = NULL; 1581 RCU_INIT_POINTER(pn->subtree, NULL);
1470 nstate = FWS_L; 1582 nstate = FWS_L;
1471 } else { 1583 } else {
1472 WARN_ON(fn->fn_flags & RTN_ROOT); 1584 WARN_ON(fn->fn_flags & RTN_ROOT);
1473#endif 1585#endif
1474 if (pn->right == fn) 1586 if (pn_r == fn)
1475 pn->right = child; 1587 rcu_assign_pointer(pn->right, child);
1476 else if (pn->left == fn) 1588 else if (pn_l == fn)
1477 pn->left = child; 1589 rcu_assign_pointer(pn->left, child);
1478#if RT6_DEBUG >= 2 1590#if RT6_DEBUG >= 2
1479 else 1591 else
1480 WARN_ON(1); 1592 WARN_ON(1);
1481#endif 1593#endif
1482 if (child) 1594 if (child)
1483 child->parent = pn; 1595 rcu_assign_pointer(child->parent, pn);
1484 nstate = FWS_R; 1596 nstate = FWS_R;
1485#ifdef CONFIG_IPV6_SUBTREES 1597#ifdef CONFIG_IPV6_SUBTREES
1486 } 1598 }
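
[note] The rewritten fib6_repair_tree() loop still encodes its shape decision in a two-bit mask: bit 0 set means a right child exists, bit 1 a left child, so children == 3 means the node must stay, while a lone child can simply be promoted into the parent's slot. A compact sketch of that splice decision, with hypothetical minimal types:

#include <stdio.h>

struct node { struct node *left, *right, *parent; };

/* returns the node that can be freed, mirroring the children/child logic */
static struct node *repair_step(struct node *fn)
{
        struct node *child = NULL;
        int children = 0;

        if (fn->right)
                child = fn->right, children |= 1;
        if (fn->left)
                child = fn->left, children |= 2;

        if (children == 3)
                return NULL;            /* two children: keep the node */

        /* zero or one child: route the parent around fn */
        if (fn->parent) {
                if (fn->parent->right == fn)
                        fn->parent->right = child;
                else
                        fn->parent->left = child;
        }
        if (child)
                child->parent = fn->parent;
        return fn;                      /* fn is now unlinked */
}

int main(void)
{
        struct node c = { 0 }, fn = { .right = &c }, pn = { .left = &fn };

        fn.parent = &pn;
        c.parent = &fn;
        printf("spliced: %s\n", repair_step(&fn) ? "yes" : "no");
        printf("pn.left == &c: %s\n", pn.left == &c ? "yes" : "no");
        return 0;
}
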
@@ -1489,19 +1601,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1489 read_lock(&net->ipv6.fib6_walker_lock); 1601 read_lock(&net->ipv6.fib6_walker_lock);
1490 FOR_WALKERS(net, w) { 1602 FOR_WALKERS(net, w) {
1491 if (!child) { 1603 if (!child) {
1492 if (w->root == fn) { 1604 if (w->node == fn) {
1493 w->root = w->node = NULL;
1494 RT6_TRACE("W %p adjusted by delroot 1\n", w);
1495 } else if (w->node == fn) {
1496 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); 1605 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
1497 w->node = pn; 1606 w->node = pn;
1498 w->state = nstate; 1607 w->state = nstate;
1499 } 1608 }
1500 } else { 1609 } else {
1501 if (w->root == fn) {
1502 w->root = child;
1503 RT6_TRACE("W %p adjusted by delroot 2\n", w);
1504 }
1505 if (w->node == fn) { 1610 if (w->node == fn) {
1506 w->node = child; 1611 w->node = child;
1507 if (children&2) { 1612 if (children&2) {
@@ -1516,33 +1621,39 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1516 } 1621 }
1517 read_unlock(&net->ipv6.fib6_walker_lock); 1622 read_unlock(&net->ipv6.fib6_walker_lock);
1518 1623
1519 node_free(fn); 1624 node_free(net, fn);
1520 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) 1625 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
1521 return pn; 1626 return pn;
1522 1627
1523 rt6_release(pn->leaf); 1628 RCU_INIT_POINTER(pn->leaf, NULL);
1524 pn->leaf = NULL; 1629 rt6_release(pn_leaf);
1525 fn = pn; 1630 fn = pn;
1526 } 1631 }
1527} 1632}
1528 1633
1529static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, 1634static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1530 struct nl_info *info) 1635 struct rt6_info __rcu **rtp, struct nl_info *info)
1531{ 1636{
1532 struct fib6_walker *w; 1637 struct fib6_walker *w;
1533 struct rt6_info *rt = *rtp; 1638 struct rt6_info *rt = rcu_dereference_protected(*rtp,
1639 lockdep_is_held(&table->tb6_lock));
1534 struct net *net = info->nl_net; 1640 struct net *net = info->nl_net;
1535 1641
1536 RT6_TRACE("fib6_del_route\n"); 1642 RT6_TRACE("fib6_del_route\n");
1537 1643
1644 WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
1645
1538 /* Unlink it */ 1646 /* Unlink it */
1539 *rtp = rt->dst.rt6_next; 1647 *rtp = rt->dst.rt6_next;
1540 rt->rt6i_node = NULL; 1648 rt->rt6i_node = NULL;
1541 net->ipv6.rt6_stats->fib_rt_entries--; 1649 net->ipv6.rt6_stats->fib_rt_entries--;
1542 net->ipv6.rt6_stats->fib_discarded_routes++; 1650 net->ipv6.rt6_stats->fib_discarded_routes++;
1543 1651
1652 /* Flush all cached dst in exception table */
1653 rt6_flush_exceptions(rt);
1654
1544 /* Reset round-robin state, if necessary */ 1655 /* Reset round-robin state, if necessary */
1545 if (fn->rr_ptr == rt) 1656 if (rcu_access_pointer(fn->rr_ptr) == rt)
1546 fn->rr_ptr = NULL; 1657 fn->rr_ptr = NULL;
1547 1658
1548 /* Remove this entry from other siblings */ 1659 /* Remove this entry from other siblings */
@@ -1561,36 +1672,38 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1561 FOR_WALKERS(net, w) { 1672 FOR_WALKERS(net, w) {
1562 if (w->state == FWS_C && w->leaf == rt) { 1673 if (w->state == FWS_C && w->leaf == rt) {
1563 RT6_TRACE("walker %p adjusted by delroute\n", w); 1674 RT6_TRACE("walker %p adjusted by delroute\n", w);
1564 w->leaf = rt->dst.rt6_next; 1675 w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
1676 lockdep_is_held(&table->tb6_lock));
1565 if (!w->leaf) 1677 if (!w->leaf)
1566 w->state = FWS_U; 1678 w->state = FWS_U;
1567 } 1679 }
1568 } 1680 }
1569 read_unlock(&net->ipv6.fib6_walker_lock); 1681 read_unlock(&net->ipv6.fib6_walker_lock);
1570 1682
1571 rt->dst.rt6_next = NULL;
1572
1573 /* If it was last route, expunge its radix tree node */ 1683 /* If it was last route, expunge its radix tree node */
1574 if (!fn->leaf) { 1684 if (!rcu_access_pointer(fn->leaf)) {
1575 fn->fn_flags &= ~RTN_RTINFO; 1685 fn->fn_flags &= ~RTN_RTINFO;
1576 net->ipv6.rt6_stats->fib_route_nodes--; 1686 net->ipv6.rt6_stats->fib_route_nodes--;
1577 fn = fib6_repair_tree(net, fn); 1687 fn = fib6_repair_tree(net, table, fn);
1578 } 1688 }
1579 1689
1580 fib6_purge_rt(rt, fn, net); 1690 fib6_purge_rt(rt, fn, net);
1581 1691
1582 call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt); 1692 call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
1583 if (!info->skip_notify) 1693 if (!info->skip_notify)
1584 inet6_rt_notify(RTM_DELROUTE, rt, info, 0); 1694 inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
1585 rt6_release(rt); 1695 rt6_release(rt);
1586} 1696}
1587 1697
1698/* Need to own table->tb6_lock */
1588int fib6_del(struct rt6_info *rt, struct nl_info *info) 1699int fib6_del(struct rt6_info *rt, struct nl_info *info)
1589{ 1700{
1590 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, 1701 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
1591 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 1702 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1703 struct fib6_table *table = rt->rt6i_table;
1592 struct net *net = info->nl_net; 1704 struct net *net = info->nl_net;
1593 struct rt6_info **rtp; 1705 struct rt6_info __rcu **rtp;
1706 struct rt6_info __rcu **rtp_next;
1594 1707
1595#if RT6_DEBUG >= 2 1708#if RT6_DEBUG >= 2
1596 if (rt->dst.obsolete > 0) { 1709 if (rt->dst.obsolete > 0) {
@@ -1603,28 +1716,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1603 1716
1604 WARN_ON(!(fn->fn_flags & RTN_RTINFO)); 1717 WARN_ON(!(fn->fn_flags & RTN_RTINFO));
1605 1718
1606 if (!(rt->rt6i_flags & RTF_CACHE)) { 1719 /* remove cached dst from exception table */
1607 struct fib6_node *pn = fn; 1720 if (rt->rt6i_flags & RTF_CACHE)
1608#ifdef CONFIG_IPV6_SUBTREES 1721 return rt6_remove_exception_rt(rt);
1609 /* clones of this route might be in another subtree */
1610 if (rt->rt6i_src.plen) {
1611 while (!(pn->fn_flags & RTN_ROOT))
1612 pn = pn->parent;
1613 pn = pn->parent;
1614 }
1615#endif
1616 fib6_prune_clones(info->nl_net, pn);
1617 }
1618 1722
1619 /* 1723 /*
1620 * Walk the leaf entries looking for ourself 1724 * Walk the leaf entries looking for ourself
1621 */ 1725 */
1622 1726
1623 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { 1727 for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
1624 if (*rtp == rt) { 1728 struct rt6_info *cur = rcu_dereference_protected(*rtp,
1625 fib6_del_route(fn, rtp, info); 1729 lockdep_is_held(&table->tb6_lock));
1730 if (rt == cur) {
1731 fib6_del_route(table, fn, rtp, info);
1626 return 0; 1732 return 0;
1627 } 1733 }
1734 rtp_next = &cur->dst.rt6_next;
1628 } 1735 }
1629 return -ENOENT; 1736 return -ENOENT;
1630} 1737}
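
[note] fib6_del() keeps the classic pointer-to-pointer unlink: rtp walks the addresses of the next-pointers, so removal is a single store through *rtp with no prev bookkeeping, and the patch only adds rtp_next so the next cell is sampled before the annotated dereference. A plain-C sketch of the pattern, with a hypothetical rt type:

#include <stdio.h>

struct rt { int id; struct rt *next; };

/* unlink the entry matching id; returns 0 on success, -1 if absent */
static int del_route(struct rt **head, int id)
{
        struct rt **rtp;

        for (rtp = head; *rtp; rtp = &(*rtp)->next) {
                if ((*rtp)->id == id) {
                        *rtp = (*rtp)->next;    /* one store unlinks it */
                        return 0;
                }
        }
        return -1;
}

int main(void)
{
        struct rt c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct rt *head = &a;

        del_route(&head, 2);
        for (struct rt *r = head; r; r = r->next)
                printf("%d ", r->id);           /* prints: 1 3 */
        printf("\n");
        return 0;
}
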
@@ -1651,22 +1758,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1651 * 0 -> walk is complete. 1758 * 0 -> walk is complete.
1652 * >0 -> walk is incomplete (i.e. suspended) 1759 * >0 -> walk is incomplete (i.e. suspended)
1653 * <0 -> walk is terminated by an error. 1760 * <0 -> walk is terminated by an error.
1761 *
1762 * This function is called with tb6_lock held.
1654 */ 1763 */
1655 1764
1656static int fib6_walk_continue(struct fib6_walker *w) 1765static int fib6_walk_continue(struct fib6_walker *w)
1657{ 1766{
1658 struct fib6_node *fn, *pn; 1767 struct fib6_node *fn, *pn, *left, *right;
1768
1769 /* w->root should always be table->tb6_root */
1770 WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
1659 1771
1660 for (;;) { 1772 for (;;) {
1661 fn = w->node; 1773 fn = w->node;
1662 if (!fn) 1774 if (!fn)
1663 return 0; 1775 return 0;
1664 1776
1665 if (w->prune && fn != w->root &&
1666 fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
1667 w->state = FWS_C;
1668 w->leaf = fn->leaf;
1669 }
1670 switch (w->state) { 1777 switch (w->state) {
1671#ifdef CONFIG_IPV6_SUBTREES 1778#ifdef CONFIG_IPV6_SUBTREES
1672 case FWS_S: 1779 case FWS_S:
@@ -1676,21 +1783,26 @@ static int fib6_walk_continue(struct fib6_walker *w)
1676 } 1783 }
1677 w->state = FWS_L; 1784 w->state = FWS_L;
1678#endif 1785#endif
1786 /* fall through */
1679 case FWS_L: 1787 case FWS_L:
1680 if (fn->left) { 1788 left = rcu_dereference_protected(fn->left, 1);
1681 w->node = fn->left; 1789 if (left) {
1790 w->node = left;
1682 w->state = FWS_INIT; 1791 w->state = FWS_INIT;
1683 continue; 1792 continue;
1684 } 1793 }
1685 w->state = FWS_R; 1794 w->state = FWS_R;
1795 /* fall through */
1686 case FWS_R: 1796 case FWS_R:
1687 if (fn->right) { 1797 right = rcu_dereference_protected(fn->right, 1);
1688 w->node = fn->right; 1798 if (right) {
1799 w->node = right;
1689 w->state = FWS_INIT; 1800 w->state = FWS_INIT;
1690 continue; 1801 continue;
1691 } 1802 }
1692 w->state = FWS_C; 1803 w->state = FWS_C;
1693 w->leaf = fn->leaf; 1804 w->leaf = rcu_dereference_protected(fn->leaf, 1);
1805 /* fall through */
1694 case FWS_C: 1806 case FWS_C:
1695 if (w->leaf && fn->fn_flags & RTN_RTINFO) { 1807 if (w->leaf && fn->fn_flags & RTN_RTINFO) {
1696 int err; 1808 int err;
@@ -1709,10 +1821,13 @@ static int fib6_walk_continue(struct fib6_walker *w)
1709 } 1821 }
1710skip: 1822skip:
1711 w->state = FWS_U; 1823 w->state = FWS_U;
1824 /* fall through */
1712 case FWS_U: 1825 case FWS_U:
1713 if (fn == w->root) 1826 if (fn == w->root)
1714 return 0; 1827 return 0;
1715 pn = fn->parent; 1828 pn = rcu_dereference_protected(fn->parent, 1);
1829 left = rcu_dereference_protected(pn->left, 1);
1830 right = rcu_dereference_protected(pn->right, 1);
1716 w->node = pn; 1831 w->node = pn;
1717#ifdef CONFIG_IPV6_SUBTREES 1832#ifdef CONFIG_IPV6_SUBTREES
1718 if (FIB6_SUBTREE(pn) == fn) { 1833 if (FIB6_SUBTREE(pn) == fn) {
@@ -1721,13 +1836,13 @@ skip:
1721 continue; 1836 continue;
1722 } 1837 }
1723#endif 1838#endif
1724 if (pn->left == fn) { 1839 if (left == fn) {
1725 w->state = FWS_R; 1840 w->state = FWS_R;
1726 continue; 1841 continue;
1727 } 1842 }
1728 if (pn->right == fn) { 1843 if (right == fn) {
1729 w->state = FWS_C; 1844 w->state = FWS_C;
1730 w->leaf = w->node->leaf; 1845 w->leaf = rcu_dereference_protected(w->node->leaf, 1);
1731 continue; 1846 continue;
1732 } 1847 }
1733#if RT6_DEBUG >= 2 1848#if RT6_DEBUG >= 2
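
[note] fib6_walk_continue() is a resumable, stack-free traversal: the walker's (node, state) pair is the whole continuation, so a dump can stop mid-tree, drop the lock, and pick up where it left off; the added /* fall through */ comments only document the intentional case fall-through. A userspace sketch of the same FWS-style state machine over a binary tree, simplified to omit subtrees and suspension:

#include <stdio.h>

struct node { struct node *left, *right, *parent; int val; };

enum fws { FWS_INIT, FWS_L, FWS_R, FWS_C, FWS_U };

struct walker { struct node *root, *node; enum fws state; };

/* visit every node; returning early here would suspend the walk, and
 * (node, state) is all that is needed to resume it later */
static void walk(struct walker *w)
{
        while (w->node) {
                struct node *fn = w->node;

                switch (w->state) {
                case FWS_INIT:
                case FWS_L:
                        if (fn->left) {
                                w->node = fn->left;
                                w->state = FWS_INIT;
                                continue;
                        }
                        w->state = FWS_R;       /* fall through */
                case FWS_R:
                        if (fn->right) {
                                w->node = fn->right;
                                w->state = FWS_INIT;
                                continue;
                        }
                        w->state = FWS_C;       /* fall through */
                case FWS_C:
                        printf("%d ", fn->val); /* "visit" the node */
                        w->state = FWS_U;       /* fall through */
                case FWS_U:
                        if (fn == w->root) {
                                w->node = NULL;
                                return;
                        }
                        w->node = fn->parent;
                        w->state = (fn->parent->left == fn) ? FWS_R : FWS_C;
                }
        }
}

int main(void)
{
        struct node l = { .val = 1 }, r = { .val = 3 };
        struct node root = { .left = &l, .right = &r, .val = 2 };
        struct walker w = { .root = &root, .node = &root, .state = FWS_INIT };

        l.parent = r.parent = &root;
        walk(&w);               /* prints "1 3 2", a post-order visit */
        printf("\n");
        return 0;
}
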
@@ -1770,7 +1885,7 @@ static int fib6_clean_node(struct fib6_walker *w)
1770 return 0; 1885 return 0;
1771 } 1886 }
1772 1887
1773 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { 1888 for_each_fib6_walker_rt(w) {
1774 res = c->func(rt, c->arg); 1889 res = c->func(rt, c->arg);
1775 if (res < 0) { 1890 if (res < 0) {
1776 w->leaf = rt; 1891 w->leaf = rt;
@@ -1798,20 +1913,16 @@ static int fib6_clean_node(struct fib6_walker *w)
1798 * func is called on each route. 1913 * func is called on each route.
1799 * It may return -1 -> delete this route. 1914 * It may return -1 -> delete this route.
1800 * 0 -> continue walking 1915 * 0 -> continue walking
1801 *
1802 * prune==1 -> only immediate children of node (certainly,
1803 * ignoring pure split nodes) will be scanned.
1804 */ 1916 */
1805 1917
1806static void fib6_clean_tree(struct net *net, struct fib6_node *root, 1918static void fib6_clean_tree(struct net *net, struct fib6_node *root,
1807 int (*func)(struct rt6_info *, void *arg), 1919 int (*func)(struct rt6_info *, void *arg),
1808 bool prune, int sernum, void *arg) 1920 int sernum, void *arg)
1809{ 1921{
1810 struct fib6_cleaner c; 1922 struct fib6_cleaner c;
1811 1923
1812 c.w.root = root; 1924 c.w.root = root;
1813 c.w.func = fib6_clean_node; 1925 c.w.func = fib6_clean_node;
1814 c.w.prune = prune;
1815 c.w.count = 0; 1926 c.w.count = 0;
1816 c.w.skip = 0; 1927 c.w.skip = 0;
1817 c.func = func; 1928 c.func = func;
@@ -1834,10 +1945,10 @@ static void __fib6_clean_all(struct net *net,
1834 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 1945 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
1835 head = &net->ipv6.fib_table_hash[h]; 1946 head = &net->ipv6.fib_table_hash[h];
1836 hlist_for_each_entry_rcu(table, head, tb6_hlist) { 1947 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
1837 write_lock_bh(&table->tb6_lock); 1948 spin_lock_bh(&table->tb6_lock);
1838 fib6_clean_tree(net, &table->tb6_root, 1949 fib6_clean_tree(net, &table->tb6_root,
1839 func, false, sernum, arg); 1950 func, sernum, arg);
1840 write_unlock_bh(&table->tb6_lock); 1951 spin_unlock_bh(&table->tb6_lock);
1841 } 1952 }
1842 } 1953 }
1843 rcu_read_unlock(); 1954 rcu_read_unlock();
@@ -1849,22 +1960,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
1849 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); 1960 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
1850} 1961}
1851 1962
1852static int fib6_prune_clone(struct rt6_info *rt, void *arg)
1853{
1854 if (rt->rt6i_flags & RTF_CACHE) {
1855 RT6_TRACE("pruning clone %p\n", rt);
1856 return -1;
1857 }
1858
1859 return 0;
1860}
1861
1862static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
1863{
1864 fib6_clean_tree(net, fn, fib6_prune_clone, true,
1865 FIB6_NO_SERNUM_CHANGE, NULL);
1866}
1867
1868static void fib6_flush_trees(struct net *net) 1963static void fib6_flush_trees(struct net *net)
1869{ 1964{
1870 int new_sernum = fib6_new_sernum(net); 1965 int new_sernum = fib6_new_sernum(net);
@@ -1876,12 +1971,6 @@ static void fib6_flush_trees(struct net *net)
1876 * Garbage collection 1971 * Garbage collection
1877 */ 1972 */
1878 1973
1879struct fib6_gc_args
1880{
1881 int timeout;
1882 int more;
1883};
1884
1885static int fib6_age(struct rt6_info *rt, void *arg) 1974static int fib6_age(struct rt6_info *rt, void *arg)
1886{ 1975{
1887 struct fib6_gc_args *gc_args = arg; 1976 struct fib6_gc_args *gc_args = arg;
@@ -1890,9 +1979,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1890 /* 1979 /*
1891 * check addrconf expiration here. 1980 * check addrconf expiration here.
1892 * Routes are expired even if they are in use. 1981 * Routes are expired even if they are in use.
1893 *
1894 * Also age clones. Note, that clones are aged out
1895 * only if they are not in use now.
1896 */ 1982 */
1897 1983
1898 if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { 1984 if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
@@ -1901,31 +1987,14 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1901 return -1; 1987 return -1;
1902 } 1988 }
1903 gc_args->more++; 1989 gc_args->more++;
1904 } else if (rt->rt6i_flags & RTF_CACHE) {
1905 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
1906 rt->dst.obsolete = DST_OBSOLETE_KILL;
1907 if (atomic_read(&rt->dst.__refcnt) == 1 &&
1908 rt->dst.obsolete == DST_OBSOLETE_KILL) {
1909 RT6_TRACE("aging clone %p\n", rt);
1910 return -1;
1911 } else if (rt->rt6i_flags & RTF_GATEWAY) {
1912 struct neighbour *neigh;
1913 __u8 neigh_flags = 0;
1914
1915 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1916 if (neigh) {
1917 neigh_flags = neigh->flags;
1918 neigh_release(neigh);
1919 }
1920 if (!(neigh_flags & NTF_ROUTER)) {
1921 RT6_TRACE("purging route %p via non-router but gateway\n",
1922 rt);
1923 return -1;
1924 }
1925 }
1926 gc_args->more++;
1927 } 1990 }
1928 1991
1992 /* Also age clones in the exception table.
1993 * Note, that clones are aged out
1994 * only if they are not in use now.
1995 */
1996 rt6_age_exceptions(rt, gc_args, now);
1997
1929 return 0; 1998 return 0;
1930} 1999}
1931 2000
@@ -1957,9 +2026,11 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1957 spin_unlock_bh(&net->ipv6.fib6_gc_lock); 2026 spin_unlock_bh(&net->ipv6.fib6_gc_lock);
1958} 2027}
1959 2028
1960static void fib6_gc_timer_cb(unsigned long arg) 2029static void fib6_gc_timer_cb(struct timer_list *t)
1961{ 2030{
1962 fib6_run_gc(0, (struct net *)arg, true); 2031 struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
2032
2033 fib6_run_gc(0, arg, true);
1963} 2034}
1964 2035
1965static int __net_init fib6_net_init(struct net *net) 2036static int __net_init fib6_net_init(struct net *net)
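
[note] The timer change above follows the tree-wide timer_setup() conversion: the callback now receives the struct timer_list itself and recovers its container with from_timer(), which is container_of() underneath, instead of trusting an unsigned long cast. A userspace sketch of the recovery step, assuming a hand-rolled container_of() and a stripped-down timer type:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct timer_list { void (*function)(struct timer_list *); };

struct net { int id; struct timer_list ip6_fib_timer; };

static void gc_timer_cb(struct timer_list *t)
{
        /* like from_timer(net, t, ipv6.ip6_fib_timer) */
        struct net *net = container_of(t, struct net, ip6_fib_timer);

        printf("gc for netns %d\n", net->id);
}

int main(void)
{
        struct net net = { .id = 1 };

        net.ip6_fib_timer.function = gc_timer_cb;       /* timer_setup() */
        net.ip6_fib_timer.function(&net.ip6_fib_timer); /* timer expires */
        return 0;
}
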
@@ -1974,7 +2045,7 @@ static int __net_init fib6_net_init(struct net *net)
1974 spin_lock_init(&net->ipv6.fib6_gc_lock); 2045 spin_lock_init(&net->ipv6.fib6_gc_lock);
1975 rwlock_init(&net->ipv6.fib6_walker_lock); 2046 rwlock_init(&net->ipv6.fib6_walker_lock);
1976 INIT_LIST_HEAD(&net->ipv6.fib6_walkers); 2047 INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
1977 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); 2048 timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);
1978 2049
1979 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); 2050 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
1980 if (!net->ipv6.rt6_stats) 2051 if (!net->ipv6.rt6_stats)
@@ -1993,7 +2064,8 @@ static int __net_init fib6_net_init(struct net *net)
1993 goto out_fib_table_hash; 2064 goto out_fib_table_hash;
1994 2065
1995 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; 2066 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
1996 net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 2067 rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
2068 net->ipv6.ip6_null_entry);
1997 net->ipv6.fib6_main_tbl->tb6_root.fn_flags = 2069 net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
1998 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 2070 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
1999 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); 2071 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -2004,7 +2076,8 @@ static int __net_init fib6_net_init(struct net *net)
2004 if (!net->ipv6.fib6_local_tbl) 2076 if (!net->ipv6.fib6_local_tbl)
2005 goto out_fib6_main_tbl; 2077 goto out_fib6_main_tbl;
2006 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; 2078 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
2007 net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 2079 rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
2080 net->ipv6.ip6_null_entry);
2008 net->ipv6.fib6_local_tbl->tb6_root.fn_flags = 2081 net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
2009 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 2082 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
2010 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); 2083 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -2134,7 +2207,9 @@ static int ipv6_route_yield(struct fib6_walker *w)
2134 return 1; 2207 return 1;
2135 2208
2136 do { 2209 do {
2137 iter->w.leaf = iter->w.leaf->dst.rt6_next; 2210 iter->w.leaf = rcu_dereference_protected(
2211 iter->w.leaf->dst.rt6_next,
2212 lockdep_is_held(&iter->tbl->tb6_lock));
2138 iter->skip--; 2213 iter->skip--;
2139 if (!iter->skip && iter->w.leaf) 2214 if (!iter->skip && iter->w.leaf)
2140 return 1; 2215 return 1;
@@ -2199,7 +2274,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2199 if (!v) 2274 if (!v)
2200 goto iter_table; 2275 goto iter_table;
2201 2276
2202 n = ((struct rt6_info *)v)->dst.rt6_next; 2277 n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
2203 if (n) { 2278 if (n) {
2204 ++*pos; 2279 ++*pos;
2205 return n; 2280 return n;
@@ -2207,9 +2282,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2207 2282
2208iter_table: 2283iter_table:
2209 ipv6_route_check_sernum(iter); 2284 ipv6_route_check_sernum(iter);
2210 read_lock(&iter->tbl->tb6_lock); 2285 spin_lock_bh(&iter->tbl->tb6_lock);
2211 r = fib6_walk_continue(&iter->w); 2286 r = fib6_walk_continue(&iter->w);
2212 read_unlock(&iter->tbl->tb6_lock); 2287 spin_unlock_bh(&iter->tbl->tb6_lock);
2213 if (r > 0) { 2288 if (r > 0) {
2214 if (v) 2289 if (v)
2215 ++*pos; 2290 ++*pos;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 8081bafe441b..7f59c8fabeeb 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -46,8 +46,8 @@
46static atomic_t fl_size = ATOMIC_INIT(0); 46static atomic_t fl_size = ATOMIC_INIT(0);
47static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; 47static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
48 48
49static void ip6_fl_gc(unsigned long dummy); 49static void ip6_fl_gc(struct timer_list *unused);
50static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); 50static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);
51 51
52/* FL hash table lock: it protects only of GC */ 52/* FL hash table lock: it protects only of GC */
53 53
@@ -127,7 +127,7 @@ static void fl_release(struct ip6_flowlabel *fl)
127 spin_unlock_bh(&ip6_fl_lock); 127 spin_unlock_bh(&ip6_fl_lock);
128} 128}
129 129
130static void ip6_fl_gc(unsigned long dummy) 130static void ip6_fl_gc(struct timer_list *unused)
131{ 131{
132 int i; 132 int i;
133 unsigned long now = jiffies; 133 unsigned long now = jiffies;
@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
315 } 315 }
316 opt_space->dst1opt = fopt->dst1opt; 316 opt_space->dst1opt = fopt->dst1opt;
317 opt_space->opt_flen = fopt->opt_flen; 317 opt_space->opt_flen = fopt->opt_flen;
318 opt_space->tot_len = fopt->tot_len;
318 return opt_space; 319 return opt_space;
319} 320}
320EXPORT_SYMBOL_GPL(fl6_merge_options); 321EXPORT_SYMBOL_GPL(fl6_merge_options);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b7a72d409334..4cfd8e0696fe 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
369static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 369static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
370 u8 type, u8 code, int offset, __be32 info) 370 u8 type, u8 code, int offset, __be32 info)
371{ 371{
372 struct net *net = dev_net(skb->dev);
372 const struct gre_base_hdr *greh; 373 const struct gre_base_hdr *greh;
373 const struct ipv6hdr *ipv6h; 374 const struct ipv6hdr *ipv6h;
374 int grehlen = sizeof(*greh); 375 int grehlen = sizeof(*greh);
@@ -402,19 +403,21 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
402 return; 403 return;
403 404
404 switch (type) { 405 switch (type) {
405 __u32 teli;
406 struct ipv6_tlv_tnl_enc_lim *tel; 406 struct ipv6_tlv_tnl_enc_lim *tel;
407 __u32 mtu; 407 __u32 teli;
408 case ICMPV6_DEST_UNREACH: 408 case ICMPV6_DEST_UNREACH:
409 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", 409 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
410 t->parms.name); 410 t->parms.name);
411 break; 411 if (code != ICMPV6_PORT_UNREACH)
412 break;
413 return;
412 case ICMPV6_TIME_EXCEED: 414 case ICMPV6_TIME_EXCEED:
413 if (code == ICMPV6_EXC_HOPLIMIT) { 415 if (code == ICMPV6_EXC_HOPLIMIT) {
414 net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", 416 net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
415 t->parms.name); 417 t->parms.name);
418 break;
416 } 419 }
417 break; 420 return;
418 case ICMPV6_PARAMPROB: 421 case ICMPV6_PARAMPROB:
419 teli = 0; 422 teli = 0;
420 if (code == ICMPV6_HDR_FIELD) 423 if (code == ICMPV6_HDR_FIELD)
@@ -430,15 +433,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
430 net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", 433 net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
431 t->parms.name); 434 t->parms.name);
432 } 435 }
433 break; 436 return;
434 case ICMPV6_PKT_TOOBIG: 437 case ICMPV6_PKT_TOOBIG:
435 mtu = be32_to_cpu(info) - offset - t->tun_hlen; 438 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
436 if (t->dev->type == ARPHRD_ETHER) 439 return;
437 mtu -= ETH_HLEN; 440 case NDISC_REDIRECT:
438 if (mtu < IPV6_MIN_MTU) 441 ip6_redirect(skb, net, skb->dev->ifindex, 0,
439 mtu = IPV6_MIN_MTU; 442 sock_net_uid(net, NULL));
440 t->dev->mtu = mtu; 443 return;
441 break;
442 } 444 }
443 445
444 if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) 446 if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -458,7 +460,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
458 &ipv6h->saddr, &ipv6h->daddr, tpi->key, 460 &ipv6h->saddr, &ipv6h->daddr, tpi->key,
459 tpi->proto); 461 tpi->proto);
460 if (tunnel) { 462 if (tunnel) {
461 ip6_tnl_rcv(tunnel, skb, tpi, NULL, false); 463 ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
462 464
463 return PACKET_RCVD; 465 return PACKET_RCVD;
464 } 466 }
@@ -500,8 +502,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
500 __u32 *pmtu, __be16 proto) 502 __u32 *pmtu, __be16 proto)
501{ 503{
502 struct ip6_tnl *tunnel = netdev_priv(dev); 504 struct ip6_tnl *tunnel = netdev_priv(dev);
503 __be16 protocol = (dev->type == ARPHRD_ETHER) ? 505 __be16 protocol;
504 htons(ETH_P_TEB) : proto;
505 506
506 if (dev->type == ARPHRD_ETHER) 507 if (dev->type == ARPHRD_ETHER)
507 IPCB(skb)->flags = 0; 508 IPCB(skb)->flags = 0;
@@ -515,6 +516,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
515 tunnel->o_seqno++; 516 tunnel->o_seqno++;
516 517
517 /* Push GRE header. */ 518 /* Push GRE header. */
519 protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
518 gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, 520 gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
519 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); 521 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
520 522
@@ -940,24 +942,25 @@ done:
940} 942}
941 943
942static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, 944static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
943 unsigned short type, 945 unsigned short type, const void *daddr,
944 const void *daddr, const void *saddr, unsigned int len) 946 const void *saddr, unsigned int len)
945{ 947{
946 struct ip6_tnl *t = netdev_priv(dev); 948 struct ip6_tnl *t = netdev_priv(dev);
947 struct ipv6hdr *ipv6h = skb_push(skb, t->hlen); 949 struct ipv6hdr *ipv6h;
948 __be16 *p = (__be16 *)(ipv6h+1); 950 __be16 *p;
949 951
950 ip6_flow_hdr(ipv6h, 0, 952 ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
951 ip6_make_flowlabel(dev_net(dev), skb, 953 ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
952 t->fl.u.ip6.flowlabel, true, 954 t->fl.u.ip6.flowlabel,
953 &t->fl.u.ip6)); 955 true, &t->fl.u.ip6));
954 ipv6h->hop_limit = t->parms.hop_limit; 956 ipv6h->hop_limit = t->parms.hop_limit;
955 ipv6h->nexthdr = NEXTHDR_GRE; 957 ipv6h->nexthdr = NEXTHDR_GRE;
956 ipv6h->saddr = t->parms.laddr; 958 ipv6h->saddr = t->parms.laddr;
957 ipv6h->daddr = t->parms.raddr; 959 ipv6h->daddr = t->parms.raddr;
958 960
959 p[0] = t->parms.o_flags; 961 p = (__be16 *)(ipv6h + 1);
960 p[1] = htons(type); 962 p[0] = t->parms.o_flags;
963 p[1] = htons(type);
961 964
962 /* 965 /*
963 * Set the source hardware address. 966 * Set the source hardware address.
@@ -1155,19 +1158,21 @@ err_alloc_dev:
1155 return err; 1158 return err;
1156} 1159}
1157 1160
1158static void __net_exit ip6gre_exit_net(struct net *net) 1161static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
1159{ 1162{
1163 struct net *net;
1160 LIST_HEAD(list); 1164 LIST_HEAD(list);
1161 1165
1162 rtnl_lock(); 1166 rtnl_lock();
1163 ip6gre_destroy_tunnels(net, &list); 1167 list_for_each_entry(net, net_list, exit_list)
1168 ip6gre_destroy_tunnels(net, &list);
1164 unregister_netdevice_many(&list); 1169 unregister_netdevice_many(&list);
1165 rtnl_unlock(); 1170 rtnl_unlock();
1166} 1171}
1167 1172
1168static struct pernet_operations ip6gre_net_ops = { 1173static struct pernet_operations ip6gre_net_ops = {
1169 .init = ip6gre_init_net, 1174 .init = ip6gre_init_net,
1170 .exit = ip6gre_exit_net, 1175 .exit_batch = ip6gre_exit_batch_net,
1171 .id = &ip6gre_net_id, 1176 .id = &ip6gre_net_id,
1172 .size = sizeof(struct ip6gre_net), 1177 .size = sizeof(struct ip6gre_net),
1173}; 1178};
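
[note] Switching from .exit to .exit_batch means the expensive RTNL lock is taken once per batch of dying network namespaces rather than once per namespace, with all doomed devices queued onto one list and torn down in a single unregister_netdevice_many() pass. A loose userspace sketch of the batching shape, with hypothetical minimal types that are not the kernel API:

#include <stdio.h>

struct dev { const char *name; struct dev *next; };
struct netns { struct dev *devs; struct netns *next; };

static void destroy_tunnels(struct netns *net, struct dev **list)
{
        while (net->devs) {
                struct dev *d = net->devs;

                net->devs = d->next;
                d->next = *list;        /* queue instead of freeing now */
                *list = d;
        }
}

static void exit_batch(struct netns *net_list)
{
        struct dev *list = NULL;

        /* rtnl_lock(): taken once for the whole batch */
        for (struct netns *net = net_list; net; net = net->next)
                destroy_tunnels(net, &list);
        for (struct dev *d = list; d; d = d->next)      /* one teardown pass */
                printf("unregister %s\n", d->name);
        /* rtnl_unlock() */
}

int main(void)
{
        struct dev d2 = { "ip6gre1", NULL }, d1 = { "ip6gre0", NULL };
        struct netns n2 = { &d2, NULL }, n1 = { &d1, &n2 };

        exit_batch(&n1);
        return 0;
}
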
@@ -1310,6 +1315,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
1310 dev->features |= NETIF_F_NETNS_LOCAL; 1315 dev->features |= NETIF_F_NETNS_LOCAL;
1311 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1316 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1312 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1317 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1318 netif_keep_dst(dev);
1313} 1319}
1314 1320
1315static bool ip6gre_netlink_encap_parms(struct nlattr *data[], 1321static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 713676f14a0e..02045494c24c 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/export.h> 2#include <linux/export.h>
2#include <linux/icmpv6.h> 3#include <linux/icmpv6.h>
3#include <linux/mutex.h> 4#include <linux/mutex.h>
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index cdb3728faca7..4a87f9428ca5 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
105 105
106 for (skb = segs; skb; skb = skb->next) { 106 for (skb = segs; skb; skb = skb->next) {
107 ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); 107 ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
108 if (gso_partial) 108 if (gso_partial && skb_is_gso(skb))
109 payload_len = skb_shinfo(skb)->gso_size + 109 payload_len = skb_shinfo(skb)->gso_size +
110 SKB_GSO_CB(skb)->data_offset + 110 SKB_GSO_CB(skb)->data_offset +
111 skb->head - (unsigned char *)(ipv6h + 1); 111 skb->head - (unsigned char *)(ipv6h + 1);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 43ca864327c7..5110a418cc4d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1161,11 +1161,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1161 if (WARN_ON(v6_cork->opt)) 1161 if (WARN_ON(v6_cork->opt))
1162 return -EINVAL; 1162 return -EINVAL;
1163 1163
1164 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); 1164 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1165 if (unlikely(!v6_cork->opt)) 1165 if (unlikely(!v6_cork->opt))
1166 return -ENOBUFS; 1166 return -ENOBUFS;
1167 1167
1168 v6_cork->opt->tot_len = opt->tot_len; 1168 v6_cork->opt->tot_len = sizeof(*opt);
1169 v6_cork->opt->opt_flen = opt->opt_flen; 1169 v6_cork->opt->opt_flen = opt->opt_flen;
1170 v6_cork->opt->opt_nflen = opt->opt_nflen; 1170 v6_cork->opt->opt_nflen = opt->opt_nflen;
1171 1171
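
[note] The ip6_output.c hunk above is a hardening fix: the cork options copy is sized by sizeof(*opt) instead of the caller-influenced opt->tot_len, so a short tot_len can no longer produce an undersized allocation that the field writes below then overrun; the fl6_merge_options() hunk earlier, which now copies tot_len, keeps the merged options consistent with that. A userspace sketch of the bug class and the fix, with hypothetical names:

#include <stdio.h>
#include <stdlib.h>

struct txoptions { size_t tot_len; int opt_flen; int opt_nflen; };

/* size the copy by the type, not by the caller-supplied tot_len: the old
 * kzalloc(opt->tot_len, ...) underallocated whenever tot_len was smaller
 * than the struct, and the field writes below then overran the buffer */
static struct txoptions *copy_opts(const struct txoptions *opt)
{
        struct txoptions *v = calloc(1, sizeof(*v));

        if (!v)
                return NULL;
        v->tot_len = sizeof(*v);
        v->opt_flen = opt->opt_flen;
        v->opt_nflen = opt->opt_nflen;
        return v;
}

int main(void)
{
        struct txoptions opt = { .tot_len = 4, .opt_flen = 8 };
        struct txoptions *v = copy_opts(&opt);

        if (v)
                printf("tot_len=%zu opt_flen=%d\n", v->tot_len, v->opt_flen);
        free(v);
        return 0;
}
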
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index ae73164559d5..3d3092adf1d2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -471,15 +471,16 @@ static int
471ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, 471ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
472 u8 *type, u8 *code, int *msg, __u32 *info, int offset) 472 u8 *type, u8 *code, int *msg, __u32 *info, int offset)
473{ 473{
474 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data; 474 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
475 struct ip6_tnl *t; 475 struct net *net = dev_net(skb->dev);
476 int rel_msg = 0;
477 u8 rel_type = ICMPV6_DEST_UNREACH; 476 u8 rel_type = ICMPV6_DEST_UNREACH;
478 u8 rel_code = ICMPV6_ADDR_UNREACH; 477 u8 rel_code = ICMPV6_ADDR_UNREACH;
479 u8 tproto;
480 __u32 rel_info = 0; 478 __u32 rel_info = 0;
481 __u16 len; 479 struct ip6_tnl *t;
482 int err = -ENOENT; 480 int err = -ENOENT;
481 int rel_msg = 0;
482 u8 tproto;
483 __u16 len;
483 484
484 /* If the packet doesn't contain the original IPv6 header we are 485 /* If the packet doesn't contain the original IPv6 header we are
485 in trouble since we might need the source address for further 486 in trouble since we might need the source address for further
@@ -490,16 +491,15 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
490 if (!t) 491 if (!t)
491 goto out; 492 goto out;
492 493
493 tproto = ACCESS_ONCE(t->parms.proto); 494 tproto = READ_ONCE(t->parms.proto);
494 if (tproto != ipproto && tproto != 0) 495 if (tproto != ipproto && tproto != 0)
495 goto out; 496 goto out;
496 497
497 err = 0; 498 err = 0;
498 499
499 switch (*type) { 500 switch (*type) {
500 __u32 teli;
501 struct ipv6_tlv_tnl_enc_lim *tel; 501 struct ipv6_tlv_tnl_enc_lim *tel;
502 __u32 mtu; 502 __u32 mtu, teli;
503 case ICMPV6_DEST_UNREACH: 503 case ICMPV6_DEST_UNREACH:
504 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", 504 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
505 t->parms.name); 505 t->parms.name);
@@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
530 } 530 }
531 break; 531 break;
532 case ICMPV6_PKT_TOOBIG: 532 case ICMPV6_PKT_TOOBIG:
533 ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
534 sock_net_uid(net, NULL));
533 mtu = *info - offset; 535 mtu = *info - offset;
534 if (mtu < IPV6_MIN_MTU) 536 if (mtu < IPV6_MIN_MTU)
535 mtu = IPV6_MIN_MTU; 537 mtu = IPV6_MIN_MTU;
536 t->dev->mtu = mtu;
537
538 len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); 538 len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
539 if (len > mtu) { 539 if (len > mtu) {
540 rel_type = ICMPV6_PKT_TOOBIG; 540 rel_type = ICMPV6_PKT_TOOBIG;
@@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
543 rel_msg = 1; 543 rel_msg = 1;
544 } 544 }
545 break; 545 break;
546 case NDISC_REDIRECT:
547 ip6_redirect(skb, net, skb->dev->ifindex, 0,
548 sock_net_uid(net, NULL));
549 break;
546 } 550 }
547 551
548 *type = rel_type; 552 *type = rel_type;
@@ -559,13 +563,12 @@ static int
559ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 563ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
560 u8 type, u8 code, int offset, __be32 info) 564 u8 type, u8 code, int offset, __be32 info)
561{ 565{
562 int rel_msg = 0;
563 u8 rel_type = type;
564 u8 rel_code = code;
565 __u32 rel_info = ntohl(info); 566 __u32 rel_info = ntohl(info);
566 int err;
567 struct sk_buff *skb2;
568 const struct iphdr *eiph; 567 const struct iphdr *eiph;
568 struct sk_buff *skb2;
569 int err, rel_msg = 0;
570 u8 rel_type = type;
571 u8 rel_code = code;
569 struct rtable *rt; 572 struct rtable *rt;
570 struct flowi4 fl4; 573 struct flowi4 fl4;
571 574
@@ -590,9 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
590 rel_type = ICMP_DEST_UNREACH; 593 rel_type = ICMP_DEST_UNREACH;
591 rel_code = ICMP_FRAG_NEEDED; 594 rel_code = ICMP_FRAG_NEEDED;
592 break; 595 break;
593 case NDISC_REDIRECT:
594 rel_type = ICMP_REDIRECT;
595 rel_code = ICMP_REDIR_HOST;
596 default: 596 default:
597 return 0; 597 return 0;
598 } 598 }
@@ -611,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
611 eiph = ip_hdr(skb2); 611 eiph = ip_hdr(skb2);
612 612
613 /* Try to guess incoming interface */ 613 /* Try to guess incoming interface */
614 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, 614 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
615 eiph->saddr, 0, 615 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
616 0, 0,
617 IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
618 if (IS_ERR(rt)) 616 if (IS_ERR(rt))
619 goto out; 617 goto out;
620 618
621 skb2->dev = rt->dst.dev; 619 skb2->dev = rt->dst.dev;
620 ip_rt_put(rt);
622 621
623 /* route "incoming" packet */ 622 /* route "incoming" packet */
624 if (rt->rt_flags & RTCF_LOCAL) { 623 if (rt->rt_flags & RTCF_LOCAL) {
625 ip_rt_put(rt);
626 rt = NULL;
627 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, 624 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
628 eiph->daddr, eiph->saddr, 625 eiph->daddr, eiph->saddr, 0, 0,
629 0, 0, 626 IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
630 IPPROTO_IPIP, 627 if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
631 RT_TOS(eiph->tos), 0);
632 if (IS_ERR(rt) ||
633 rt->dst.dev->type != ARPHRD_TUNNEL) {
634 if (!IS_ERR(rt)) 628 if (!IS_ERR(rt))
635 ip_rt_put(rt); 629 ip_rt_put(rt);
636 goto out; 630 goto out;
637 } 631 }
638 skb_dst_set(skb2, &rt->dst); 632 skb_dst_set(skb2, &rt->dst);
639 } else { 633 } else {
640 ip_rt_put(rt);
641 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, 634 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
642 skb2->dev) || 635 skb2->dev) ||
643 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) 636 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
@@ -649,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
649 if (rel_info > dst_mtu(skb_dst(skb2))) 642 if (rel_info > dst_mtu(skb_dst(skb2)))
650 goto out; 643 goto out;
651 644
652 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); 645 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
646 rel_info);
653 } 647 }
654 if (rel_type == ICMP_REDIRECT)
655 skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
656 648
657 icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); 649 icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
658 650
@@ -665,11 +657,10 @@ static int
665ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 657ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
666 u8 type, u8 code, int offset, __be32 info) 658 u8 type, u8 code, int offset, __be32 info)
667{ 659{
668 int rel_msg = 0; 660 __u32 rel_info = ntohl(info);
661 int err, rel_msg = 0;
669 u8 rel_type = type; 662 u8 rel_type = type;
670 u8 rel_code = code; 663 u8 rel_code = code;
671 __u32 rel_info = ntohl(info);
672 int err;
673 664
674 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, 665 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
675 &rel_msg, &rel_info, offset); 666 &rel_msg, &rel_info, offset);
@@ -769,7 +760,8 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
769 760
770 if ((ipv6_addr_is_multicast(laddr) || 761 if ((ipv6_addr_is_multicast(laddr) ||
771 likely(ipv6_chk_addr(net, laddr, ldev, 0))) && 762 likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
772 likely(!ipv6_chk_addr(net, raddr, NULL, 0))) 763 ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
764 likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
773 ret = 1; 765 ret = 1;
774 } 766 }
775 return ret; 767 return ret;
@@ -899,7 +891,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
899 t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); 891 t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
900 892
901 if (t) { 893 if (t) {
902 u8 tproto = ACCESS_ONCE(t->parms.proto); 894 u8 tproto = READ_ONCE(t->parms.proto);
903 895
904 if (tproto != ipproto && tproto != 0) 896 if (tproto != ipproto && tproto != 0)
905 goto drop; 897 goto drop;
@@ -999,7 +991,8 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
999 if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) 991 if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
1000 pr_warn("%s xmit: Local address not yet configured!\n", 992 pr_warn("%s xmit: Local address not yet configured!\n",
1001 p->name); 993 p->name);
1002 else if (!ipv6_addr_is_multicast(raddr) && 994 else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
995 !ipv6_addr_is_multicast(raddr) &&
1003 unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) 996 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
1004 pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", 997 pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
1005 p->name); 998 p->name);
@@ -1043,6 +1036,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
1043 struct dst_entry *dst = NULL, *ndst = NULL; 1036 struct dst_entry *dst = NULL, *ndst = NULL;
1044 struct net_device *tdev; 1037 struct net_device *tdev;
1045 int mtu; 1038 int mtu;
1039 unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
1046 unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; 1040 unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
1047 unsigned int max_headroom = psh_hlen; 1041 unsigned int max_headroom = psh_hlen;
1048 bool use_cache = false; 1042 bool use_cache = false;
@@ -1124,7 +1118,7 @@ route_lookup:
1124 t->parms.name); 1118 t->parms.name);
1125 goto tx_err_dst_release; 1119 goto tx_err_dst_release;
1126 } 1120 }
1127 mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; 1121 mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
1128 if (encap_limit >= 0) { 1122 if (encap_limit >= 0) {
1129 max_headroom += 8; 1123 max_headroom += 8;
1130 mtu -= 8; 1124 mtu -= 8;
@@ -1133,7 +1127,7 @@ route_lookup:
1133 mtu = IPV6_MIN_MTU; 1127 mtu = IPV6_MIN_MTU;
1134 if (skb_dst(skb) && !t->parms.collect_md) 1128 if (skb_dst(skb) && !t->parms.collect_md)
1135 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 1129 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1136 if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { 1130 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
1137 *pmtu = mtu; 1131 *pmtu = mtu;
1138 err = -EMSGSIZE; 1132 err = -EMSGSIZE;
1139 goto tx_err_dst_release; 1133 goto tx_err_dst_release;
@@ -1232,7 +1226,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1232 1226
1233 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1227 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1234 1228
1235 tproto = ACCESS_ONCE(t->parms.proto); 1229 tproto = READ_ONCE(t->parms.proto);
1236 if (tproto != IPPROTO_IPIP && tproto != 0) 1230 if (tproto != IPPROTO_IPIP && tproto != 0)
1237 return -1; 1231 return -1;
1238 1232
@@ -1302,7 +1296,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1302 u8 tproto; 1296 u8 tproto;
1303 int err; 1297 int err;
1304 1298
1305 tproto = ACCESS_ONCE(t->parms.proto); 1299 tproto = READ_ONCE(t->parms.proto);
1306 if ((tproto != IPPROTO_IPV6 && tproto != 0) || 1300 if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
1307 ip6_tnl_addr_conflict(t, ipv6h)) 1301 ip6_tnl_addr_conflict(t, ipv6h))
1308 return -1; 1302 return -1;
@@ -2167,17 +2161,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
2167 .priority = 1, 2161 .priority = 1,
2168}; 2162};
2169 2163
2170static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) 2164static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
2171{ 2165{
2172 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 2166 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2173 struct net_device *dev, *aux; 2167 struct net_device *dev, *aux;
2174 int h; 2168 int h;
2175 struct ip6_tnl *t; 2169 struct ip6_tnl *t;
2176 LIST_HEAD(list);
2177 2170
2178 for_each_netdev_safe(net, dev, aux) 2171 for_each_netdev_safe(net, dev, aux)
2179 if (dev->rtnl_link_ops == &ip6_link_ops) 2172 if (dev->rtnl_link_ops == &ip6_link_ops)
2180 unregister_netdevice_queue(dev, &list); 2173 unregister_netdevice_queue(dev, list);
2181 2174
2182 for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { 2175 for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
2183 t = rtnl_dereference(ip6n->tnls_r_l[h]); 2176 t = rtnl_dereference(ip6n->tnls_r_l[h]);
@@ -2186,12 +2179,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
2186 * been added to the list by the previous loop. 2179 * been added to the list by the previous loop.
2187 */ 2180 */
2188 if (!net_eq(dev_net(t->dev), net)) 2181 if (!net_eq(dev_net(t->dev), net))
2189 unregister_netdevice_queue(t->dev, &list); 2182 unregister_netdevice_queue(t->dev, list);
2190 t = rtnl_dereference(t->next); 2183 t = rtnl_dereference(t->next);
2191 } 2184 }
2192 } 2185 }
2193
2194 unregister_netdevice_many(&list);
2195} 2186}
2196 2187
2197static int __net_init ip6_tnl_init_net(struct net *net) 2188static int __net_init ip6_tnl_init_net(struct net *net)
@@ -2235,16 +2226,21 @@ err_alloc_dev:
2235 return err; 2226 return err;
2236} 2227}
2237 2228
2238static void __net_exit ip6_tnl_exit_net(struct net *net) 2229static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
2239{ 2230{
2231 struct net *net;
2232 LIST_HEAD(list);
2233
2240 rtnl_lock(); 2234 rtnl_lock();
2241 ip6_tnl_destroy_tunnels(net); 2235 list_for_each_entry(net, net_list, exit_list)
2236 ip6_tnl_destroy_tunnels(net, &list);
2237 unregister_netdevice_many(&list);
2242 rtnl_unlock(); 2238 rtnl_unlock();
2243} 2239}
2244 2240
2245static struct pernet_operations ip6_tnl_net_ops = { 2241static struct pernet_operations ip6_tnl_net_ops = {
2246 .init = ip6_tnl_init_net, 2242 .init = ip6_tnl_init_net,
2247 .exit = ip6_tnl_exit_net, 2243 .exit_batch = ip6_tnl_exit_batch_net,
2248 .id = &ip6_tnl_net_id, 2244 .id = &ip6_tnl_net_id,
2249 .size = sizeof(struct ip6_tnl_net), 2245 .size = sizeof(struct ip6_tnl_net),
2250}; 2246};
@@ -2259,6 +2255,9 @@ static int __init ip6_tunnel_init(void)
2259{ 2255{
2260 int err; 2256 int err;
2261 2257
2258 if (!ipv6_mod_enabled())
2259 return -EOPNOTSUPP;
2260
2262 err = register_pernet_device(&ip6_tnl_net_ops); 2261 err = register_pernet_device(&ip6_tnl_net_ops);
2263 if (err < 0) 2262 if (err < 0)
2264 goto out_pernet; 2263 goto out_pernet;
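This conversion moves ip6_tunnel from a per-namespace .exit handler to .exit_batch: tunnel devices from every exiting namespace are queued onto one list and torn down with a single unregister_netdevice_many() call, amortizing the RTNL round trips. A sketch of the general shape, with foo_* as hypothetical names:

    static void __net_exit foo_exit_batch_net(struct list_head *net_list)
    {
            struct net *net;
            LIST_HEAD(list);

            rtnl_lock();
            list_for_each_entry(net, net_list, exit_list)
                    foo_collect_devices(net, &list); /* hypothetical: queues devices
                                                        via unregister_netdevice_queue() */
            unregister_netdevice_many(&list);        /* one shot for all namespaces */
            rtnl_unlock();
    }

    static struct pernet_operations foo_net_ops = {
            .init       = foo_init_net,              /* hypothetical */
            .exit_batch = foo_exit_batch_net,
    };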
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 79444a4bfd6d..dbb74f3c57a7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
445 struct dst_entry *dst = skb_dst(skb); 445 struct dst_entry *dst = skb_dst(skb);
446 struct net_device *tdev; 446 struct net_device *tdev;
447 struct xfrm_state *x; 447 struct xfrm_state *x;
448 int pkt_len = skb->len;
448 int err = -1; 449 int err = -1;
449 int mtu; 450 int mtu;
450 451
@@ -502,7 +503,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
502 struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); 503 struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
503 504
504 u64_stats_update_begin(&tstats->syncp); 505 u64_stats_update_begin(&tstats->syncp);
505 tstats->tx_bytes += skb->len; 506 tstats->tx_bytes += pkt_len;
506 tstats->tx_packets++; 507 tstats->tx_packets++;
507 u64_stats_update_end(&tstats->syncp); 508 u64_stats_update_end(&tstats->syncp);
508 } else { 509 } else {
@@ -1052,23 +1053,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
1052 .get_link_net = ip6_tnl_get_link_net, 1053 .get_link_net = ip6_tnl_get_link_net,
1053}; 1054};
1054 1055
1055static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) 1056static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
1057 struct list_head *list)
1056{ 1058{
1057 int h; 1059 int h;
1058 struct ip6_tnl *t; 1060 struct ip6_tnl *t;
1059 LIST_HEAD(list);
1060 1061
1061 for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { 1062 for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
1062 t = rtnl_dereference(ip6n->tnls_r_l[h]); 1063 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1063 while (t) { 1064 while (t) {
1064 unregister_netdevice_queue(t->dev, &list); 1065 unregister_netdevice_queue(t->dev, list);
1065 t = rtnl_dereference(t->next); 1066 t = rtnl_dereference(t->next);
1066 } 1067 }
1067 } 1068 }
1068 1069
1069 t = rtnl_dereference(ip6n->tnls_wc[0]); 1070 t = rtnl_dereference(ip6n->tnls_wc[0]);
1070 unregister_netdevice_queue(t->dev, &list); 1071 unregister_netdevice_queue(t->dev, list);
1071 unregister_netdevice_many(&list);
1072} 1072}
1073 1073
1074static int __net_init vti6_init_net(struct net *net) 1074static int __net_init vti6_init_net(struct net *net)
@@ -1108,18 +1108,24 @@ err_alloc_dev:
1108 return err; 1108 return err;
1109} 1109}
1110 1110
1111static void __net_exit vti6_exit_net(struct net *net) 1111static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
1112{ 1112{
1113 struct vti6_net *ip6n = net_generic(net, vti6_net_id); 1113 struct vti6_net *ip6n;
1114 struct net *net;
1115 LIST_HEAD(list);
1114 1116
1115 rtnl_lock(); 1117 rtnl_lock();
1116 vti6_destroy_tunnels(ip6n); 1118 list_for_each_entry(net, net_list, exit_list) {
1119 ip6n = net_generic(net, vti6_net_id);
1120 vti6_destroy_tunnels(ip6n, &list);
1121 }
1122 unregister_netdevice_many(&list);
1117 rtnl_unlock(); 1123 rtnl_unlock();
1118} 1124}
1119 1125
1120static struct pernet_operations vti6_net_ops = { 1126static struct pernet_operations vti6_net_ops = {
1121 .init = vti6_init_net, 1127 .init = vti6_init_net,
1122 .exit = vti6_exit_net, 1128 .exit_batch = vti6_exit_batch_net,
1123 .id = &vti6_net_id, 1129 .id = &vti6_net_id,
1124 .size = sizeof(struct vti6_net), 1130 .size = sizeof(struct vti6_net),
1125}; 1131};
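Besides the same .exit_batch conversion, the vti6 hunks fix a stats bug: skb->len was read after the skb had been handed to the output path, which may transform or free it. A sketch of the idiom, assuming the output call elided from the hunk is dst_output():

    int pkt_len = skb->len;              /* sample before the skb leaves us */

    err = dst_output(t->net, skb->sk, skb);
    if (net_xmit_eval(err) == 0)
            tstats->tx_bytes += pkt_len; /* skb->len here could be a use-after-free */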
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f5500f5444e9..a2e1a864eb46 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -120,7 +120,7 @@ static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
120static int ip6mr_rtm_dumproute(struct sk_buff *skb, 120static int ip6mr_rtm_dumproute(struct sk_buff *skb,
121 struct netlink_callback *cb); 121 struct netlink_callback *cb);
122static void mroute_clean_tables(struct mr6_table *mrt, bool all); 122static void mroute_clean_tables(struct mr6_table *mrt, bool all);
123static void ipmr_expire_process(unsigned long arg); 123static void ipmr_expire_process(struct timer_list *t);
124 124
125#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 125#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
126#define ip6mr_for_each_table(mrt, net) \ 126#define ip6mr_for_each_table(mrt, net) \
@@ -320,8 +320,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
320 320
321 INIT_LIST_HEAD(&mrt->mfc6_unres_queue); 321 INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
322 322
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 323 timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
324 (unsigned long)mrt);
325 324
326#ifdef CONFIG_IPV6_PIMSM_V2 325#ifdef CONFIG_IPV6_PIMSM_V2
327 mrt->mroute_reg_vif_num = -1; 326 mrt->mroute_reg_vif_num = -1;
@@ -888,9 +887,9 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
888 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 887 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
889} 888}
890 889
891static void ipmr_expire_process(unsigned long arg) 890static void ipmr_expire_process(struct timer_list *t)
892{ 891{
893 struct mr6_table *mrt = (struct mr6_table *)arg; 892 struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
894 893
895 if (!spin_trylock(&mfc_unres_lock)) { 894 if (!spin_trylock(&mfc_unres_lock)) {
896 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 895 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
@@ -1617,6 +1616,10 @@ int ip6mr_sk_done(struct sock *sk)
1617 struct net *net = sock_net(sk); 1616 struct net *net = sock_net(sk);
1618 struct mr6_table *mrt; 1617 struct mr6_table *mrt;
1619 1618
1619 if (sk->sk_type != SOCK_RAW ||
1620 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1621 return err;
1622
1620 rtnl_lock(); 1623 rtnl_lock();
1621 ip6mr_for_each_table(mrt, net) { 1624 ip6mr_for_each_table(mrt, net) {
1622 if (sk == mrt->mroute6_sk) { 1625 if (sk == mrt->mroute6_sk) {
@@ -1722,6 +1725,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1722 case MRT6_ADD_MFC: 1725 case MRT6_ADD_MFC:
1723 case MRT6_DEL_MFC: 1726 case MRT6_DEL_MFC:
1724 parent = -1; 1727 parent = -1;
1728 /* fall through */
1725 case MRT6_ADD_MFC_PROXY: 1729 case MRT6_ADD_MFC_PROXY:
1726 case MRT6_DEL_MFC_PROXY: 1730 case MRT6_DEL_MFC_PROXY:
1727 if (optlen < sizeof(mfc)) 1731 if (optlen < sizeof(mfc))
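The ip6mr.c changes are part of the timer_list API conversion: callbacks now take a struct timer_list * and recover their container with from_timer(), replacing the unsigned long cast. A minimal sketch of the pattern, with mrt_like and expire_fn as hypothetical names:

    struct mrt_like {
            struct timer_list expire_timer;
            /* ... payload ... */
    };

    static void expire_fn(struct timer_list *t)
    {
            /* container_of() keyed by the member name, no casts */
            struct mrt_like *m = from_timer(m, t, expire_timer);
            /* ... use m ... */
    }

    static void mrt_like_init(struct mrt_like *m)
    {
            timer_setup(&m->expire_timer, expire_fn, 0);
    }

The same conversion recurs below in mcast.c, nf_conntrack_reasm.c, and reassembly.c.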
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a5e466d4e093..b9404feabd78 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -377,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
377 retv = 0; 377 retv = 0;
378 break; 378 break;
379 379
380 case IPV6_FREEBIND:
381 if (optlen < sizeof(int))
382 goto e_inval;
383 /* we also don't have a separate freebind bit for IPV6 */
384 inet_sk(sk)->freebind = valbool;
385 retv = 0;
386 break;
387
380 case IPV6_RECVORIGDSTADDR: 388 case IPV6_RECVORIGDSTADDR:
381 if (optlen < sizeof(int)) 389 if (optlen < sizeof(int))
382 goto e_inval; 390 goto e_inval;
@@ -1214,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1214 val = inet_sk(sk)->transparent; 1222 val = inet_sk(sk)->transparent;
1215 break; 1223 break;
1216 1224
1225 case IPV6_FREEBIND:
1226 val = inet_sk(sk)->freebind;
1227 break;
1228
1217 case IPV6_RECVORIGDSTADDR: 1229 case IPV6_RECVORIGDSTADDR:
1218 val = np->rxopt.bits.rxorigdstaddr; 1230 val = np->rxopt.bits.rxorigdstaddr;
1219 break; 1231 break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 12b7c27ce5ce..fc6d7d143f2c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -75,10 +75,10 @@ static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
75 75
76static void igmp6_join_group(struct ifmcaddr6 *ma); 76static void igmp6_join_group(struct ifmcaddr6 *ma);
77static void igmp6_leave_group(struct ifmcaddr6 *ma); 77static void igmp6_leave_group(struct ifmcaddr6 *ma);
78static void igmp6_timer_handler(unsigned long data); 78static void igmp6_timer_handler(struct timer_list *t);
79 79
80static void mld_gq_timer_expire(unsigned long data); 80static void mld_gq_timer_expire(struct timer_list *t);
81static void mld_ifc_timer_expire(unsigned long data); 81static void mld_ifc_timer_expire(struct timer_list *t);
82static void mld_ifc_event(struct inet6_dev *idev); 82static void mld_ifc_event(struct inet6_dev *idev);
83static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); 83static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
84static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); 84static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
@@ -839,7 +839,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
839 if (!mc) 839 if (!mc)
840 return NULL; 840 return NULL;
841 841
842 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); 842 timer_setup(&mc->mca_timer, igmp6_timer_handler, 0);
843 843
844 mc->mca_addr = *addr; 844 mc->mca_addr = *addr;
845 mc->idev = idev; /* reference taken by caller */ 845 mc->idev = idev; /* reference taken by caller */
@@ -2083,9 +2083,9 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev)
2083 } 2083 }
2084} 2084}
2085 2085
2086static void mld_dad_timer_expire(unsigned long data) 2086static void mld_dad_timer_expire(struct timer_list *t)
2087{ 2087{
2088 struct inet6_dev *idev = (struct inet6_dev *)data; 2088 struct inet6_dev *idev = from_timer(idev, t, mc_dad_timer);
2089 2089
2090 mld_send_initial_cr(idev); 2090 mld_send_initial_cr(idev);
2091 if (idev->mc_dad_count) { 2091 if (idev->mc_dad_count) {
@@ -2432,18 +2432,18 @@ static void igmp6_leave_group(struct ifmcaddr6 *ma)
2432 } 2432 }
2433} 2433}
2434 2434
2435static void mld_gq_timer_expire(unsigned long data) 2435static void mld_gq_timer_expire(struct timer_list *t)
2436{ 2436{
2437 struct inet6_dev *idev = (struct inet6_dev *)data; 2437 struct inet6_dev *idev = from_timer(idev, t, mc_gq_timer);
2438 2438
2439 idev->mc_gq_running = 0; 2439 idev->mc_gq_running = 0;
2440 mld_send_report(idev, NULL); 2440 mld_send_report(idev, NULL);
2441 in6_dev_put(idev); 2441 in6_dev_put(idev);
2442} 2442}
2443 2443
2444static void mld_ifc_timer_expire(unsigned long data) 2444static void mld_ifc_timer_expire(struct timer_list *t)
2445{ 2445{
2446 struct inet6_dev *idev = (struct inet6_dev *)data; 2446 struct inet6_dev *idev = from_timer(idev, t, mc_ifc_timer);
2447 2447
2448 mld_send_cr(idev); 2448 mld_send_cr(idev);
2449 if (idev->mc_ifc_count) { 2449 if (idev->mc_ifc_count) {
@@ -2462,9 +2462,9 @@ static void mld_ifc_event(struct inet6_dev *idev)
2462 mld_ifc_start_timer(idev, 1); 2462 mld_ifc_start_timer(idev, 1);
2463} 2463}
2464 2464
2465static void igmp6_timer_handler(unsigned long data) 2465static void igmp6_timer_handler(struct timer_list *t)
2466{ 2466{
2467 struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; 2467 struct ifmcaddr6 *ma = from_timer(ma, t, mca_timer);
2468 2468
2469 if (mld_in_v1_mode(ma->idev)) 2469 if (mld_in_v1_mode(ma->idev))
2470 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); 2470 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
@@ -2552,14 +2552,11 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
2552 write_lock_bh(&idev->lock); 2552 write_lock_bh(&idev->lock);
2553 spin_lock_init(&idev->mc_lock); 2553 spin_lock_init(&idev->mc_lock);
2554 idev->mc_gq_running = 0; 2554 idev->mc_gq_running = 0;
2555 setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire, 2555 timer_setup(&idev->mc_gq_timer, mld_gq_timer_expire, 0);
2556 (unsigned long)idev);
2557 idev->mc_tomb = NULL; 2556 idev->mc_tomb = NULL;
2558 idev->mc_ifc_count = 0; 2557 idev->mc_ifc_count = 0;
2559 setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, 2558 timer_setup(&idev->mc_ifc_timer, mld_ifc_timer_expire, 0);
2560 (unsigned long)idev); 2559 timer_setup(&idev->mc_dad_timer, mld_dad_timer_expire, 0);
2561 setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
2562 (unsigned long)idev);
2563 ipv6_mc_reset(idev); 2560 ipv6_mc_reset(idev);
2564 write_unlock_bh(&idev->lock); 2561 write_unlock_bh(&idev->lock);
2565} 2562}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 266a530414d7..b3cea200c85e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -46,6 +46,7 @@
46#endif 46#endif
47 47
48#include <linux/if_addr.h> 48#include <linux/if_addr.h>
49#include <linux/if_ether.h>
49#include <linux/if_arp.h> 50#include <linux/if_arp.h>
50#include <linux/ipv6.h> 51#include <linux/ipv6.h>
51#include <linux/icmpv6.h> 52#include <linux/icmpv6.h>
@@ -426,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb,
426 int hop_limit, int len) 427 int hop_limit, int len)
427{ 428{
428 struct ipv6hdr *hdr; 429 struct ipv6hdr *hdr;
430 struct inet6_dev *idev;
431 unsigned tclass;
432
433 rcu_read_lock();
434 idev = __in6_dev_get(skb->dev);
435 tclass = idev ? idev->cnf.ndisc_tclass : 0;
436 rcu_read_unlock();
429 437
430 skb_push(skb, sizeof(*hdr)); 438 skb_push(skb, sizeof(*hdr));
431 skb_reset_network_header(skb); 439 skb_reset_network_header(skb);
432 hdr = ipv6_hdr(skb); 440 hdr = ipv6_hdr(skb);
433 441
434 ip6_flow_hdr(hdr, 0, 0); 442 ip6_flow_hdr(hdr, tclass, 0);
435 443
436 hdr->payload_len = htons(len); 444 hdr->payload_len = htons(len);
437 hdr->nexthdr = IPPROTO_ICMPV6; 445 hdr->nexthdr = IPPROTO_ICMPV6;
@@ -822,7 +830,7 @@ have_ifp:
822 * who is doing DAD 830 * who is doing DAD
823 * so fail our DAD process 831 * so fail our DAD process
824 */ 832 */
825 addrconf_dad_failure(ifp); 833 addrconf_dad_failure(skb, ifp);
826 return; 834 return;
827 } else { 835 } else {
828 /* 836 /*
@@ -975,7 +983,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
975 if (ifp) { 983 if (ifp) {
976 if (skb->pkt_type != PACKET_LOOPBACK 984 if (skb->pkt_type != PACKET_LOOPBACK
977 && (ifp->flags & IFA_F_TENTATIVE)) { 985 && (ifp->flags & IFA_F_TENTATIVE)) {
978 addrconf_dad_failure(ifp); 986 addrconf_dad_failure(skb, ifp);
979 return; 987 return;
980 } 988 }
981 /* What should we make now? The advertisement 989 /* What should we make now? The advertisement
@@ -989,8 +997,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
989 */ 997 */
990 if (skb->pkt_type != PACKET_LOOPBACK) 998 if (skb->pkt_type != PACKET_LOOPBACK)
991 ND_PRINTK(1, warn, 999 ND_PRINTK(1, warn,
992 "NA: someone advertises our address %pI6 on %s!\n", 1000 "NA: %pM advertised our address %pI6c on %s!\n",
993 &ifp->addr, ifp->idev->dev->name); 1001 eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
994 in6_ifa_put(ifp); 1002 in6_ifa_put(ifp);
995 return; 1003 return;
996 } 1004 }
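The ip6_nd_hdr() hunk lets neighbour discovery packets carry a per-device Traffic Class (the new ndisc_tclass conf value) instead of a hardcoded 0, read under RCU since the inet6_dev can disappear. The tclass octet packs DSCP and ECN; a hypothetical helper, purely to illustrate the encoding:

    /* hypothetical: compose an IPv6 Traffic Class byte.
     * Upper 6 bits carry DSCP, lower 2 bits ECN (RFC 3168). */
    static inline unsigned int make_tclass(unsigned int dscp, unsigned int ecn)
    {
            return (dscp << 2) | (ecn & 0x3);
    }
    /* e.g. make_tclass(46, 0) == 0xb8: DSCP EF for latency-critical ND */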
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index fe180c96040e..c6ee0cdd0ba9 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the netfilter modules on top of IPv6. 3# Makefile for the netfilter modules on top of IPv6.
3# 4#
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 01bd3ee5ebc6..f06e25065a34 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -800,6 +800,25 @@ get_counters(const struct xt_table_info *t,
800 } 800 }
801} 801}
802 802
803static void get_old_counters(const struct xt_table_info *t,
804 struct xt_counters counters[])
805{
806 struct ip6t_entry *iter;
807 unsigned int cpu, i;
808
809 for_each_possible_cpu(cpu) {
810 i = 0;
811 xt_entry_foreach(iter, t->entries, t->size) {
812 const struct xt_counters *tmp;
813
814 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
815 ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
816 ++i;
817 }
818 cond_resched();
819 }
820}
821
803static struct xt_counters *alloc_counters(const struct xt_table *table) 822static struct xt_counters *alloc_counters(const struct xt_table *table)
804{ 823{
805 unsigned int countersize; 824 unsigned int countersize;
@@ -1090,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1090 (newinfo->number <= oldinfo->initial_entries)) 1109 (newinfo->number <= oldinfo->initial_entries))
1091 module_put(t->me); 1110 module_put(t->me);
1092 1111
1093 /* Get the old counters, and synchronize with replace */ 1112 get_old_counters(oldinfo, counters);
1094 get_counters(oldinfo, counters);
1095 1113
1096 /* Decrease module usage counts and free resource */ 1114 /* Decrease module usage counts and free resource */
1097 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) 1115 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
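Splitting get_old_counters() out of get_counters() lets the table-replace path skip the seqcount synchronization: the old table has already been unhooked, so its per-cpu counters are quiescent, and the cond_resched() keeps large rulesets from monopolizing the CPU. For reference, ADD_COUNTER() is roughly the following (as defined in x_tables.h of this era):

    /* accumulate one per-cpu sample into the aggregate slot */
    #define ADD_COUNTER(c, b, p) do { (c).bcnt += (b); (c).pcnt += (p); } while (0)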
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index a5cd43d75393..437af8c95277 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -353,7 +353,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
353 nexthdr = ipv6_hdr(skb)->nexthdr; 353 nexthdr = ipv6_hdr(skb)->nexthdr;
354 thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 354 thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
355 &frag_off); 355 &frag_off);
356 if (thoff < 0) 356 if (thoff < 0 || nexthdr != IPPROTO_TCP)
357 return NF_ACCEPT; 357 return NF_ACCEPT;
358 358
359 th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); 359 th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
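A one-line hardening fix: ipv6_skip_exthdr() leaves the final protocol number in nexthdr, and without the added check a non-TCP packet whose extension chain parsed cleanly would have its payload interpreted as a TCP header. The guard in isolation:

    thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
    if (thoff < 0 || nexthdr != IPPROTO_TCP)
            return NF_ACCEPT;   /* malformed or not TCP: leave it alone */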
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index fe01dc953c56..3b80a38f62b8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -339,7 +339,7 @@ static void ipv6_hooks_unregister(struct net *net)
339 mutex_unlock(&register_ipv6_hooks); 339 mutex_unlock(&register_ipv6_hooks);
340} 340}
341 341
342struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { 342const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
343 .l3proto = PF_INET6, 343 .l3proto = PF_INET6,
344 .pkt_to_tuple = ipv6_pkt_to_tuple, 344 .pkt_to_tuple = ipv6_pkt_to_tuple,
345 .invert_tuple = ipv6_invert_tuple, 345 .invert_tuple = ipv6_invert_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a9e1fd1a8536..3ac0d826afc4 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -94,7 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct,
94 const struct sk_buff *skb, 94 const struct sk_buff *skb,
95 unsigned int dataoff, 95 unsigned int dataoff,
96 enum ip_conntrack_info ctinfo, 96 enum ip_conntrack_info ctinfo,
97 u_int8_t pf,
98 unsigned int *timeout) 97 unsigned int *timeout)
99{ 98{
100 /* Do not immediately delete the connection after the first 99 /* Do not immediately delete the connection after the first
@@ -176,6 +175,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
176 return NF_ACCEPT; 175 return NF_ACCEPT;
177} 176}
178 177
178static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
179 u8 pf, const char *msg)
180{
181 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
182}
183
179static int 184static int
180icmpv6_error(struct net *net, struct nf_conn *tmpl, 185icmpv6_error(struct net *net, struct nf_conn *tmpl,
181 struct sk_buff *skb, unsigned int dataoff, 186 struct sk_buff *skb, unsigned int dataoff,
@@ -187,17 +192,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
187 192
188 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); 193 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
189 if (icmp6h == NULL) { 194 if (icmp6h == NULL) {
190 if (LOG_INVALID(net, IPPROTO_ICMPV6)) 195 icmpv6_error_log(skb, net, pf, "short packet");
191 nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
192 "nf_ct_icmpv6: short packet ");
193 return -NF_ACCEPT; 196 return -NF_ACCEPT;
194 } 197 }
195 198
196 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 199 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
197 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { 200 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
198 if (LOG_INVALID(net, IPPROTO_ICMPV6)) 201 icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
199 nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
200 "nf_ct_icmpv6: ICMPv6 checksum failed ");
201 return -NF_ACCEPT; 202 return -NF_ACCEPT;
202 } 203 }
203 204
@@ -258,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
258 return 0; 259 return 0;
259} 260}
260 261
261static int icmpv6_nlattr_tuple_size(void) 262static unsigned int icmpv6_nlattr_tuple_size(void)
262{ 263{
263 return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); 264 static unsigned int size __read_mostly;
265
266 if (!size)
267 size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
268
269 return size;
264} 270}
265#endif 271#endif
266 272
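icmpv6_nlattr_tuple_size() now memoizes nla_policy_len(), which walks the whole policy array on every call. The unlocked check-then-store is a benign race: every racing caller computes the identical value, so the worst case is a redundant recomputation. The shape of the idiom, with a hypothetical name:

    static unsigned int cached_len(void)
    {
            static unsigned int size __read_mostly;

            if (!size)   /* benign race: all writers store the same value */
                    size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
            return size;
    }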
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b263bf3a19f7..977d8900cfd1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
169 return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); 169 return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
170} 170}
171 171
172static void nf_ct_frag6_expire(unsigned long data) 172static void nf_ct_frag6_expire(struct timer_list *t)
173{ 173{
174 struct inet_frag_queue *frag = from_timer(frag, t, timer);
174 struct frag_queue *fq; 175 struct frag_queue *fq;
175 struct net *net; 176 struct net *net;
176 177
177 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); 178 fq = container_of(frag, struct frag_queue, q);
178 net = container_of(fq->q.net, struct net, nf_frag.frags); 179 net = container_of(fq->q.net, struct net, nf_frag.frags);
179 180
180 ip6_expire_frag_queue(net, fq, &nf_frags); 181 ip6_expire_frag_queue(net, fq, &nf_frags);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 46d6dba50698..1d2fb9267d6f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -290,7 +290,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
290 else 290 else
291 return NF_ACCEPT; 291 return NF_ACCEPT;
292 } 292 }
293 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 293 /* Only ICMPs can be IP_CT_IS_REPLY: */
294 /* fall through */
294 case IP_CT_NEW: 295 case IP_CT_NEW:
295 /* Seen it before? This can happen for loopback, retrans, 296 /* Seen it before? This can happen for loopback, retrans,
296 * or local packets. 297 * or local packets.
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index a338bbc33cf3..4fe7c90962dd 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -39,7 +39,7 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
39 * 39 *
40 * The network header must be set before calling this. 40 * The network header must be set before calling this.
41 */ 41 */
42void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) 42__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
43{ 43{
44 static u32 ip6_proxy_idents_hashrnd __read_mostly; 44 static u32 ip6_proxy_idents_hashrnd __read_mostly;
45 struct in6_addr buf[2]; 45 struct in6_addr buf[2];
@@ -51,14 +51,14 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
51 offsetof(struct ipv6hdr, saddr), 51 offsetof(struct ipv6hdr, saddr),
52 sizeof(buf), buf); 52 sizeof(buf), buf);
53 if (!addrs) 53 if (!addrs)
54 return; 54 return 0;
55 55
56 net_get_random_once(&ip6_proxy_idents_hashrnd, 56 net_get_random_once(&ip6_proxy_idents_hashrnd,
57 sizeof(ip6_proxy_idents_hashrnd)); 57 sizeof(ip6_proxy_idents_hashrnd));
58 58
59 id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, 59 id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
60 &addrs[1], &addrs[0]); 60 &addrs[1], &addrs[0]);
61 skb_shinfo(skb)->ip6_frag_id = htonl(id); 61 return htonl(id);
62} 62}
63EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); 63EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
64 64
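ipv6_proxy_select_ident() now hands the chosen identification back to the caller instead of stashing it in skb_shinfo(skb)->ip6_frag_id (a field being removed in this series), and returns 0 when the addresses cannot be read. A hedged sketch of the new calling convention, assuming fh points at the struct frag_hdr being built:

    __be32 id = ipv6_proxy_select_ident(net, skb);
    if (id)
            fh->identification = id;   /* assumption: caller owns the frag header */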
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ac826dd338ff..d12c55dad7d1 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,9 +154,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
154 ICMP6_MIB_OUTERRORS); 154 ICMP6_MIB_OUTERRORS);
155 ip6_flush_pending_frames(sk); 155 ip6_flush_pending_frames(sk);
156 } else { 156 } else {
157 err = icmpv6_push_pending_frames(sk, &fl6, 157 icmpv6_push_pending_frames(sk, &fl6,
158 (struct icmp6hdr *) &pfh.icmph, 158 (struct icmp6hdr *)&pfh.icmph, len);
159 len);
160 } 159 }
161 release_sock(sk); 160 release_sock(sk);
162 161
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e4462b0ff801..761a473a07c5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1055,6 +1055,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
1055 if (optname == IPV6_CHECKSUM || 1055 if (optname == IPV6_CHECKSUM ||
1056 optname == IPV6_HDRINCL) 1056 optname == IPV6_HDRINCL)
1057 break; 1057 break;
1058 /* fall through */
1058 default: 1059 default:
1059 return ipv6_setsockopt(sk, level, optname, optval, optlen); 1060 return ipv6_setsockopt(sk, level, optname, optval, optlen);
1060 } 1061 }
@@ -1077,6 +1078,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
1077 if (optname == IPV6_CHECKSUM || 1078 if (optname == IPV6_CHECKSUM ||
1078 optname == IPV6_HDRINCL) 1079 optname == IPV6_HDRINCL)
1079 break; 1080 break;
1081 /* fall through */
1080 default: 1082 default:
1081 return compat_ipv6_setsockopt(sk, level, optname, 1083 return compat_ipv6_setsockopt(sk, level, optname,
1082 optval, optlen); 1084 optval, optlen);
@@ -1138,6 +1140,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
1138 if (optname == IPV6_CHECKSUM || 1140 if (optname == IPV6_CHECKSUM ||
1139 optname == IPV6_HDRINCL) 1141 optname == IPV6_HDRINCL)
1140 break; 1142 break;
1143 /* fall through */
1141 default: 1144 default:
1142 return ipv6_getsockopt(sk, level, optname, optval, optlen); 1145 return ipv6_getsockopt(sk, level, optname, optval, optlen);
1143 } 1146 }
@@ -1160,6 +1163,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
1160 if (optname == IPV6_CHECKSUM || 1163 if (optname == IPV6_CHECKSUM ||
1161 optname == IPV6_HDRINCL) 1164 optname == IPV6_HDRINCL)
1162 break; 1165 break;
1166 /* fall through */
1163 default: 1167 default:
1164 return compat_ipv6_getsockopt(sk, level, optname, 1168 return compat_ipv6_getsockopt(sk, level, optname,
1165 optval, optlen); 1169 optval, optlen);
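The raw.c hunks, like several others in this diff (ip6mr.c, nf_nat_l3proto_ipv6.c), add fall-through annotations at deliberate case fall-throughs so that -Wimplicit-fallthrough can flag the accidental ones. The placement contract, in miniature; the comment sits exactly where control can reach the next label:

    switch (level) {
    case SOL_RAW:
            if (optname == IPV6_CHECKSUM)
                    break;                  /* handled after the switch */
            /* fall through */
    default:
            return ipv6_setsockopt(sk, level, optname, optval, optlen);
    }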
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 846012eae526..afbc000ad4f2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -170,12 +170,13 @@ out:
170} 170}
171EXPORT_SYMBOL(ip6_expire_frag_queue); 171EXPORT_SYMBOL(ip6_expire_frag_queue);
172 172
173static void ip6_frag_expire(unsigned long data) 173static void ip6_frag_expire(struct timer_list *t)
174{ 174{
175 struct inet_frag_queue *frag = from_timer(frag, t, timer);
175 struct frag_queue *fq; 176 struct frag_queue *fq;
176 struct net *net; 177 struct net *net;
177 178
178 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); 179 fq = container_of(frag, struct frag_queue, q);
179 net = container_of(fq->q.net, struct net, ipv6.frags); 180 net = container_of(fq->q.net, struct net, ipv6.frags);
180 181
181 ip6_expire_frag_queue(net, fq, &ip6_frags); 182 ip6_expire_frag_queue(net, fq, &ip6_frags);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 26cc9f483b6d..7a8d1500d374 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -44,6 +44,7 @@
44#include <linux/seq_file.h> 44#include <linux/seq_file.h>
45#include <linux/nsproxy.h> 45#include <linux/nsproxy.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/jhash.h>
47#include <net/net_namespace.h> 48#include <net/net_namespace.h>
48#include <net/snmp.h> 49#include <net/snmp.h>
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -104,6 +105,9 @@ static int rt6_fill_node(struct net *net,
104 struct in6_addr *dst, struct in6_addr *src, 105 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq, 106 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags); 107 unsigned int flags);
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
107 111
108#ifdef CONFIG_IPV6_ROUTE_INFO 112#ifdef CONFIG_IPV6_ROUTE_INFO
109static struct rt6_info *rt6_add_route_info(struct net *net, 113static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -139,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt)
139{ 143{
140 if (!list_empty(&rt->rt6i_uncached)) { 144 if (!list_empty(&rt->rt6i_uncached)) {
141 struct uncached_list *ul = rt->rt6i_uncached_list; 145 struct uncached_list *ul = rt->rt6i_uncached_list;
146 struct net *net = dev_net(rt->dst.dev);
142 147
143 spin_lock_bh(&ul->lock); 148 spin_lock_bh(&ul->lock);
144 list_del(&rt->rt6i_uncached); 149 list_del(&rt->rt6i_uncached);
150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
145 spin_unlock_bh(&ul->lock); 151 spin_unlock_bh(&ul->lock);
146 } 152 }
147} 153}
@@ -355,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356 1, DST_OBSOLETE_FORCE_CHK, flags); 362 1, DST_OBSOLETE_FORCE_CHK, flags);
357 363
358 if (rt) 364 if (rt) {
359 rt6_info_init(rt); 365 rt6_info_init(rt);
366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 }
360 368
361 return rt; 369 return rt;
362} 370}
@@ -369,17 +377,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net,
369 377
370 if (rt) { 378 if (rt) {
371 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); 379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
372 if (rt->rt6i_pcpu) { 380 if (!rt->rt6i_pcpu) {
373 int cpu;
374
375 for_each_possible_cpu(cpu) {
376 struct rt6_info **p;
377
378 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
379 /* no one shares rt */
380 *p = NULL;
381 }
382 } else {
383 dst_release_immediate(&rt->dst); 381 dst_release_immediate(&rt->dst);
384 return NULL; 382 return NULL;
385 } 383 }
@@ -392,6 +390,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
392static void ip6_dst_destroy(struct dst_entry *dst) 390static void ip6_dst_destroy(struct dst_entry *dst)
393{ 391{
394 struct rt6_info *rt = (struct rt6_info *)dst; 392 struct rt6_info *rt = (struct rt6_info *)dst;
393 struct rt6_exception_bucket *bucket;
395 struct dst_entry *from = dst->from; 394 struct dst_entry *from = dst->from;
396 struct inet6_dev *idev; 395 struct inet6_dev *idev;
397 396
@@ -404,6 +403,11 @@ static void ip6_dst_destroy(struct dst_entry *dst)
404 rt->rt6i_idev = NULL; 403 rt->rt6i_idev = NULL;
405 in6_dev_put(idev); 404 in6_dev_put(idev);
406 } 405 }
406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 if (bucket) {
408 rt->rt6i_exception_bucket = NULL;
409 kfree(bucket);
410 }
407 411
408 dst->from = NULL; 412 dst->from = NULL;
409 dst_release(from); 413 dst_release(from);
@@ -468,6 +472,11 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
468 &match->rt6i_siblings, rt6i_siblings) { 472 &match->rt6i_siblings, rt6i_siblings) {
469 route_choosen--; 473 route_choosen--;
470 if (route_choosen == 0) { 474 if (route_choosen == 0) {
475 struct inet6_dev *idev = sibling->rt6i_idev;
476
477 if (!netif_carrier_ok(sibling->dst.dev) &&
478 idev->cnf.ignore_routes_with_linkdown)
479 break;
471 if (rt6_score_route(sibling, oif, strict) < 0) 480 if (rt6_score_route(sibling, oif, strict) < 0)
472 break; 481 break;
473 match = sibling; 482 match = sibling;
@@ -478,7 +487,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
478} 487}
479 488
480/* 489/*
481 * Route lookup. Any table->tb6_lock is implied. 490 * Route lookup. rcu_read_lock() should be held.
482 */ 491 */
483 492
484static inline struct rt6_info *rt6_device_match(struct net *net, 493static inline struct rt6_info *rt6_device_match(struct net *net,
@@ -493,7 +502,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
493 if (!oif && ipv6_addr_any(saddr)) 502 if (!oif && ipv6_addr_any(saddr))
494 goto out; 503 goto out;
495 504
496 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 505 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
497 struct net_device *dev = sprt->dst.dev; 506 struct net_device *dev = sprt->dst.dev;
498 507
499 if (oif) { 508 if (oif) {
@@ -702,6 +711,7 @@ out:
702} 711}
703 712
704static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 713static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
714 struct rt6_info *leaf,
705 struct rt6_info *rr_head, 715 struct rt6_info *rr_head,
706 u32 metric, int oif, int strict, 716 u32 metric, int oif, int strict,
707 bool *do_rr) 717 bool *do_rr)
@@ -711,7 +721,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
711 721
712 match = NULL; 722 match = NULL;
713 cont = NULL; 723 cont = NULL;
714 for (rt = rr_head; rt; rt = rt->dst.rt6_next) { 724 for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
715 if (rt->rt6i_metric != metric) { 725 if (rt->rt6i_metric != metric) {
716 cont = rt; 726 cont = rt;
717 break; 727 break;
@@ -720,7 +730,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
720 match = find_match(rt, oif, strict, &mpri, match, do_rr); 730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
721 } 731 }
722 732
723 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { 733 for (rt = leaf; rt && rt != rr_head;
734 rt = rcu_dereference(rt->dst.rt6_next)) {
724 if (rt->rt6i_metric != metric) { 735 if (rt->rt6i_metric != metric) {
725 cont = rt; 736 cont = rt;
726 break; 737 break;
@@ -732,37 +743,59 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
732 if (match || !cont) 743 if (match || !cont)
733 return match; 744 return match;
734 745
735 for (rt = cont; rt; rt = rt->dst.rt6_next) 746 for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
736 match = find_match(rt, oif, strict, &mpri, match, do_rr); 747 match = find_match(rt, oif, strict, &mpri, match, do_rr);
737 748
738 return match; 749 return match;
739} 750}
740 751
741static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 752static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
753 int oif, int strict)
742{ 754{
755 struct rt6_info *leaf = rcu_dereference(fn->leaf);
743 struct rt6_info *match, *rt0; 756 struct rt6_info *match, *rt0;
744 struct net *net;
745 bool do_rr = false; 757 bool do_rr = false;
758 int key_plen;
746 759
747 rt0 = fn->rr_ptr; 760 if (!leaf || leaf == net->ipv6.ip6_null_entry)
761 return net->ipv6.ip6_null_entry;
762
763 rt0 = rcu_dereference(fn->rr_ptr);
748 if (!rt0) 764 if (!rt0)
749 fn->rr_ptr = rt0 = fn->leaf; 765 rt0 = leaf;
766
767 /* Double check to make sure fn is not an intermediate node
 768 * and fn->leaf does not point to its child's leaf
769 * (This might happen if all routes under fn are deleted from
770 * the tree and fib6_repair_tree() is called on the node.)
771 */
772 key_plen = rt0->rt6i_dst.plen;
773#ifdef CONFIG_IPV6_SUBTREES
774 if (rt0->rt6i_src.plen)
775 key_plen = rt0->rt6i_src.plen;
776#endif
777 if (fn->fn_bit != key_plen)
778 return net->ipv6.ip6_null_entry;
750 779
751 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, 780 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
752 &do_rr); 781 &do_rr);
753 782
754 if (do_rr) { 783 if (do_rr) {
755 struct rt6_info *next = rt0->dst.rt6_next; 784 struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
756 785
757 /* no entries matched; do round-robin */ 786 /* no entries matched; do round-robin */
758 if (!next || next->rt6i_metric != rt0->rt6i_metric) 787 if (!next || next->rt6i_metric != rt0->rt6i_metric)
759 next = fn->leaf; 788 next = leaf;
760 789
761 if (next != rt0) 790 if (next != rt0) {
762 fn->rr_ptr = next; 791 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
792 /* make sure next is not being deleted from the tree */
793 if (next->rt6i_node)
794 rcu_assign_pointer(fn->rr_ptr, next);
795 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
796 }
763 } 797 }
764 798
765 net = dev_net(rt0->dst.dev);
766 return match ? match : net->ipv6.ip6_null_entry; 799 return match ? match : net->ipv6.ip6_null_entry;
767} 800}
768 801
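With rt6_select() running under RCU, the round-robin pointer update becomes a single-writer pattern: readers follow fn->rr_ptr with rcu_dereference(), while the updater briefly takes tb6_lock, re-checks that the candidate is still linked in the tree (rt6i_node non-NULL), and publishes it. The writer side, in miniature:

    spin_lock_bh(&table->tb6_lock);
    if (next->rt6i_node)                       /* still in the tree? */
            rcu_assign_pointer(fn->rr_ptr, next);
    spin_unlock_bh(&table->tb6_lock);

The added fn_bit/key_plen comparison additionally rejects a leaf inherited from a child after fib6_repair_tree(), which would otherwise let an intermediate node answer for a prefix it does not own.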
@@ -850,13 +883,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
850static struct fib6_node* fib6_backtrack(struct fib6_node *fn, 883static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
851 struct in6_addr *saddr) 884 struct in6_addr *saddr)
852{ 885{
853 struct fib6_node *pn; 886 struct fib6_node *pn, *sn;
854 while (1) { 887 while (1) {
855 if (fn->fn_flags & RTN_TL_ROOT) 888 if (fn->fn_flags & RTN_TL_ROOT)
856 return NULL; 889 return NULL;
857 pn = fn->parent; 890 pn = rcu_dereference(fn->parent);
858 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) 891 sn = FIB6_SUBTREE(pn);
859 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); 892 if (sn && sn != fn)
893 fn = fib6_lookup(sn, NULL, saddr);
860 else 894 else
861 fn = pn; 895 fn = pn;
862 if (fn->fn_flags & RTN_RTINFO) 896 if (fn->fn_flags & RTN_RTINFO)
@@ -864,29 +898,59 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
864 } 898 }
865} 899}
866 900
901static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
902 bool null_fallback)
903{
904 struct rt6_info *rt = *prt;
905
906 if (dst_hold_safe(&rt->dst))
907 return true;
908 if (null_fallback) {
909 rt = net->ipv6.ip6_null_entry;
910 dst_hold(&rt->dst);
911 } else {
912 rt = NULL;
913 }
914 *prt = rt;
915 return false;
916}
917
867static struct rt6_info *ip6_pol_route_lookup(struct net *net, 918static struct rt6_info *ip6_pol_route_lookup(struct net *net,
868 struct fib6_table *table, 919 struct fib6_table *table,
869 struct flowi6 *fl6, int flags) 920 struct flowi6 *fl6, int flags)
870{ 921{
922 struct rt6_info *rt, *rt_cache;
871 struct fib6_node *fn; 923 struct fib6_node *fn;
872 struct rt6_info *rt;
873 924
874 read_lock_bh(&table->tb6_lock); 925 rcu_read_lock();
875 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 926 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
876restart: 927restart:
877 rt = fn->leaf; 928 rt = rcu_dereference(fn->leaf);
878 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 929 if (!rt) {
879 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) 930 rt = net->ipv6.ip6_null_entry;
880 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); 931 } else {
932 rt = rt6_device_match(net, rt, &fl6->saddr,
933 fl6->flowi6_oif, flags);
934 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
935 rt = rt6_multipath_select(rt, fl6,
936 fl6->flowi6_oif, flags);
937 }
881 if (rt == net->ipv6.ip6_null_entry) { 938 if (rt == net->ipv6.ip6_null_entry) {
882 fn = fib6_backtrack(fn, &fl6->saddr); 939 fn = fib6_backtrack(fn, &fl6->saddr);
883 if (fn) 940 if (fn)
884 goto restart; 941 goto restart;
885 } 942 }
886 dst_use(&rt->dst, jiffies); 943 /* Search through exception table */
887 read_unlock_bh(&table->tb6_lock); 944 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
945 if (rt_cache)
946 rt = rt_cache;
888 947
889 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); 948 if (ip6_hold_safe(net, &rt, true))
949 dst_use_noref(&rt->dst, jiffies);
950
951 rcu_read_unlock();
952
953 trace_fib6_table_lookup(net, rt, table, fl6);
890 954
891 return rt; 955 return rt;
892 956
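Because the route tables are now read under RCU rather than tb6_lock, a lookup can race with an entry being freed; the new ip6_hold_safe() therefore relies on dst_hold_safe(), which takes a reference only if the refcount is still nonzero. The fallback pattern in isolation (lookup() is a stand-in):

    rcu_read_lock();
    rt = lookup(table, fl6);                  /* hypothetical */
    if (!dst_hold_safe(&rt->dst)) {
            rt = net->ipv6.ip6_null_entry;    /* always-valid fallback */
            dst_hold(&rt->dst);
    }
    rcu_read_unlock();                        /* rt stays valid via its ref */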
@@ -938,9 +1002,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
938 struct fib6_table *table; 1002 struct fib6_table *table;
939 1003
940 table = rt->rt6i_table; 1004 table = rt->rt6i_table;
941 write_lock_bh(&table->tb6_lock); 1005 spin_lock_bh(&table->tb6_lock);
942 err = fib6_add(&table->tb6_root, rt, info, mxc, extack); 1006 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
943 write_unlock_bh(&table->tb6_lock); 1007 spin_unlock_bh(&table->tb6_lock);
944 1008
945 return err; 1009 return err;
946} 1010}
@@ -960,7 +1024,7 @@ static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
960{ 1024{
961 struct net_device *dev = rt->dst.dev; 1025 struct net_device *dev = rt->dst.dev;
962 1026
963 if (rt->rt6i_flags & RTF_LOCAL) { 1027 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
964 /* for copies of local routes, dst->dev needs to be the 1028 /* for copies of local routes, dst->dev needs to be the
965 * device if it is a master device, the master device if 1029 * device if it is a master device, the master device if
966 * device is enslaved, and the loopback as the default 1030 * device is enslaved, and the loopback as the default
@@ -1038,7 +1102,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1038 return pcpu_rt; 1102 return pcpu_rt;
1039} 1103}
1040 1104
1041/* It should be called with read_lock_bh(&tb6_lock) acquired */ 1105/* It should be called with rcu_read_lock() acquired */
1042static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) 1106static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1043{ 1107{
1044 struct rt6_info *pcpu_rt, **p; 1108 struct rt6_info *pcpu_rt, **p;
@@ -1046,16 +1110,14 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1046 p = this_cpu_ptr(rt->rt6i_pcpu); 1110 p = this_cpu_ptr(rt->rt6i_pcpu);
1047 pcpu_rt = *p; 1111 pcpu_rt = *p;
1048 1112
1049 if (pcpu_rt) { 1113 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
1050 dst_hold(&pcpu_rt->dst);
1051 rt6_dst_from_metrics_check(pcpu_rt); 1114 rt6_dst_from_metrics_check(pcpu_rt);
1052 } 1115
1053 return pcpu_rt; 1116 return pcpu_rt;
1054} 1117}
1055 1118
1056static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) 1119static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1057{ 1120{
1058 struct fib6_table *table = rt->rt6i_table;
1059 struct rt6_info *pcpu_rt, *prev, **p; 1121 struct rt6_info *pcpu_rt, *prev, **p;
1060 1122
1061 pcpu_rt = ip6_rt_pcpu_alloc(rt); 1123 pcpu_rt = ip6_rt_pcpu_alloc(rt);
@@ -1066,36 +1128,526 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1066 return net->ipv6.ip6_null_entry; 1128 return net->ipv6.ip6_null_entry;
1067 } 1129 }
1068 1130
1069 read_lock_bh(&table->tb6_lock);
1070 if (rt->rt6i_pcpu) {
1071 p = this_cpu_ptr(rt->rt6i_pcpu);
1072 prev = cmpxchg(p, NULL, pcpu_rt);
1073 if (prev) {
1074 /* If someone did it before us, return prev instead */
1075 dst_release_immediate(&pcpu_rt->dst);
1076 pcpu_rt = prev;
1077 }
1078 } else {
1079 /* rt has been removed from the fib6 tree
1080 * before we have a chance to acquire the read_lock.
1081 * In this case, don't brother to create a pcpu rt
1082 * since rt is going away anyway. The next
1083 * dst_check() will trigger a re-lookup.
1084 */
1085 dst_release_immediate(&pcpu_rt->dst);
1086 pcpu_rt = rt;
1087 }
1088 dst_hold(&pcpu_rt->dst); 1131 dst_hold(&pcpu_rt->dst);
1132 p = this_cpu_ptr(rt->rt6i_pcpu);
1133 prev = cmpxchg(p, NULL, pcpu_rt);
1134 BUG_ON(prev);
1135
1089 rt6_dst_from_metrics_check(pcpu_rt); 1136 rt6_dst_from_metrics_check(pcpu_rt);
1090 read_unlock_bh(&table->tb6_lock);
1091 return pcpu_rt; 1137 return pcpu_rt;
1092} 1138}
1093 1139
1140/* exception hash table implementation
1141 */
1142static DEFINE_SPINLOCK(rt6_exception_lock);
1143
1144/* Remove rt6_ex from hash table and free the memory
1145 * Caller must hold rt6_exception_lock
1146 */
1147static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1148 struct rt6_exception *rt6_ex)
1149{
1150 struct net *net;
1151
1152 if (!bucket || !rt6_ex)
1153 return;
1154
1155 net = dev_net(rt6_ex->rt6i->dst.dev);
1156 rt6_ex->rt6i->rt6i_node = NULL;
1157 hlist_del_rcu(&rt6_ex->hlist);
1158 rt6_release(rt6_ex->rt6i);
1159 kfree_rcu(rt6_ex, rcu);
1160 WARN_ON_ONCE(!bucket->depth);
1161 bucket->depth--;
1162 net->ipv6.rt6_stats->fib_rt_cache--;
1163}
1164
1165/* Remove oldest rt6_ex in bucket and free the memory
1166 * Caller must hold rt6_exception_lock
1167 */
1168static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1169{
1170 struct rt6_exception *rt6_ex, *oldest = NULL;
1171
1172 if (!bucket)
1173 return;
1174
1175 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1176 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1177 oldest = rt6_ex;
1178 }
1179 rt6_remove_exception(bucket, oldest);
1180}
1181
1182static u32 rt6_exception_hash(const struct in6_addr *dst,
1183 const struct in6_addr *src)
1184{
1185 static u32 seed __read_mostly;
1186 u32 val;
1187
1188 net_get_random_once(&seed, sizeof(seed));
1189 val = jhash(dst, sizeof(*dst), seed);
1190
1191#ifdef CONFIG_IPV6_SUBTREES
1192 if (src)
1193 val = jhash(src, sizeof(*src), val);
1194#endif
1195 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1196}
1197
1198/* Helper function to find the cached rt in the hash table
1199 * and update bucket pointer to point to the bucket for this
1200 * (daddr, saddr) pair
1201 * Caller must hold rt6_exception_lock
1202 */
1203static struct rt6_exception *
1204__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1205 const struct in6_addr *daddr,
1206 const struct in6_addr *saddr)
1207{
1208 struct rt6_exception *rt6_ex;
1209 u32 hval;
1210
1211 if (!(*bucket) || !daddr)
1212 return NULL;
1213
1214 hval = rt6_exception_hash(daddr, saddr);
1215 *bucket += hval;
1216
1217 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1218 struct rt6_info *rt6 = rt6_ex->rt6i;
1219 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1220
1221#ifdef CONFIG_IPV6_SUBTREES
1222 if (matched && saddr)
1223 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1224#endif
1225 if (matched)
1226 return rt6_ex;
1227 }
1228 return NULL;
1229}
1230
1231/* Helper function to find the cached rt in the hash table
1232 * and update bucket pointer to point to the bucket for this
1233 * (daddr, saddr) pair
1234 * Caller must hold rcu_read_lock()
1235 */
1236static struct rt6_exception *
1237__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1238 const struct in6_addr *daddr,
1239 const struct in6_addr *saddr)
1240{
1241 struct rt6_exception *rt6_ex;
1242 u32 hval;
1243
1244 WARN_ON_ONCE(!rcu_read_lock_held());
1245
1246 if (!(*bucket) || !daddr)
1247 return NULL;
1248
1249 hval = rt6_exception_hash(daddr, saddr);
1250 *bucket += hval;
1251
1252 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1253 struct rt6_info *rt6 = rt6_ex->rt6i;
1254 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1255
1256#ifdef CONFIG_IPV6_SUBTREES
1257 if (matched && saddr)
1258 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1259#endif
1260 if (matched)
1261 return rt6_ex;
1262 }
1263 return NULL;
1264}
1265
1266static int rt6_insert_exception(struct rt6_info *nrt,
1267 struct rt6_info *ort)
1268{
1269 struct net *net = dev_net(ort->dst.dev);
1270 struct rt6_exception_bucket *bucket;
1271 struct in6_addr *src_key = NULL;
1272 struct rt6_exception *rt6_ex;
1273 int err = 0;
1274
1275 /* ort can't be a cache or pcpu route */
1276 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1277 ort = (struct rt6_info *)ort->dst.from;
1278 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1279
1280 spin_lock_bh(&rt6_exception_lock);
1281
1282 if (ort->exception_bucket_flushed) {
1283 err = -EINVAL;
1284 goto out;
1285 }
1286
1287 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1288 lockdep_is_held(&rt6_exception_lock));
1289 if (!bucket) {
1290 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1291 GFP_ATOMIC);
1292 if (!bucket) {
1293 err = -ENOMEM;
1294 goto out;
1295 }
1296 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1297 }
1298
1299#ifdef CONFIG_IPV6_SUBTREES
1300 /* rt6i_src.plen != 0 indicates ort is in subtree
1301 * and exception table is indexed by a hash of
1302 * both rt6i_dst and rt6i_src.
1303 * Otherwise, the exception table is indexed by
1304 * a hash of only rt6i_dst.
1305 */
1306 if (ort->rt6i_src.plen)
1307 src_key = &nrt->rt6i_src.addr;
1308#endif
1309
1310 /* Update rt6i_prefsrc as it could be changed
1311 * in rt6_remove_prefsrc()
1312 */
1313 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
1314 /* rt6_mtu_change() might lower mtu on ort.
1315 * Only insert this exception route if its mtu
1316 * is less than ort's mtu value.
1317 */
1318 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1319 err = -EINVAL;
1320 goto out;
1321 }
1322
1323 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1324 src_key);
1325 if (rt6_ex)
1326 rt6_remove_exception(bucket, rt6_ex);
1327
1328 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1329 if (!rt6_ex) {
1330 err = -ENOMEM;
1331 goto out;
1332 }
1333 rt6_ex->rt6i = nrt;
1334 rt6_ex->stamp = jiffies;
1335 atomic_inc(&nrt->rt6i_ref);
1336 nrt->rt6i_node = ort->rt6i_node;
1337 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1338 bucket->depth++;
1339 net->ipv6.rt6_stats->fib_rt_cache++;
1340
1341 if (bucket->depth > FIB6_MAX_DEPTH)
1342 rt6_exception_remove_oldest(bucket);
1343
1344out:
1345 spin_unlock_bh(&rt6_exception_lock);
1346
1347 /* Update fn->fn_sernum to invalidate all cached dst */
1348 if (!err) {
1349 fib6_update_sernum(ort);
1350 fib6_force_start_gc(net);
1351 }
1352
1353 return err;
1354}
1355
1356void rt6_flush_exceptions(struct rt6_info *rt)
1357{
1358 struct rt6_exception_bucket *bucket;
1359 struct rt6_exception *rt6_ex;
1360 struct hlist_node *tmp;
1361 int i;
1362
1363 spin_lock_bh(&rt6_exception_lock);
1364 /* Prevent rt6_insert_exception() from recreating the bucket list */
1365 rt->exception_bucket_flushed = 1;
1366
1367 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1368 lockdep_is_held(&rt6_exception_lock));
1369 if (!bucket)
1370 goto out;
1371
1372 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1373 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1374 rt6_remove_exception(bucket, rt6_ex);
1375 WARN_ON_ONCE(bucket->depth);
1376 bucket++;
1377 }
1378
1379out:
1380 spin_unlock_bh(&rt6_exception_lock);
1381}
1382
1383 /* Find the cached rt in the hash table inside the passed-in rt
1384 * Caller has to hold rcu_read_lock()
1385 */
1386static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1387 struct in6_addr *daddr,
1388 struct in6_addr *saddr)
1389{
1390 struct rt6_exception_bucket *bucket;
1391 struct in6_addr *src_key = NULL;
1392 struct rt6_exception *rt6_ex;
1393 struct rt6_info *res = NULL;
1394
1395 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1396
1397#ifdef CONFIG_IPV6_SUBTREES
1398 /* rt6i_src.plen != 0 indicates rt is in subtree
1399 * and exception table is indexed by a hash of
1400 * both rt6i_dst and rt6i_src.
1401 * Otherwise, the exception table is indexed by
1402 * a hash of only rt6i_dst.
1403 */
1404 if (rt->rt6i_src.plen)
1405 src_key = saddr;
1406#endif
1407 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1408
1409 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1410 res = rt6_ex->rt6i;
1411
1412 return res;
1413}
1414
1415/* Remove the passed in cached rt from the hash table that contains it */
1416int rt6_remove_exception_rt(struct rt6_info *rt)
1417{
1418 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1419 struct rt6_exception_bucket *bucket;
1420 struct in6_addr *src_key = NULL;
1421 struct rt6_exception *rt6_ex;
1422 int err;
1423
1424 if (!from ||
1425 !(rt->rt6i_flags & RTF_CACHE))
1426 return -EINVAL;
1427
1428 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1429 return -ENOENT;
1430
1431 spin_lock_bh(&rt6_exception_lock);
1432 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1433 lockdep_is_held(&rt6_exception_lock));
1434#ifdef CONFIG_IPV6_SUBTREES
1435 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1436 * and exception table is indexed by a hash of
1437 * both rt6i_dst and rt6i_src.
1438 * Otherwise, the exception table is indexed by
1439 * a hash of only rt6i_dst.
1440 */
1441 if (from->rt6i_src.plen)
1442 src_key = &rt->rt6i_src.addr;
1443#endif
1444 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1445 &rt->rt6i_dst.addr,
1446 src_key);
1447 if (rt6_ex) {
1448 rt6_remove_exception(bucket, rt6_ex);
1449 err = 0;
1450 } else {
1451 err = -ENOENT;
1452 }
1453
1454 spin_unlock_bh(&rt6_exception_lock);
1455 return err;
1456}
1457
1458/* Find rt6_ex which contains the passed in rt cache and
1459 * refresh its stamp
1460 */
1461static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1462{
1463 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1464 struct rt6_exception_bucket *bucket;
1465 struct in6_addr *src_key = NULL;
1466 struct rt6_exception *rt6_ex;
1467
1468 if (!from ||
1469 !(rt->rt6i_flags & RTF_CACHE))
1470 return;
1471
1472 rcu_read_lock();
1473 bucket = rcu_dereference(from->rt6i_exception_bucket);
1474
1475#ifdef CONFIG_IPV6_SUBTREES
1476 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1477 * and exception table is indexed by a hash of
1478 * both rt6i_dst and rt6i_src.
1479 * Otherwise, the exception table is indexed by
1480 * a hash of only rt6i_dst.
1481 */
1482 if (from->rt6i_src.plen)
1483 src_key = &rt->rt6i_src.addr;
1484#endif
1485 rt6_ex = __rt6_find_exception_rcu(&bucket,
1486 &rt->rt6i_dst.addr,
1487 src_key);
1488 if (rt6_ex)
1489 rt6_ex->stamp = jiffies;
1490
1491 rcu_read_unlock();
1492}
1493
1494static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1495{
1496 struct rt6_exception_bucket *bucket;
1497 struct rt6_exception *rt6_ex;
1498 int i;
1499
1500 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1501 lockdep_is_held(&rt6_exception_lock));
1502
1503 if (bucket) {
1504 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1505 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1506 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1507 }
1508 bucket++;
1509 }
1510 }
1511}
1512
1513static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1514{
1515 struct rt6_exception_bucket *bucket;
1516 struct rt6_exception *rt6_ex;
1517 int i;
1518
1519 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1520 lockdep_is_held(&rt6_exception_lock));
1521
1522 if (bucket) {
1523 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1524 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1525 struct rt6_info *entry = rt6_ex->rt6i;
1526 /* For RTF_CACHE with rt6i_pmtu == 0
1527 * (i.e. a redirected route),
1528 * the metrics of its rt->dst.from have already
1529 * been updated.
1530 */
1531 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1532 entry->rt6i_pmtu = mtu;
1533 }
1534 bucket++;
1535 }
1536 }
1537}
1538
1539#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1540
1541static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1542 struct in6_addr *gateway)
1543{
1544 struct rt6_exception_bucket *bucket;
1545 struct rt6_exception *rt6_ex;
1546 struct hlist_node *tmp;
1547 int i;
1548
1549 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1550 return;
1551
1552 spin_lock_bh(&rt6_exception_lock);
1553 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1554 lockdep_is_held(&rt6_exception_lock));
1555
1556 if (bucket) {
1557 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1558 hlist_for_each_entry_safe(rt6_ex, tmp,
1559 &bucket->chain, hlist) {
1560 struct rt6_info *entry = rt6_ex->rt6i;
1561
1562 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1563 RTF_CACHE_GATEWAY &&
1564 ipv6_addr_equal(gateway,
1565 &entry->rt6i_gateway)) {
1566 rt6_remove_exception(bucket, rt6_ex);
1567 }
1568 }
1569 bucket++;
1570 }
1571 }
1572
1573 spin_unlock_bh(&rt6_exception_lock);
1574}
1575
1576static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1577 struct rt6_exception *rt6_ex,
1578 struct fib6_gc_args *gc_args,
1579 unsigned long now)
1580{
1581 struct rt6_info *rt = rt6_ex->rt6i;
1582
1583	/* we are pruning and obsoleting aged-out and non-gateway exceptions
1584	 * even if others still hold references to them, so that on the next
1585	 * dst_check() such references can be dropped.
1586	 * EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
1587	 * expired, independently of their aging, as per RFC 8201 section 4
1588	 */
1589 if (!(rt->rt6i_flags & RTF_EXPIRES) &&
1590 time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1591 RT6_TRACE("aging clone %p\n", rt);
1592 rt6_remove_exception(bucket, rt6_ex);
1593 return;
1594 } else if (rt->rt6i_flags & RTF_GATEWAY) {
1595 struct neighbour *neigh;
1596 __u8 neigh_flags = 0;
1597
1598 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1599 if (neigh) {
1600 neigh_flags = neigh->flags;
1601 neigh_release(neigh);
1602 }
1603 if (!(neigh_flags & NTF_ROUTER)) {
1604 RT6_TRACE("purging route %p via non-router but gateway\n",
1605 rt);
1606 rt6_remove_exception(bucket, rt6_ex);
1607 return;
1608 }
1609 } else if (__rt6_check_expired(rt)) {
1610 RT6_TRACE("purging expired route %p\n", rt);
1611 rt6_remove_exception(bucket, rt6_ex);
1612 return;
1613 }
1614 gc_args->more++;
1615}
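
Note: rt6_age_examine_exception() makes three independent pruning decisions: drop non-RTF_EXPIRES clones that have not been used within the GC timeout, drop RTF_GATEWAY entries whose neighbour is no longer advertising itself as a router, and drop anything already expired per RFC 8201 section 4. A user-space model of that policy, assuming time_after_eq() is the usual wrap-safe signed comparison on jiffies (all names here are illustrative):

#include <stdbool.h>
#include <stdio.h>

#define RTF_EXPIRES	0x1
#define RTF_GATEWAY	0x2
#define NTF_ROUTER	0x4

/* Wrap-safe "a >= b" for free-running counters, like the kernel's
 * time_after_eq() on jiffies.
 */
static bool time_after_eq_ul(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

/* true => prune this exception now */
static bool should_prune(unsigned int rt_flags, unsigned int neigh_flags,
			 unsigned long now, unsigned long lastuse,
			 unsigned long timeout, bool expired)
{
	if (!(rt_flags & RTF_EXPIRES) &&
	    time_after_eq_ul(now, lastuse + timeout))
		return true;			/* aged-out non-EXPIRES clone */
	if ((rt_flags & RTF_GATEWAY) && !(neigh_flags & NTF_ROUTER))
		return true;			/* gateway lost router status */
	if (expired)
		return true;			/* RFC 8201 sec. 4 expiry */
	return false;
}

int main(void)
{
	printf("%d\n", should_prune(0, 0, 1000, 0, 500, false));	  /* 1 */
	printf("%d\n", should_prune(RTF_EXPIRES | RTF_GATEWAY, NTF_ROUTER,
				    1000, 900, 500, false));		  /* 0 */
	return 0;
}
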
1616
1617void rt6_age_exceptions(struct rt6_info *rt,
1618 struct fib6_gc_args *gc_args,
1619 unsigned long now)
1620{
1621 struct rt6_exception_bucket *bucket;
1622 struct rt6_exception *rt6_ex;
1623 struct hlist_node *tmp;
1624 int i;
1625
1626 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1627 return;
1628
1629 spin_lock_bh(&rt6_exception_lock);
1630 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1631 lockdep_is_held(&rt6_exception_lock));
1632
1633 if (bucket) {
1634 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1635 hlist_for_each_entry_safe(rt6_ex, tmp,
1636 &bucket->chain, hlist) {
1637 rt6_age_examine_exception(bucket, rt6_ex,
1638 gc_args, now);
1639 }
1640 bucket++;
1641 }
1642 }
1643 spin_unlock_bh(&rt6_exception_lock);
1644}
1645
1094struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, 1646struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1095 int oif, struct flowi6 *fl6, int flags) 1647 int oif, struct flowi6 *fl6, int flags)
1096{ 1648{
1097 struct fib6_node *fn, *saved_fn; 1649 struct fib6_node *fn, *saved_fn;
1098 struct rt6_info *rt; 1650 struct rt6_info *rt, *rt_cache;
1099 int strict = 0; 1651 int strict = 0;
1100 1652
1101 strict |= flags & RT6_LOOKUP_F_IFACE; 1653 strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1103,7 +1655,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1103 if (net->ipv6.devconf_all->forwarding == 0) 1655 if (net->ipv6.devconf_all->forwarding == 0)
1104 strict |= RT6_LOOKUP_F_REACHABLE; 1656 strict |= RT6_LOOKUP_F_REACHABLE;
1105 1657
1106 read_lock_bh(&table->tb6_lock); 1658 rcu_read_lock();
1107 1659
1108 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1660 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1109 saved_fn = fn; 1661 saved_fn = fn;
@@ -1112,7 +1664,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1112 oif = 0; 1664 oif = 0;
1113 1665
1114redo_rt6_select: 1666redo_rt6_select:
1115 rt = rt6_select(fn, oif, strict); 1667 rt = rt6_select(net, fn, oif, strict);
1116 if (rt->rt6i_nsiblings) 1668 if (rt->rt6i_nsiblings)
1117 rt = rt6_multipath_select(rt, fl6, oif, strict); 1669 rt = rt6_multipath_select(rt, fl6, oif, strict);
1118 if (rt == net->ipv6.ip6_null_entry) { 1670 if (rt == net->ipv6.ip6_null_entry) {
@@ -1127,14 +1679,23 @@ redo_rt6_select:
1127 } 1679 }
1128 } 1680 }
1129 1681
 1682	/* Search through exception table */
1683 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1684 if (rt_cache)
1685 rt = rt_cache;
1130 1686
1131 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { 1687 if (rt == net->ipv6.ip6_null_entry) {
1132 dst_use(&rt->dst, jiffies); 1688 rcu_read_unlock();
1133 read_unlock_bh(&table->tb6_lock); 1689 dst_hold(&rt->dst);
1134 1690 trace_fib6_table_lookup(net, rt, table, fl6);
1135 rt6_dst_from_metrics_check(rt); 1691 return rt;
1136 1692 } else if (rt->rt6i_flags & RTF_CACHE) {
1137 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); 1693 if (ip6_hold_safe(net, &rt, true)) {
1694 dst_use_noref(&rt->dst, jiffies);
1695 rt6_dst_from_metrics_check(rt);
1696 }
1697 rcu_read_unlock();
1698 trace_fib6_table_lookup(net, rt, table, fl6);
1138 return rt; 1699 return rt;
1139 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && 1700 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1140 !(rt->rt6i_flags & RTF_GATEWAY))) { 1701 !(rt->rt6i_flags & RTF_GATEWAY))) {
@@ -1146,8 +1707,14 @@ redo_rt6_select:
1146 1707
1147 struct rt6_info *uncached_rt; 1708 struct rt6_info *uncached_rt;
1148 1709
1149 dst_use(&rt->dst, jiffies); 1710 if (ip6_hold_safe(net, &rt, true)) {
1150 read_unlock_bh(&table->tb6_lock); 1711 dst_use_noref(&rt->dst, jiffies);
1712 } else {
1713 rcu_read_unlock();
1714 uncached_rt = rt;
1715 goto uncached_rt_out;
1716 }
1717 rcu_read_unlock();
1151 1718
1152 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); 1719 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1153 dst_release(&rt->dst); 1720 dst_release(&rt->dst);
@@ -1157,12 +1724,14 @@ redo_rt6_select:
1157 * No need for another dst_hold() 1724 * No need for another dst_hold()
1158 */ 1725 */
1159 rt6_uncached_list_add(uncached_rt); 1726 rt6_uncached_list_add(uncached_rt);
1727 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1160 } else { 1728 } else {
1161 uncached_rt = net->ipv6.ip6_null_entry; 1729 uncached_rt = net->ipv6.ip6_null_entry;
1162 dst_hold(&uncached_rt->dst); 1730 dst_hold(&uncached_rt->dst);
1163 } 1731 }
1164 1732
1165 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); 1733uncached_rt_out:
1734 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
1166 return uncached_rt; 1735 return uncached_rt;
1167 1736
1168 } else { 1737 } else {
@@ -1170,26 +1739,28 @@ redo_rt6_select:
1170 1739
1171 struct rt6_info *pcpu_rt; 1740 struct rt6_info *pcpu_rt;
1172 1741
1173 rt->dst.lastuse = jiffies; 1742 dst_use_noref(&rt->dst, jiffies);
1174 rt->dst.__use++; 1743 local_bh_disable();
1175 pcpu_rt = rt6_get_pcpu_route(rt); 1744 pcpu_rt = rt6_get_pcpu_route(rt);
1176 1745
1177 if (pcpu_rt) { 1746 if (!pcpu_rt) {
1178 read_unlock_bh(&table->tb6_lock); 1747 /* atomic_inc_not_zero() is needed when using rcu */
1179 } else { 1748 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
1180 /* We have to do the read_unlock first 1749 /* No dst_hold() on rt is needed because grabbing
1181 * because rt6_make_pcpu_route() may trigger 1750 * rt->rt6i_ref makes sure rt can't be released.
1182 * ip6_dst_gc() which will take the write_lock. 1751 */
1183 */ 1752 pcpu_rt = rt6_make_pcpu_route(rt);
1184 dst_hold(&rt->dst); 1753 rt6_release(rt);
1185 read_unlock_bh(&table->tb6_lock); 1754 } else {
1186 pcpu_rt = rt6_make_pcpu_route(rt); 1755 /* rt is already removed from tree */
1187 dst_release(&rt->dst); 1756 pcpu_rt = net->ipv6.ip6_null_entry;
1757 dst_hold(&pcpu_rt->dst);
1758 }
1188 } 1759 }
1189 1760 local_bh_enable();
1190 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); 1761 rcu_read_unlock();
1762 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
1191 return pcpu_rt; 1763 return pcpu_rt;
1192
1193 } 1764 }
1194} 1765}
1195EXPORT_SYMBOL_GPL(ip6_pol_route); 1766EXPORT_SYMBOL_GPL(ip6_pol_route);
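
Note: the ip6_pol_route() conversion above is representative of the whole series: dst_use() under read_lock_bh() becomes a conditional hold under rcu_read_lock(), where the grab can fail once the last reference is gone (atomic_inc_not_zero() on rt6i_ref, dst_hold_safe() on the dst). A stdatomic sketch of that try-hold shape; try_hold() is a hypothetical name, not a kernel API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	atomic_int refcnt;
};

/* Equivalent of atomic_inc_not_zero(): succeed only while the object
 * still has at least one reference, so an RCU reader never resurrects
 * an object whose count already hit zero.
 */
static bool try_hold(struct obj *o)
{
	int c = atomic_load(&o->refcnt);

	while (c) {
		if (atomic_compare_exchange_weak(&o->refcnt, &c, c + 1))
			return true;
	}
	return false;
}

int main(void)
{
	struct obj live = { .refcnt = 1 }, dead = { .refcnt = 0 };

	printf("live: %d\n", try_hold(&live));	/* 1: ref taken       */
	printf("dead: %d\n", try_hold(&dead));	/* 0: caller falls back */
	return 0;
}
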
@@ -1325,9 +1896,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1325 struct dst_entry *new = NULL; 1896 struct dst_entry *new = NULL;
1326 1897
1327 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, 1898 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1328 DST_OBSOLETE_NONE, 0); 1899 DST_OBSOLETE_DEAD, 0);
1329 if (rt) { 1900 if (rt) {
1330 rt6_info_init(rt); 1901 rt6_info_init(rt);
1902 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
1331 1903
1332 new = &rt->dst; 1904 new = &rt->dst;
1333 new->__use = 1; 1905 new->__use = 1;
@@ -1491,23 +2063,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1491 2063
1492 if (!rt6_cache_allowed_for_pmtu(rt6)) { 2064 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1493 rt6_do_update_pmtu(rt6, mtu); 2065 rt6_do_update_pmtu(rt6, mtu);
2066 /* update rt6_ex->stamp for cache */
2067 if (rt6->rt6i_flags & RTF_CACHE)
2068 rt6_update_exception_stamp_rt(rt6);
1494 } else if (daddr) { 2069 } else if (daddr) {
1495 struct rt6_info *nrt6; 2070 struct rt6_info *nrt6;
1496 2071
1497 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); 2072 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1498 if (nrt6) { 2073 if (nrt6) {
1499 rt6_do_update_pmtu(nrt6, mtu); 2074 rt6_do_update_pmtu(nrt6, mtu);
1500 2075 if (rt6_insert_exception(nrt6, rt6))
1501 /* ip6_ins_rt(nrt6) will bump the 2076 dst_release_immediate(&nrt6->dst);
1502 * rt6->rt6i_node->fn_sernum
1503 * which will fail the next rt6_check() and
1504 * invalidate the sk->sk_dst_cache.
1505 */
1506 ip6_ins_rt(nrt6);
1507 /* Release the reference taken in
1508 * ip6_rt_cache_alloc()
1509 */
1510 dst_release(&nrt6->dst);
1511 } 2077 }
1512 } 2078 }
1513} 2079}
@@ -1571,7 +2137,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
1571 int flags) 2137 int flags)
1572{ 2138{
1573 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 2139 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1574 struct rt6_info *rt; 2140 struct rt6_info *rt, *rt_cache;
1575 struct fib6_node *fn; 2141 struct fib6_node *fn;
1576 2142
1577 /* Get the "current" route for this destination and 2143 /* Get the "current" route for this destination and
@@ -1584,10 +2150,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
1584 * routes. 2150 * routes.
1585 */ 2151 */
1586 2152
1587 read_lock_bh(&table->tb6_lock); 2153 rcu_read_lock();
1588 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2154 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1589restart: 2155restart:
1590 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 2156 for_each_fib6_node_rt_rcu(fn) {
1591 if (rt6_check_expired(rt)) 2157 if (rt6_check_expired(rt))
1592 continue; 2158 continue;
1593 if (rt->dst.error) 2159 if (rt->dst.error)
@@ -1596,8 +2162,23 @@ restart:
1596 continue; 2162 continue;
1597 if (fl6->flowi6_oif != rt->dst.dev->ifindex) 2163 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1598 continue; 2164 continue;
1599 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 2165 /* rt_cache's gateway might be different from its 'parent'
2166 * in the case of an ip redirect.
2167 * So we keep searching in the exception table if the gateway
2168 * is different.
2169 */
2170 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2171 rt_cache = rt6_find_cached_rt(rt,
2172 &fl6->daddr,
2173 &fl6->saddr);
2174 if (rt_cache &&
2175 ipv6_addr_equal(&rdfl->gateway,
2176 &rt_cache->rt6i_gateway)) {
2177 rt = rt_cache;
2178 break;
2179 }
1600 continue; 2180 continue;
2181 }
1601 break; 2182 break;
1602 } 2183 }
1603 2184
@@ -1615,11 +2196,11 @@ restart:
1615 } 2196 }
1616 2197
1617out: 2198out:
1618 dst_hold(&rt->dst); 2199 ip6_hold_safe(net, &rt, true);
1619 2200
1620 read_unlock_bh(&table->tb6_lock); 2201 rcu_read_unlock();
1621 2202
1622 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); 2203 trace_fib6_table_lookup(net, rt, table, fl6);
1623 return rt; 2204 return rt;
1624}; 2205};
1625 2206
@@ -1766,6 +2347,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1766 * do proper release of the net_device 2347 * do proper release of the net_device
1767 */ 2348 */
1768 rt6_uncached_list_add(rt); 2349 rt6_uncached_list_add(rt);
2350 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1769 2351
1770 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 2352 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1771 2353
@@ -1801,6 +2383,7 @@ out:
1801static int ip6_convert_metrics(struct mx6_config *mxc, 2383static int ip6_convert_metrics(struct mx6_config *mxc,
1802 const struct fib6_config *cfg) 2384 const struct fib6_config *cfg)
1803{ 2385{
2386 struct net *net = cfg->fc_nlinfo.nl_net;
1804 bool ecn_ca = false; 2387 bool ecn_ca = false;
1805 struct nlattr *nla; 2388 struct nlattr *nla;
1806 int remaining; 2389 int remaining;
@@ -1826,7 +2409,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
1826 char tmp[TCP_CA_NAME_MAX]; 2409 char tmp[TCP_CA_NAME_MAX];
1827 2410
1828 nla_strlcpy(tmp, nla, sizeof(tmp)); 2411 nla_strlcpy(tmp, nla, sizeof(tmp));
1829 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 2412 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
1830 if (val == TCP_CA_UNSPEC) 2413 if (val == TCP_CA_UNSPEC)
1831 goto err; 2414 goto err;
1832 } else { 2415 } else {
@@ -1901,6 +2484,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1901 goto out; 2484 goto out;
1902 } 2485 }
1903 2486
 2487	/* RTF_CACHE is an internal flag; cannot be set by userspace */
2488 if (cfg->fc_flags & RTF_CACHE) {
 2489		NL_SET_ERR_MSG(extack, "Userspace cannot set RTF_CACHE");
2490 goto out;
2491 }
2492
1904 if (cfg->fc_dst_len > 128) { 2493 if (cfg->fc_dst_len > 128) {
1905 NL_SET_ERR_MSG(extack, "Invalid prefix length"); 2494 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1906 goto out; 2495 goto out;
@@ -2216,9 +2805,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2216 } 2805 }
2217 2806
2218 table = rt->rt6i_table; 2807 table = rt->rt6i_table;
2219 write_lock_bh(&table->tb6_lock); 2808 spin_lock_bh(&table->tb6_lock);
2220 err = fib6_del(rt, info); 2809 err = fib6_del(rt, info);
2221 write_unlock_bh(&table->tb6_lock); 2810 spin_unlock_bh(&table->tb6_lock);
2222 2811
2223out: 2812out:
2224 ip6_rt_put(rt); 2813 ip6_rt_put(rt);
@@ -2244,7 +2833,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2244 if (rt == net->ipv6.ip6_null_entry) 2833 if (rt == net->ipv6.ip6_null_entry)
2245 goto out_put; 2834 goto out_put;
2246 table = rt->rt6i_table; 2835 table = rt->rt6i_table;
2247 write_lock_bh(&table->tb6_lock); 2836 spin_lock_bh(&table->tb6_lock);
2248 2837
2249 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { 2838 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2250 struct rt6_info *sibling, *next_sibling; 2839 struct rt6_info *sibling, *next_sibling;
@@ -2274,7 +2863,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2274 2863
2275 err = fib6_del(rt, info); 2864 err = fib6_del(rt, info);
2276out_unlock: 2865out_unlock:
2277 write_unlock_bh(&table->tb6_lock); 2866 spin_unlock_bh(&table->tb6_lock);
2278out_put: 2867out_put:
2279 ip6_rt_put(rt); 2868 ip6_rt_put(rt);
2280 2869
@@ -2288,9 +2877,9 @@ out_put:
2288static int ip6_route_del(struct fib6_config *cfg, 2877static int ip6_route_del(struct fib6_config *cfg,
2289 struct netlink_ext_ack *extack) 2878 struct netlink_ext_ack *extack)
2290{ 2879{
2880 struct rt6_info *rt, *rt_cache;
2291 struct fib6_table *table; 2881 struct fib6_table *table;
2292 struct fib6_node *fn; 2882 struct fib6_node *fn;
2293 struct rt6_info *rt;
2294 int err = -ESRCH; 2883 int err = -ESRCH;
2295 2884
2296 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 2885 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
@@ -2299,17 +2888,22 @@ static int ip6_route_del(struct fib6_config *cfg,
2299 return err; 2888 return err;
2300 } 2889 }
2301 2890
2302 read_lock_bh(&table->tb6_lock); 2891 rcu_read_lock();
2303 2892
2304 fn = fib6_locate(&table->tb6_root, 2893 fn = fib6_locate(&table->tb6_root,
2305 &cfg->fc_dst, cfg->fc_dst_len, 2894 &cfg->fc_dst, cfg->fc_dst_len,
2306 &cfg->fc_src, cfg->fc_src_len); 2895 &cfg->fc_src, cfg->fc_src_len,
2896 !(cfg->fc_flags & RTF_CACHE));
2307 2897
2308 if (fn) { 2898 if (fn) {
2309 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 2899 for_each_fib6_node_rt_rcu(fn) {
2310 if ((rt->rt6i_flags & RTF_CACHE) && 2900 if (cfg->fc_flags & RTF_CACHE) {
2311 !(cfg->fc_flags & RTF_CACHE)) 2901 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
2312 continue; 2902 &cfg->fc_src);
2903 if (!rt_cache)
2904 continue;
2905 rt = rt_cache;
2906 }
2313 if (cfg->fc_ifindex && 2907 if (cfg->fc_ifindex &&
2314 (!rt->dst.dev || 2908 (!rt->dst.dev ||
2315 rt->dst.dev->ifindex != cfg->fc_ifindex)) 2909 rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -2321,8 +2915,9 @@ static int ip6_route_del(struct fib6_config *cfg,
2321 continue; 2915 continue;
2322 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) 2916 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2323 continue; 2917 continue;
2324 dst_hold(&rt->dst); 2918 if (!dst_hold_safe(&rt->dst))
2325 read_unlock_bh(&table->tb6_lock); 2919 break;
2920 rcu_read_unlock();
2326 2921
2327 /* if gateway was specified only delete the one hop */ 2922 /* if gateway was specified only delete the one hop */
2328 if (cfg->fc_flags & RTF_GATEWAY) 2923 if (cfg->fc_flags & RTF_GATEWAY)
@@ -2331,7 +2926,7 @@ static int ip6_route_del(struct fib6_config *cfg,
2331 return __ip6_del_rt_siblings(rt, cfg); 2926 return __ip6_del_rt_siblings(rt, cfg);
2332 } 2927 }
2333 } 2928 }
2334 read_unlock_bh(&table->tb6_lock); 2929 rcu_read_unlock();
2335 2930
2336 return err; 2931 return err;
2337} 2932}
@@ -2435,8 +3030,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
2435 nrt->rt6i_protocol = RTPROT_REDIRECT; 3030 nrt->rt6i_protocol = RTPROT_REDIRECT;
2436 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 3031 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2437 3032
2438 if (ip6_ins_rt(nrt)) 3033 /* No need to remove rt from the exception table if rt is
2439 goto out_release; 3034 * a cached route because rt6_insert_exception() will
3035 * takes care of it
3036 */
3037 if (rt6_insert_exception(nrt, rt)) {
3038 dst_release_immediate(&nrt->dst);
3039 goto out;
3040 }
2440 3041
2441 netevent.old = &rt->dst; 3042 netevent.old = &rt->dst;
2442 netevent.new = &nrt->dst; 3043 netevent.new = &nrt->dst;
@@ -2444,17 +3045,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
2444 netevent.neigh = neigh; 3045 netevent.neigh = neigh;
2445 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 3046 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2446 3047
2447 if (rt->rt6i_flags & RTF_CACHE) {
2448 rt = (struct rt6_info *) dst_clone(&rt->dst);
2449 ip6_del_rt(rt);
2450 }
2451
2452out_release:
2453 /* Release the reference taken in
2454 * ip6_rt_cache_alloc()
2455 */
2456 dst_release(&nrt->dst);
2457
2458out: 3048out:
2459 neigh_release(neigh); 3049 neigh_release(neigh);
2460} 3050}
@@ -2511,23 +3101,23 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
2511 if (!table) 3101 if (!table)
2512 return NULL; 3102 return NULL;
2513 3103
2514 read_lock_bh(&table->tb6_lock); 3104 rcu_read_lock();
2515 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0); 3105 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
2516 if (!fn) 3106 if (!fn)
2517 goto out; 3107 goto out;
2518 3108
2519 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 3109 for_each_fib6_node_rt_rcu(fn) {
2520 if (rt->dst.dev->ifindex != ifindex) 3110 if (rt->dst.dev->ifindex != ifindex)
2521 continue; 3111 continue;
2522 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 3112 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2523 continue; 3113 continue;
2524 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 3114 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2525 continue; 3115 continue;
2526 dst_hold(&rt->dst); 3116 ip6_hold_safe(NULL, &rt, false);
2527 break; 3117 break;
2528 } 3118 }
2529out: 3119out:
2530 read_unlock_bh(&table->tb6_lock); 3120 rcu_read_unlock();
2531 return rt; 3121 return rt;
2532} 3122}
2533 3123
@@ -2573,16 +3163,16 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
2573 if (!table) 3163 if (!table)
2574 return NULL; 3164 return NULL;
2575 3165
2576 read_lock_bh(&table->tb6_lock); 3166 rcu_read_lock();
2577 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 3167 for_each_fib6_node_rt_rcu(&table->tb6_root) {
2578 if (dev == rt->dst.dev && 3168 if (dev == rt->dst.dev &&
2579 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 3169 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2580 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 3170 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2581 break; 3171 break;
2582 } 3172 }
2583 if (rt) 3173 if (rt)
2584 dst_hold(&rt->dst); 3174 ip6_hold_safe(NULL, &rt, false);
2585 read_unlock_bh(&table->tb6_lock); 3175 rcu_read_unlock();
2586 return rt; 3176 return rt;
2587} 3177}
2588 3178
@@ -2620,17 +3210,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
2620 struct rt6_info *rt; 3210 struct rt6_info *rt;
2621 3211
2622restart: 3212restart:
2623 read_lock_bh(&table->tb6_lock); 3213 rcu_read_lock();
2624 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 3214 for_each_fib6_node_rt_rcu(&table->tb6_root) {
2625 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 3215 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2626 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { 3216 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2627 dst_hold(&rt->dst); 3217 if (dst_hold_safe(&rt->dst)) {
2628 read_unlock_bh(&table->tb6_lock); 3218 rcu_read_unlock();
2629 ip6_del_rt(rt); 3219 ip6_del_rt(rt);
3220 } else {
3221 rcu_read_unlock();
3222 }
2630 goto restart; 3223 goto restart;
2631 } 3224 }
2632 } 3225 }
2633 read_unlock_bh(&table->tb6_lock); 3226 rcu_read_unlock();
2634 3227
2635 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; 3228 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2636} 3229}
@@ -2818,8 +3411,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2818 if (((void *)rt->dst.dev == dev || !dev) && 3411 if (((void *)rt->dst.dev == dev || !dev) &&
2819 rt != net->ipv6.ip6_null_entry && 3412 rt != net->ipv6.ip6_null_entry &&
2820 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 3413 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
3414 spin_lock_bh(&rt6_exception_lock);
2821 /* remove prefsrc entry */ 3415 /* remove prefsrc entry */
2822 rt->rt6i_prefsrc.plen = 0; 3416 rt->rt6i_prefsrc.plen = 0;
3417 /* need to update cache as well */
3418 rt6_exceptions_remove_prefsrc(rt);
3419 spin_unlock_bh(&rt6_exception_lock);
2823 } 3420 }
2824 return 0; 3421 return 0;
2825} 3422}
@@ -2836,18 +3433,23 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2836} 3433}
2837 3434
2838#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) 3435#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2839#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2840 3436
 2841/* Remove routers and update dst entries when a gateway turns into a host. */ 3437/* Remove routers and update dst entries when a gateway turns into a host. */
2842static int fib6_clean_tohost(struct rt6_info *rt, void *arg) 3438static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2843{ 3439{
2844 struct in6_addr *gateway = (struct in6_addr *)arg; 3440 struct in6_addr *gateway = (struct in6_addr *)arg;
2845 3441
2846 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || 3442 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
2847 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && 3443 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2848 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2849 return -1; 3444 return -1;
2850 } 3445 }
3446
 3447	/* Further clean up cached routes in the exception table.
 3448	 * This is needed because a cached route may have a different
 3449	 * gateway than its 'parent' in the case of an ip redirect.
3450 */
3451 rt6_exceptions_clean_tohost(rt, gateway);
3452
2851 return 0; 3453 return 0;
2852} 3454}
2853 3455
@@ -2926,19 +3528,14 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2926 if (rt->dst.dev == arg->dev && 3528 if (rt->dst.dev == arg->dev &&
2927 dst_metric_raw(&rt->dst, RTAX_MTU) && 3529 dst_metric_raw(&rt->dst, RTAX_MTU) &&
2928 !dst_metric_locked(&rt->dst, RTAX_MTU)) { 3530 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2929 if (rt->rt6i_flags & RTF_CACHE) { 3531 spin_lock_bh(&rt6_exception_lock);
2930 /* For RTF_CACHE with rt6i_pmtu == 0 3532 if (dst_mtu(&rt->dst) >= arg->mtu ||
2931 * (i.e. a redirected route), 3533 (dst_mtu(&rt->dst) < arg->mtu &&
2932 * the metrics of its rt->dst.from has already 3534 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2933 * been updated.
2934 */
2935 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2936 rt->rt6i_pmtu = arg->mtu;
2937 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2938 (dst_mtu(&rt->dst) < arg->mtu &&
2939 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2940 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 3535 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2941 } 3536 }
3537 rt6_exceptions_update_pmtu(rt, arg->mtu);
3538 spin_unlock_bh(&rt6_exception_lock);
2942 } 3539 }
2943 return 0; 3540 return 0;
2944} 3541}
@@ -3839,7 +4436,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3839 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 4436 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3840 net->ipv6.rt6_stats->fib_nodes, 4437 net->ipv6.rt6_stats->fib_nodes,
3841 net->ipv6.rt6_stats->fib_route_nodes, 4438 net->ipv6.rt6_stats->fib_route_nodes,
3842 net->ipv6.rt6_stats->fib_rt_alloc, 4439 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
3843 net->ipv6.rt6_stats->fib_rt_entries, 4440 net->ipv6.rt6_stats->fib_rt_entries,
3844 net->ipv6.rt6_stats->fib_rt_cache, 4441 net->ipv6.rt6_stats->fib_rt_cache,
3845 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 4442 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ac912bb21747..d60ddcb0bfe2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -91,29 +91,35 @@ struct sit_net {
91 * Must be invoked with rcu_read_lock 91 * Must be invoked with rcu_read_lock
92 */ 92 */
93static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, 93static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
94 struct net_device *dev, __be32 remote, __be32 local) 94 struct net_device *dev,
95 __be32 remote, __be32 local,
96 int sifindex)
95{ 97{
96 unsigned int h0 = HASH(remote); 98 unsigned int h0 = HASH(remote);
97 unsigned int h1 = HASH(local); 99 unsigned int h1 = HASH(local);
98 struct ip_tunnel *t; 100 struct ip_tunnel *t;
99 struct sit_net *sitn = net_generic(net, sit_net_id); 101 struct sit_net *sitn = net_generic(net, sit_net_id);
102 int ifindex = dev ? dev->ifindex : 0;
100 103
101 for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { 104 for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
102 if (local == t->parms.iph.saddr && 105 if (local == t->parms.iph.saddr &&
103 remote == t->parms.iph.daddr && 106 remote == t->parms.iph.daddr &&
104 (!dev || !t->parms.link || dev->ifindex == t->parms.link) && 107 (!dev || !t->parms.link || ifindex == t->parms.link ||
108 sifindex == t->parms.link) &&
105 (t->dev->flags & IFF_UP)) 109 (t->dev->flags & IFF_UP))
106 return t; 110 return t;
107 } 111 }
108 for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { 112 for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
109 if (remote == t->parms.iph.daddr && 113 if (remote == t->parms.iph.daddr &&
110 (!dev || !t->parms.link || dev->ifindex == t->parms.link) && 114 (!dev || !t->parms.link || ifindex == t->parms.link ||
115 sifindex == t->parms.link) &&
111 (t->dev->flags & IFF_UP)) 116 (t->dev->flags & IFF_UP))
112 return t; 117 return t;
113 } 118 }
114 for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { 119 for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
115 if (local == t->parms.iph.saddr && 120 if (local == t->parms.iph.saddr &&
116 (!dev || !t->parms.link || dev->ifindex == t->parms.link) && 121 (!dev || !t->parms.link || ifindex == t->parms.link ||
122 sifindex == t->parms.link) &&
117 (t->dev->flags & IFF_UP)) 123 (t->dev->flags & IFF_UP))
118 return t; 124 return t;
119 } 125 }
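
Note: the new sifindex parameter lets a tunnel bound to a specific link keep matching when its device is enslaved to an L3 master (VRF): skb->dev is then the master device, and the real ingress ifindex travels in IPCB(skb)->iif. The match condition reduces to the predicate below (user-space model, illustrative names; ifindex == 0 models a NULL dev):

#include <stdbool.h>
#include <stdio.h>

/* link == 0 means the tunnel is not bound to any device. */
static bool tunnel_link_matches(int link, int ifindex, int sifindex)
{
	return !ifindex || !link || ifindex == link || sifindex == link;
}

int main(void)
{
	/* bound to ifindex 3; packet arrived via VRF master 7, slave 3 */
	printf("%d\n", tunnel_link_matches(3, 7, 3));	/* 1: slave matched */
	printf("%d\n", tunnel_link_matches(3, 7, 0));	/* 0: no VRF info   */
	return 0;
}
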
@@ -486,6 +492,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
486 const int code = icmp_hdr(skb)->code; 492 const int code = icmp_hdr(skb)->code;
487 unsigned int data_len = 0; 493 unsigned int data_len = 0;
488 struct ip_tunnel *t; 494 struct ip_tunnel *t;
495 int sifindex;
489 int err; 496 int err;
490 497
491 switch (type) { 498 switch (type) {
@@ -517,10 +524,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
517 524
518 err = -ENOENT; 525 err = -ENOENT;
519 526
520 t = ipip6_tunnel_lookup(dev_net(skb->dev), 527 sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
521 skb->dev, 528 t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
522 iph->daddr, 529 iph->daddr, iph->saddr, sifindex);
523 iph->saddr);
524 if (!t) 530 if (!t)
525 goto out; 531 goto out;
526 532
@@ -633,10 +639,12 @@ static int ipip6_rcv(struct sk_buff *skb)
633{ 639{
634 const struct iphdr *iph = ip_hdr(skb); 640 const struct iphdr *iph = ip_hdr(skb);
635 struct ip_tunnel *tunnel; 641 struct ip_tunnel *tunnel;
642 int sifindex;
636 int err; 643 int err;
637 644
645 sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
638 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 646 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
639 iph->saddr, iph->daddr); 647 iph->saddr, iph->daddr, sifindex);
640 if (tunnel) { 648 if (tunnel) {
641 struct pcpu_sw_netstats *tstats; 649 struct pcpu_sw_netstats *tstats;
642 650
@@ -704,10 +712,13 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
704{ 712{
705 const struct iphdr *iph; 713 const struct iphdr *iph;
706 struct ip_tunnel *tunnel; 714 struct ip_tunnel *tunnel;
715 int sifindex;
716
717 sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
707 718
708 iph = ip_hdr(skb); 719 iph = ip_hdr(skb);
709 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 720 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
710 iph->saddr, iph->daddr); 721 iph->saddr, iph->daddr, sifindex);
711 if (tunnel) { 722 if (tunnel) {
712 const struct tnl_ptk_info *tpi; 723 const struct tnl_ptk_info *tpi;
713 724
@@ -1848,19 +1859,22 @@ err_alloc_dev:
1848 return err; 1859 return err;
1849} 1860}
1850 1861
1851static void __net_exit sit_exit_net(struct net *net) 1862static void __net_exit sit_exit_batch_net(struct list_head *net_list)
1852{ 1863{
1853 LIST_HEAD(list); 1864 LIST_HEAD(list);
1865 struct net *net;
1854 1866
1855 rtnl_lock(); 1867 rtnl_lock();
1856 sit_destroy_tunnels(net, &list); 1868 list_for_each_entry(net, net_list, exit_list)
1869 sit_destroy_tunnels(net, &list);
1870
1857 unregister_netdevice_many(&list); 1871 unregister_netdevice_many(&list);
1858 rtnl_unlock(); 1872 rtnl_unlock();
1859} 1873}
1860 1874
1861static struct pernet_operations sit_net_ops = { 1875static struct pernet_operations sit_net_ops = {
1862 .init = sit_init_net, 1876 .init = sit_init_net,
1863 .exit = sit_exit_net, 1877 .exit_batch = sit_exit_batch_net,
1864 .id = &sit_net_id, 1878 .id = &sit_net_id,
1865 .size = sizeof(struct sit_net), 1879 .size = sizeof(struct sit_net),
1866}; 1880};
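
Note: switching sit from .exit to .exit_batch means rtnl_lock is taken once per batch of dying namespaces and unregister_netdevice_many() runs once over the combined list, instead of locking and unregistering per netns. A compilable model of that batching shape, with a pthread mutex standing in for RTNL and all names hypothetical:

#include <pthread.h>
#include <stdio.h>

#define MAX_DEVS 8

struct netns { int id; };

static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER;

static void collect_tunnels(struct netns *ns, int *list, int *n)
{
	/* pretend each netns contributes one dying device */
	list[(*n)++] = ns->id;
}

static void exit_batch(struct netns *nets, int nnets)
{
	int list[MAX_DEVS], n = 0, i;

	pthread_mutex_lock(&rtnl);		/* one lock for the batch */
	for (i = 0; i < nnets; i++)
		collect_tunnels(&nets[i], list, &n);
	for (i = 0; i < n; i++)			/* one teardown pass */
		printf("unregister dev from netns %d\n", list[i]);
	pthread_mutex_unlock(&rtnl);
}

int main(void)
{
	struct netns nets[] = { { 1 }, { 2 }, { 3 } };

	exit_batch(nets, 3);
	return 0;
}
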
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 4e7817abc0b9..e7a3a6b6cf56 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
244 } 244 }
245 245
246 req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); 246 req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
247 tcp_select_initial_window(tcp_full_space(sk), req->mss, 247 tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
248 &req->rsk_rcv_wnd, &req->rsk_window_clamp, 248 &req->rsk_rcv_wnd, &req->rsk_window_clamp,
249 ireq->wscale_ok, &rcv_wscale, 249 ireq->wscale_ok, &rcv_wscale,
250 dst_metric(dst, RTAX_INITRWND)); 250 dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 6fbf8ae5e52c..a789a8ac6a64 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem. 3 * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
3 * 4 *
@@ -97,6 +98,34 @@ static struct ctl_table ipv6_table_template[] = {
97 .mode = 0644, 98 .mode = 0644,
98 .proc_handler = proc_dointvec, 99 .proc_handler = proc_dointvec,
99 }, 100 },
101 {
102 .procname = "max_dst_opts_number",
103 .data = &init_net.ipv6.sysctl.max_dst_opts_cnt,
104 .maxlen = sizeof(int),
105 .mode = 0644,
106 .proc_handler = proc_dointvec
107 },
108 {
109 .procname = "max_hbh_opts_number",
110 .data = &init_net.ipv6.sysctl.max_hbh_opts_cnt,
111 .maxlen = sizeof(int),
112 .mode = 0644,
113 .proc_handler = proc_dointvec
114 },
115 {
116 .procname = "max_dst_opts_length",
117 .data = &init_net.ipv6.sysctl.max_dst_opts_len,
118 .maxlen = sizeof(int),
119 .mode = 0644,
120 .proc_handler = proc_dointvec
121 },
122 {
123 .procname = "max_hbh_length",
124 .data = &init_net.ipv6.sysctl.max_hbh_opts_len,
125 .maxlen = sizeof(int),
126 .mode = 0644,
127 .proc_handler = proc_dointvec
128 },
100 { } 129 { }
101}; 130};
102 131
@@ -157,6 +186,10 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
157 ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; 186 ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
158 ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind; 187 ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
159 ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect; 188 ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect;
189 ipv6_table[10].data = &net->ipv6.sysctl.max_dst_opts_cnt;
190 ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
191 ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
192 ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
160 193
161 ipv6_route_table = ipv6_route_sysctl_init(net); 194 ipv6_route_table = ipv6_route_sysctl_init(net);
162 if (!ipv6_route_table) 195 if (!ipv6_route_table)
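
Note: the four new ipv6_table[10..13].data assignments follow the template pattern this file already uses: the ctl_table template is copied per namespace and each entry's .data pointer is re-aimed at that namespace's copy of the variable, so the fixup indices must track entry order in the template. A small user-space model of the fixup (simplified stand-in structures):

#include <stdio.h>
#include <string.h>

struct ctl_entry {
	const char *procname;
	int *data;			/* per-netns target, patched below */
};

struct netns_sysctl {
	int max_dst_opts_cnt;
	int max_hbh_opts_cnt;
};

int main(void)
{
	/* template: .data left NULL, like the init_net defaults */
	static const struct ctl_entry template[] = {
		{ "max_dst_opts_number", NULL },
		{ "max_hbh_opts_number", NULL },
	};
	struct ctl_entry table[2];
	struct netns_sysctl ns = { 8, 8 };

	memcpy(table, template, sizeof(table));
	table[0].data = &ns.max_dst_opts_cnt;	/* index must match template */
	table[1].data = &ns.max_hbh_opts_cnt;

	printf("%s -> %d\n", table[0].procname, *table[0].data);
	return 0;
}
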
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 64d94afa427f..6bb98c93edfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -69,6 +69,8 @@
69#include <crypto/hash.h> 69#include <crypto/hash.h>
70#include <linux/scatterlist.h> 70#include <linux/scatterlist.h>
71 71
72#include <trace/events/tcp.h>
73
72static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); 74static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 75static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req); 76 struct request_sock *req);
@@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
890 int genhash; 892 int genhash;
891 struct sock *sk1 = NULL; 893 struct sock *sk1 = NULL;
892#endif 894#endif
893 int oif; 895 int oif = 0;
894 896
895 if (th->rst) 897 if (th->rst)
896 return; 898 return;
@@ -939,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
939 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 941 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
940 (th->doff << 2); 942 (th->doff << 2);
941 943
942 oif = sk ? sk->sk_bound_dev_if : 0; 944 if (sk) {
945 oif = sk->sk_bound_dev_if;
946 trace_tcp_send_reset(sk, skb);
947 }
948
943 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); 949 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
944 950
945#ifdef CONFIG_TCP_MD5SIG 951#ifdef CONFIG_TCP_MD5SIG
@@ -1577,8 +1583,9 @@ do_time_wait:
1577 refcounted = false; 1583 refcounted = false;
1578 goto process; 1584 goto process;
1579 } 1585 }
1580 /* Fall through to ACK */
1581 } 1586 }
1587 /* to ACK */
1588 /* fall through */
1582 case TCP_TW_ACK: 1589 case TCP_TW_ACK:
1583 tcp_v6_timewait_ack(sk, skb); 1590 tcp_v6_timewait_ack(sk, skb);
1584 break; 1591 break;
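
Note: this hunk is one of several in the series converting prose such as "Fall through to ACK" into a bare /* fall through */ comment sitting immediately before the next case label, which is where GCC's -Wimplicit-fallthrough and static checkers expect the annotation. The shape of the idiom, in a self-contained sketch:

#include <stdio.h>

/* Deliberate fallthrough, annotated where -Wimplicit-fallthrough
 * looks for it: last statement before the next case label.
 */
static int classify(int v)
{
	int score = 0;

	switch (v) {
	case 2:
		score++;
		/* fall through */
	case 1:
		score++;
		break;
	default:
		break;
	}
	return score;
}

int main(void)
{
	printf("%d %d\n", classify(2), classify(1));	/* 2 1 */
	return 0;
}
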
@@ -1933,8 +1940,8 @@ struct proto tcpv6_prot = {
1933 .memory_pressure = &tcp_memory_pressure, 1940 .memory_pressure = &tcp_memory_pressure,
1934 .orphan_count = &tcp_orphan_count, 1941 .orphan_count = &tcp_orphan_count,
1935 .sysctl_mem = sysctl_tcp_mem, 1942 .sysctl_mem = sysctl_tcp_mem,
1936 .sysctl_wmem = sysctl_tcp_wmem, 1943 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
1937 .sysctl_rmem = sysctl_tcp_rmem, 1944 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
1938 .max_header = MAX_TCP_HEADER, 1945 .max_header = MAX_TCP_HEADER,
1939 .obj_size = sizeof(struct tcp6_sock), 1946 .obj_size = sizeof(struct tcp6_sock),
1940 .slab_flags = SLAB_TYPESAFE_BY_RCU, 1947 .slab_flags = SLAB_TYPESAFE_BY_RCU,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e2ecfb137297..3f30fa313bf2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -606,7 +606,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
606 */ 606 */
607 607
608 /* if we're overly short, let UDP handle it */ 608 /* if we're overly short, let UDP handle it */
609 encap_rcv = ACCESS_ONCE(up->encap_rcv); 609 encap_rcv = READ_ONCE(up->encap_rcv);
610 if (encap_rcv) { 610 if (encap_rcv) {
611 int ret; 611 int ret;
612 612
@@ -1015,6 +1015,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
1015 */ 1015 */
1016 offset = skb_transport_offset(skb); 1016 offset = skb_transport_offset(skb);
1017 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 1017 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
1018 csum = skb->csum;
1018 1019
1019 skb->ip_summed = CHECKSUM_NONE; 1020 skb->ip_summed = CHECKSUM_NONE;
1020 1021
@@ -1431,7 +1432,7 @@ void udpv6_destroy_sock(struct sock *sk)
1431 1432
1432 if (static_key_false(&udpv6_encap_needed) && up->encap_type) { 1433 if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
1433 void (*encap_destroy)(struct sock *sk); 1434 void (*encap_destroy)(struct sock *sk);
1434 encap_destroy = ACCESS_ONCE(up->encap_destroy); 1435 encap_destroy = READ_ONCE(up->encap_destroy);
1435 if (encap_destroy) 1436 if (encap_destroy)
1436 encap_destroy(sk); 1437 encap_destroy(sk);
1437 } 1438 }
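
Note: the ACCESS_ONCE() to READ_ONCE() conversions are mechanical: both force a single untorn load of a shared variable, and READ_ONCE() is the spelling that survives (it also handles non-scalar types). For scalars it boils down to a volatile access, which this simplified user-space macro models (the real kernel implementation is more involved):

#include <stdio.h>

/* Simplified READ_ONCE(): force one volatile load so the compiler can
 * neither tear, repeat, nor hoist the read out of a loop.
 */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

static int shared_flag;

int main(void)
{
	int v = READ_ONCE(shared_flag);	/* exactly one load of shared_flag */

	printf("%d\n", v);
	return 0;
}
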
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index f180b3d85e31..7903e21c178b 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _UDP6_IMPL_H 2#ifndef _UDP6_IMPL_H
2#define _UDP6_IMPL_H 3#define _UDP6_IMPL_H
3#include <net/udp.h> 4#include <net/udp.h>
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 455fd4e39333..a0f89ad76f9d 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,15 +17,94 @@
17#include <net/ip6_checksum.h> 17#include <net/ip6_checksum.h>
18#include "ip6_offload.h" 18#include "ip6_offload.h"
19 19
20static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb, 20static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
21 netdev_features_t features) 21 netdev_features_t features)
22{ 22{
23 struct sk_buff *segs = ERR_PTR(-EINVAL); 23 struct sk_buff *segs = ERR_PTR(-EINVAL);
24 unsigned int mss;
25 unsigned int unfrag_ip6hlen, unfrag_len;
26 struct frag_hdr *fptr;
27 u8 *packet_start, *prevhdr;
28 u8 nexthdr;
29 u8 frag_hdr_sz = sizeof(struct frag_hdr);
30 __wsum csum;
31 int tnl_hlen;
32 int err;
33
34 mss = skb_shinfo(skb)->gso_size;
35 if (unlikely(skb->len <= mss))
36 goto out;
24 37
25 if (skb->encapsulation && skb_shinfo(skb)->gso_type & 38 if (skb->encapsulation && skb_shinfo(skb)->gso_type &
26 (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) 39 (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
27 segs = skb_udp_tunnel_segment(skb, features, true); 40 segs = skb_udp_tunnel_segment(skb, features, true);
41 else {
42 const struct ipv6hdr *ipv6h;
43 struct udphdr *uh;
44
45 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
46 goto out;
47
 48		/* Do software UFO. Complete and fill in the UDP checksum, as HW cannot
 49		 * checksum UDP packets sent as multiple IP fragments.
50 */
51
52 uh = udp_hdr(skb);
53 ipv6h = ipv6_hdr(skb);
54
55 uh->check = 0;
56 csum = skb_checksum(skb, 0, skb->len, 0);
57 uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
58 &ipv6h->daddr, csum);
59 if (uh->check == 0)
60 uh->check = CSUM_MANGLED_0;
61
62 skb->ip_summed = CHECKSUM_UNNECESSARY;
63
 64		/* If there is no outer header we can fake a checksum offload
 65		 * because we have already computed the checksum in software
 66		 * before segmenting the frame.
67 */
68 if (!skb->encap_hdr_csum)
69 features |= NETIF_F_HW_CSUM;
70
71 /* Check if there is enough headroom to insert fragment header. */
72 tnl_hlen = skb_tnl_header_len(skb);
73 if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
74 if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
75 goto out;
76 }
77
78 /* Find the unfragmentable header and shift it left by frag_hdr_sz
79 * bytes to insert fragment header.
80 */
81 err = ip6_find_1stfragopt(skb, &prevhdr);
82 if (err < 0)
83 return ERR_PTR(err);
84 unfrag_ip6hlen = err;
85 nexthdr = *prevhdr;
86 *prevhdr = NEXTHDR_FRAGMENT;
87 unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
88 unfrag_ip6hlen + tnl_hlen;
89 packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
90 memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
91
92 SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
93 skb->mac_header -= frag_hdr_sz;
94 skb->network_header -= frag_hdr_sz;
95
96 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
97 fptr->nexthdr = nexthdr;
98 fptr->reserved = 0;
99 fptr->identification = ipv6_proxy_select_ident(dev_net(skb->dev), skb);
100
 101		/* Fragment the skb. The IPv6 header and the remaining fields of the
 102		 * fragment header are updated in ipv6_gso_segment()
103 */
104 segs = skb_segment(skb, features);
105 }
28 106
107out:
29 return segs; 108 return segs;
30} 109}
31 110
@@ -75,7 +154,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
75 154
76static const struct net_offload udpv6_offload = { 155static const struct net_offload udpv6_offload = {
77 .callbacks = { 156 .callbacks = {
78 .gso_segment = udp6_tunnel_segment, 157 .gso_segment = udp6_ufo_fragment,
79 .gro_receive = udp6_gro_receive, 158 .gro_receive = udp6_gro_receive,
80 .gro_complete = udp6_gro_complete, 159 .gro_complete = udp6_gro_complete,
81 }, 160 },
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index f95943a13abc..fe04e23af986 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * xfrm6_input.c: based on net/ipv4/xfrm4_input.c 3 * xfrm6_input.c: based on net/ipv4/xfrm4_input.c
3 * 4 *
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 11d1314ab6c5..885ade234a49 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * xfrm6_policy.c: based on xfrm4_policy.c 3 * xfrm6_policy.c: based on xfrm4_policy.c
3 * 4 *
@@ -152,6 +153,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
152 switch (nexthdr) { 153 switch (nexthdr) {
153 case NEXTHDR_FRAGMENT: 154 case NEXTHDR_FRAGMENT:
154 onlyproto = 1; 155 onlyproto = 1;
156 /* fall through */
155 case NEXTHDR_ROUTING: 157 case NEXTHDR_ROUTING:
156 case NEXTHDR_HOP: 158 case NEXTHDR_HOP:
157 case NEXTHDR_DEST: 159 case NEXTHDR_DEST:
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 8a1f9c0d2a13..b15075a5c227 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * xfrm6_state.c: based on xfrm4_state.c 3 * xfrm6_state.c: based on xfrm4_state.c
3 * 4 *
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 4e438bc7ee87..f85f0d7480ac 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -338,6 +338,14 @@ static int __net_init xfrm6_tunnel_net_init(struct net *net)
338 338
339static void __net_exit xfrm6_tunnel_net_exit(struct net *net) 339static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
340{ 340{
341 struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
342 unsigned int i;
343
344 for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
345 WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
346
347 for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
348 WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byspi[i]));
341} 349}
342 350
343static struct pernet_operations xfrm6_tunnel_net_ops = { 351static struct pernet_operations xfrm6_tunnel_net_ops = {
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index ac598ec90589..d21a9d128d3e 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1867,6 +1867,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1867 rc = -EPERM; 1867 rc = -EPERM;
1868 if (!capable(CAP_NET_ADMIN)) 1868 if (!capable(CAP_NET_ADMIN))
1869 break; 1869 break;
1870 /* fall through */
1870 case SIOCGIFADDR: 1871 case SIOCGIFADDR:
1871 rc = ipxitf_ioctl(cmd, argp); 1872 rc = ipxitf_ioctl(cmd, argp);
1872 break; 1873 break;
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 7d75e4c5c75d..38a3d51d9ead 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * IPX proc routines 3 * IPX proc routines
3 * 4 *
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index b5d91447f3dc..3cf93aa9f284 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Implements the IPX routing routines. 3 * Implements the IPX routing routines.
3 * Code moved from af_ipx.c. 4 * Code moved from af_ipx.c.
diff --git a/net/ipx/pe2.c b/net/ipx/pe2.c
index 32dcd601ab32..ba7d4214bbff 100644
--- a/net/ipx/pe2.c
+++ b/net/ipx/pe2.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/in.h> 2#include <linux/in.h>
2#include <linux/mm.h> 3#include <linux/mm.h>
3#include <linux/module.h> 4#include <linux/module.h>
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index 0dafcc561ed6..c3eef457db88 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* -*- linux-c -*- 2/* -*- linux-c -*-
2 * sysctl_net_ipx.c: sysctl interface to net IPX subsystem. 3 * sysctl_net_ipx.c: sysctl interface to net IPX subsystem.
3 * 4 *
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index c748e8a6a72c..bd5723315069 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/in.h> 2#include <linux/in.h>
2#include <linux/inet.h> 3#include <linux/inet.h>
3#include <linux/list.h> 4#include <linux/list.h>
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index af4e76ac88ff..0b750a22c4b9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1650,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info,
1650 } 1650 }
1651 1651
1652 newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); 1652 newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
1653 if (unlikely(IS_ERR(newfile))) { 1653 if (IS_ERR(newfile)) {
1654 err = PTR_ERR(newfile); 1654 err = PTR_ERR(newfile);
1655 goto out_sock_alloc_fail; 1655 goto out_sock_alloc_fail;
1656 } 1656 }
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a00d607e7224..3dffb892d52c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3845,7 +3845,7 @@ static void __net_exit pfkey_net_exit(struct net *net)
3845 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); 3845 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
3846 3846
3847 pfkey_exit_proc(net); 3847 pfkey_exit_proc(net);
3848 BUG_ON(!hlist_empty(&net_pfkey->table)); 3848 WARN_ON(!hlist_empty(&net_pfkey->table));
3849} 3849}
3850 3850
3851static struct pernet_operations pfkey_net_ops = { 3851static struct pernet_operations pfkey_net_ops = {
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 2870f41ea44d..399a7e5db2f4 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the L2TP. 3# Makefile for the L2TP.
3# 4#
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index ee485df73ccd..115918ad8eca 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -100,8 +100,6 @@ struct l2tp_skb_cb {
100 100
101#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) 101#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
102 102
103static atomic_t l2tp_tunnel_count;
104static atomic_t l2tp_session_count;
105static struct workqueue_struct *l2tp_wq; 103static struct workqueue_struct *l2tp_wq;
106 104
107/* per-net private data for this module */ 105/* per-net private data for this module */
@@ -216,12 +214,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
216} 214}
217EXPORT_SYMBOL_GPL(l2tp_tunnel_get); 215EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
218 216
219/* Lookup a session. A new reference is held on the returned session. 217/* Lookup a session. A new reference is held on the returned session. */
220 * Optionally calls session->ref() too if do_ref is true.
221 */
222struct l2tp_session *l2tp_session_get(const struct net *net, 218struct l2tp_session *l2tp_session_get(const struct net *net,
223 struct l2tp_tunnel *tunnel, 219 struct l2tp_tunnel *tunnel,
224 u32 session_id, bool do_ref) 220 u32 session_id)
225{ 221{
226 struct hlist_head *session_list; 222 struct hlist_head *session_list;
227 struct l2tp_session *session; 223 struct l2tp_session *session;
@@ -235,8 +231,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
235 hlist_for_each_entry_rcu(session, session_list, global_hlist) { 231 hlist_for_each_entry_rcu(session, session_list, global_hlist) {
236 if (session->session_id == session_id) { 232 if (session->session_id == session_id) {
237 l2tp_session_inc_refcount(session); 233 l2tp_session_inc_refcount(session);
238 if (do_ref && session->ref)
239 session->ref(session);
240 rcu_read_unlock_bh(); 234 rcu_read_unlock_bh();
241 235
242 return session; 236 return session;
@@ -252,8 +246,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
252 hlist_for_each_entry(session, session_list, hlist) { 246 hlist_for_each_entry(session, session_list, hlist) {
253 if (session->session_id == session_id) { 247 if (session->session_id == session_id) {
254 l2tp_session_inc_refcount(session); 248 l2tp_session_inc_refcount(session);
255 if (do_ref && session->ref)
256 session->ref(session);
257 read_unlock_bh(&tunnel->hlist_lock); 249 read_unlock_bh(&tunnel->hlist_lock);
258 250
259 return session; 251 return session;
@@ -265,8 +257,7 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
265} 257}
266EXPORT_SYMBOL_GPL(l2tp_session_get); 258EXPORT_SYMBOL_GPL(l2tp_session_get);
267 259
268struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, 260struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
269 bool do_ref)
270{ 261{
271 int hash; 262 int hash;
272 struct l2tp_session *session; 263 struct l2tp_session *session;
@@ -277,8 +268,6 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
277 hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { 268 hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
278 if (++count > nth) { 269 if (++count > nth) {
279 l2tp_session_inc_refcount(session); 270 l2tp_session_inc_refcount(session);
280 if (do_ref && session->ref)
281 session->ref(session);
282 read_unlock_bh(&tunnel->hlist_lock); 271 read_unlock_bh(&tunnel->hlist_lock);
283 return session; 272 return session;
284 } 273 }
@@ -295,8 +284,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
295 * This is very inefficient but is only used by management interfaces. 284 * This is very inefficient but is only used by management interfaces.
296 */ 285 */
297struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, 286struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
298 const char *ifname, 287 const char *ifname)
299 bool do_ref)
300{ 288{
301 struct l2tp_net *pn = l2tp_pernet(net); 289 struct l2tp_net *pn = l2tp_pernet(net);
302 int hash; 290 int hash;
@@ -307,8 +295,6 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
307 hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { 295 hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
308 if (!strcmp(session->ifname, ifname)) { 296 if (!strcmp(session->ifname, ifname)) {
     l2tp_session_inc_refcount(session);
-    if (do_ref && session->ref)
-        session->ref(session);
     rcu_read_unlock_bh();

     return session;
@@ -322,8 +308,8 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
 }
 EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);

-static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
-                                      struct l2tp_session *session)
+int l2tp_session_register(struct l2tp_session *session,
+                          struct l2tp_tunnel *tunnel)
 {
     struct l2tp_session *session_walk;
     struct hlist_head *g_head;
@@ -380,6 +366,7 @@ err_tlock:

     return err;
 }
+EXPORT_SYMBOL_GPL(l2tp_session_register);

 /* Lookup a tunnel by id
  */
@@ -484,9 +471,6 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
         (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
     else
         kfree_skb(skb);
-
-    if (session->deref)
-        (*session->deref)(session);
 }

 /* Dequeue skbs from the session's reorder_q, subject to packet order.
@@ -515,8 +499,6 @@ start:
             session->reorder_skip = 1;
             __skb_unlink(skb, &session->reorder_q);
             kfree_skb(skb);
-            if (session->deref)
-                (*session->deref)(session);
             continue;
         }

@@ -689,9 +671,6 @@ discard:
 * a data (not control) frame before coming here. Fields up to the
 * session-id have already been parsed and ptr points to the data
 * after the session-id.
- *
- * session->ref() must have been called prior to l2tp_recv_common().
- * session->deref() will be called automatically after skb is processed.
 */
 void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
                       unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -858,9 +837,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 discard:
     atomic_long_inc(&session->stats.rx_errors);
     kfree_skb(skb);
-
-    if (session->deref)
-        (*session->deref)(session);
 }
 EXPORT_SYMBOL(l2tp_recv_common);

@@ -874,8 +850,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session)
     while ((skb = skb_dequeue(&session->reorder_q))) {
         atomic_long_inc(&session->stats.rx_errors);
         kfree_skb(skb);
-        if (session->deref)
-            (*session->deref)(session);
     }
     return 0;
 }
@@ -967,13 +941,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
     }

     /* Find the session context */
-    session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
+    session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id);
     if (!session || !session->recv_skb) {
-        if (session) {
-            if (session->deref)
-                session->deref(session);
+        if (session)
             l2tp_session_dec_refcount(session);
-        }

         /* Not found? Pass to userspace to deal with */
         l2tp_info(tunnel, L2TP_MSG_DATA,
@@ -1274,9 +1245,6 @@ static void l2tp_tunnel_destruct(struct sock *sk)
     spin_lock_bh(&pn->l2tp_tunnel_list_lock);
     list_del_rcu(&tunnel->list);
     spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-    atomic_dec(&l2tp_tunnel_count);
-
-    l2tp_tunnel_closeall(tunnel);

     tunnel->sock = NULL;
     l2tp_tunnel_dec_refcount(tunnel);
@@ -1314,8 +1282,8 @@ again:

         hlist_del_init(&session->hlist);

-        if (session->ref != NULL)
-            (*session->ref)(session);
+        if (test_and_set_bit(0, &session->dead))
+            goto again;

         write_unlock_bh(&tunnel->hlist_lock);

@@ -1325,9 +1293,6 @@ again:
         if (session->session_close != NULL)
             (*session->session_close)(session);

-        if (session->deref != NULL)
-            (*session->deref)(session);
-
         l2tp_session_dec_refcount(session);

         write_lock_bh(&tunnel->hlist_lock);
@@ -1658,7 +1623,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32

     /* Add tunnel to our list */
     INIT_LIST_HEAD(&tunnel->list);
-    atomic_inc(&l2tp_tunnel_count);

     /* Bump the reference count. The tunnel context is deleted
      * only when this drops to zero. Must be done before list insertion
@@ -1685,14 +1649,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);

 /* This function is used by the netlink TUNNEL_DELETE command.
  */
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
 {
-    l2tp_tunnel_inc_refcount(tunnel);
-    if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
-        l2tp_tunnel_dec_refcount(tunnel);
-        return 1;
+    if (!test_and_set_bit(0, &tunnel->dead)) {
+        l2tp_tunnel_inc_refcount(tunnel);
+        queue_work(l2tp_wq, &tunnel->del_work);
     }
-    return 0;
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);

@@ -1706,8 +1668,6 @@ void l2tp_session_free(struct l2tp_session *session)

     if (tunnel) {
         BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-        if (session->session_id != 0)
-            atomic_dec(&l2tp_session_count);
         sock_put(tunnel->sock);
         session->tunnel = NULL;
         l2tp_tunnel_dec_refcount(tunnel);
@@ -1750,15 +1710,16 @@ EXPORT_SYMBOL_GPL(__l2tp_session_unhash);
 */
 int l2tp_session_delete(struct l2tp_session *session)
 {
-    if (session->ref)
-        (*session->ref)(session);
+    if (test_and_set_bit(0, &session->dead))
+        return 0;
+
     __l2tp_session_unhash(session);
     l2tp_session_queue_purge(session);
     if (session->session_close != NULL)
         (*session->session_close)(session);
-    if (session->deref)
-        (*session->deref)(session);
+
     l2tp_session_dec_refcount(session);
+
     return 0;
 }
 EXPORT_SYMBOL_GPL(l2tp_session_delete);
@@ -1784,7 +1745,6 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
 struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
 {
     struct l2tp_session *session;
-    int err;

     session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
     if (session != NULL) {
@@ -1842,17 +1802,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn

         refcount_set(&session->ref_count, 1);

-        err = l2tp_session_add_to_tunnel(tunnel, session);
-        if (err) {
-            kfree(session);
-
-            return ERR_PTR(err);
-        }
-
-        /* Ignore management session in session count value */
-        if (session->session_id != 0)
-            atomic_inc(&l2tp_session_count);
-
         return session;
     }

@@ -1884,15 +1833,19 @@ static __net_exit void l2tp_exit_net(struct net *net)
 {
     struct l2tp_net *pn = l2tp_pernet(net);
     struct l2tp_tunnel *tunnel = NULL;
+    int hash;

     rcu_read_lock_bh();
     list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-        (void)l2tp_tunnel_delete(tunnel);
+        l2tp_tunnel_delete(tunnel);
     }
     rcu_read_unlock_bh();

     flush_workqueue(l2tp_wq);
     rcu_barrier();
+
+    for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
+        WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
 }

 static struct pernet_operations l2tp_net_ops = {
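The recurring pattern in the l2tp_core.c changes above is a one-shot teardown guard: both tunnels and sessions gain a dead flag that is tested and set atomically, so whichever path gets there first (netlink delete, socket close, pernet exit) runs teardown exactly once while the refcount keeps the object alive. A minimal standalone sketch of the idiom; my_obj and my_obj_delete are illustrative placeholders, not symbols from this patch:

    #include <linux/bitops.h>
    #include <linux/workqueue.h>

    struct my_obj {
        unsigned long dead;             /* bit 0: teardown has started */
        struct work_struct del_work;    /* deferred teardown */
    };

    static void my_obj_delete(struct my_obj *obj)
    {
        /* test_and_set_bit() sets bit 0 and returns its previous value,
         * so only the first caller proceeds; concurrent or repeated
         * deletes become no-ops.
         */
        if (test_and_set_bit(0, &obj->dead))
            return;

        schedule_work(&obj->del_work);
    }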
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a305e0c5925a..9534e16965cc 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -76,6 +76,7 @@ struct l2tp_session_cfg {
 struct l2tp_session {
     int magic;                  /* should be
                                  * L2TP_SESSION_MAGIC */
+    long dead;

     struct l2tp_tunnel *tunnel; /* back pointer to tunnel
                                  * context */
@@ -128,8 +129,6 @@ struct l2tp_session {
     int (*build_header)(struct l2tp_session *session, void *buf);
     void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
     void (*session_close)(struct l2tp_session *session);
-    void (*ref)(struct l2tp_session *session);
-    void (*deref)(struct l2tp_session *session);
 #if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
     void (*show)(struct seq_file *m, void *priv);
 #endif
@@ -160,6 +159,9 @@ struct l2tp_tunnel_cfg {

 struct l2tp_tunnel {
     int magic;                  /* Should be L2TP_TUNNEL_MAGIC */
+
+    unsigned long dead;
+
     struct rcu_head rcu;
     rwlock_t hlist_lock;        /* protect session_hlist */
     bool acpt_newsess;          /* Indicates whether this
@@ -241,12 +243,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);

 struct l2tp_session *l2tp_session_get(const struct net *net,
                                       struct l2tp_tunnel *tunnel,
-                                      u32 session_id, bool do_ref);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
-                                          bool do_ref);
+                                      u32 session_id);
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
 struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
-                                                const char *ifname,
-                                                bool do_ref);
+                                                const char *ifname);
 struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
 struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);

@@ -254,11 +254,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
                       u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
                       struct l2tp_tunnel **tunnelp);
 void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 struct l2tp_session *l2tp_session_create(int priv_size,
                                          struct l2tp_tunnel *tunnel,
                                          u32 session_id, u32 peer_session_id,
                                          struct l2tp_session_cfg *cfg);
+int l2tp_session_register(struct l2tp_session *session,
+                          struct l2tp_tunnel *tunnel);
+
 void __l2tp_session_unhash(struct l2tp_session *session);
 int l2tp_session_delete(struct l2tp_session *session);
 void l2tp_session_free(struct l2tp_session *session);
@@ -291,37 +294,17 @@ static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
 /* Session reference counts. Incremented when code obtains a reference
  * to a session.
  */
-static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session)
+static inline void l2tp_session_inc_refcount(struct l2tp_session *session)
 {
     refcount_inc(&session->ref_count);
 }

-static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session)
+static inline void l2tp_session_dec_refcount(struct l2tp_session *session)
 {
     if (refcount_dec_and_test(&session->ref_count))
         l2tp_session_free(session);
 }

-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_session_inc_refcount(_s)                                  \
-do {                                                                   \
-    pr_debug("l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n",          \
-             __func__, __LINE__, (_s)->name,                           \
-             refcount_read(&_s->ref_count));                           \
-    l2tp_session_inc_refcount_1(_s);                                   \
-} while (0)
-#define l2tp_session_dec_refcount(_s)                                  \
-do {                                                                   \
-    pr_debug("l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n",          \
-             __func__, __LINE__, (_s)->name,                           \
-             refcount_read(&_s->ref_count));                           \
-    l2tp_session_dec_refcount_1(_s);                                   \
-} while (0)
-#else
-#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
-#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
-#endif
-
 #define l2tp_printk(ptr, type, func, fmt, ...)                         \
 do {                                                                   \
     if (((ptr)->debug) & (type))                                       \
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 53bae54c4d6e..eb69411bcb47 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)

 static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
 {
-    pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+    pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
     pd->session_idx++;

     if (pd->session == NULL) {
@@ -241,8 +241,6 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
         l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
     } else {
         l2tp_dfs_seq_session_show(m, pd->session);
-        if (pd->session->deref)
-            pd->session->deref(pd->session);
         l2tp_session_dec_refcount(pd->session);
     }

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 87da9ef61860..5c366ecfa1cb 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -41,10 +41,7 @@

 /* via netdev_priv() */
 struct l2tp_eth {
-    struct net_device *dev;
-    struct sock *tunnel_sock;
     struct l2tp_session *session;
-    struct list_head list;
     atomic_long_t tx_bytes;
     atomic_long_t tx_packets;
     atomic_long_t tx_dropped;
@@ -55,26 +52,12 @@

 /* via l2tp_session_priv() */
 struct l2tp_eth_sess {
-    struct net_device *dev;
+    struct net_device __rcu *dev;
 };

-/* per-net private data for this module */
-static unsigned int l2tp_eth_net_id;
-struct l2tp_eth_net {
-    struct list_head l2tp_eth_dev_list;
-    spinlock_t l2tp_eth_lock;
-};
-
-static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
-{
-    return net_generic(net, l2tp_eth_net_id);
-}
-
 static int l2tp_eth_dev_init(struct net_device *dev)
 {
-    struct l2tp_eth *priv = netdev_priv(dev);
-
-    priv->dev = dev;
     eth_hw_addr_random(dev);
     eth_broadcast_addr(dev->broadcast);
     netdev_lockdep_set_classes(dev);
@@ -85,12 +68,13 @@ static int l2tp_eth_dev_init(struct net_device *dev)
 static void l2tp_eth_dev_uninit(struct net_device *dev)
 {
     struct l2tp_eth *priv = netdev_priv(dev);
-    struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
+    struct l2tp_eth_sess *spriv;

-    spin_lock(&pn->l2tp_eth_lock);
-    list_del_init(&priv->list);
-    spin_unlock(&pn->l2tp_eth_lock);
-    dev_put(dev);
+    spriv = l2tp_session_priv(priv->session);
+    RCU_INIT_POINTER(spriv->dev, NULL);
+    /* No need for synchronize_net() here. We're called by
+     * unregister_netdev*(), which does the synchronisation for us.
+     */
 }

 static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -148,8 +132,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
 static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
 {
     struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
-    struct net_device *dev = spriv->dev;
-    struct l2tp_eth *priv = netdev_priv(dev);
+    struct net_device *dev;
+    struct l2tp_eth *priv;

     if (session->debug & L2TP_MSG_DATA) {
         unsigned int length;
@@ -173,16 +157,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
     skb_dst_drop(skb);
     nf_reset(skb);

+    rcu_read_lock();
+    dev = rcu_dereference(spriv->dev);
+    if (!dev)
+        goto error_rcu;
+
+    priv = netdev_priv(dev);
     if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
         atomic_long_inc(&priv->rx_packets);
         atomic_long_add(data_len, &priv->rx_bytes);
     } else {
         atomic_long_inc(&priv->rx_errors);
     }
+    rcu_read_unlock();
+
     return;

+error_rcu:
+    rcu_read_unlock();
 error:
-    atomic_long_inc(&priv->rx_errors);
     kfree_skb(skb);
 }

@@ -193,11 +186,15 @@ static void l2tp_eth_delete(struct l2tp_session *session)

     if (session) {
         spriv = l2tp_session_priv(session);
-        dev = spriv->dev;
+
+        rtnl_lock();
+        dev = rtnl_dereference(spriv->dev);
         if (dev) {
-            unregister_netdev(dev);
-            spriv->dev = NULL;
+            unregister_netdevice(dev);
+            rtnl_unlock();
             module_put(THIS_MODULE);
+        } else {
+            rtnl_unlock();
         }
     }
 }
@@ -207,9 +204,20 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 {
     struct l2tp_session *session = arg;
     struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
-    struct net_device *dev = spriv->dev;
+    struct net_device *dev;
+
+    rcu_read_lock();
+    dev = rcu_dereference(spriv->dev);
+    if (!dev) {
+        rcu_read_unlock();
+        return;
+    }
+    dev_hold(dev);
+    rcu_read_unlock();

     seq_printf(m, " interface %s\n", dev->name);
+
+    dev_put(dev);
 }
 #endif

@@ -273,7 +281,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
     struct l2tp_eth *priv;
     struct l2tp_eth_sess *spriv;
     int rc;
-    struct l2tp_eth_net *pn;

     if (cfg->ifname) {
         strlcpy(name, cfg->ifname, IFNAMSIZ);
@@ -287,14 +294,14 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
                                peer_session_id, cfg);
     if (IS_ERR(session)) {
         rc = PTR_ERR(session);
-        goto out;
+        goto err;
     }

     dev = alloc_netdev(sizeof(*priv), name, name_assign_type,
                        l2tp_eth_dev_setup);
     if (!dev) {
         rc = -ENOMEM;
-        goto out_del_session;
+        goto err_sess;
     }

     dev_net_set(dev, net);
@@ -303,11 +310,8 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
     l2tp_eth_adjust_mtu(tunnel, session, dev);

     priv = netdev_priv(dev);
-    priv->dev = dev;
     priv->session = session;
-    INIT_LIST_HEAD(&priv->list);

-    priv->tunnel_sock = tunnel->sock;
     session->recv_skb = l2tp_eth_dev_recv;
     session->session_close = l2tp_eth_delete;
 #if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
@@ -315,48 +319,50 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
 #endif

     spriv = l2tp_session_priv(session);
-    spriv->dev = dev;

-    rc = register_netdev(dev);
-    if (rc < 0)
-        goto out_del_dev;
+    l2tp_session_inc_refcount(session);

-    __module_get(THIS_MODULE);
-    /* Must be done after register_netdev() */
-    strlcpy(session->ifname, dev->name, IFNAMSIZ);
+    rtnl_lock();

-    dev_hold(dev);
-    pn = l2tp_eth_pernet(dev_net(dev));
-    spin_lock(&pn->l2tp_eth_lock);
-    list_add(&priv->list, &pn->l2tp_eth_dev_list);
-    spin_unlock(&pn->l2tp_eth_lock);
+    /* Register both device and session while holding the rtnl lock. This
+     * ensures that l2tp_eth_delete() will see that there's a device to
+     * unregister, even if it happened to run before we assign spriv->dev.
+     */
+    rc = l2tp_session_register(session, tunnel);
+    if (rc < 0) {
+        rtnl_unlock();
+        goto err_sess_dev;
+    }

-    return 0;
+    rc = register_netdevice(dev);
+    if (rc < 0) {
+        rtnl_unlock();
+        l2tp_session_delete(session);
+        l2tp_session_dec_refcount(session);
+        free_netdev(dev);

-out_del_dev:
-    free_netdev(dev);
-    spriv->dev = NULL;
-out_del_session:
-    l2tp_session_delete(session);
-out:
-    return rc;
-}
+        return rc;
+    }

-static __net_init int l2tp_eth_init_net(struct net *net)
-{
-    struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id);
+    strlcpy(session->ifname, dev->name, IFNAMSIZ);
+    rcu_assign_pointer(spriv->dev, dev);

-    INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
-    spin_lock_init(&pn->l2tp_eth_lock);
+    rtnl_unlock();
+
+    l2tp_session_dec_refcount(session);
+
+    __module_get(THIS_MODULE);

     return 0;
-}

-static struct pernet_operations l2tp_eth_net_ops = {
-    .init = l2tp_eth_init_net,
-    .id = &l2tp_eth_net_id,
-    .size = sizeof(struct l2tp_eth_net),
-};
+err_sess_dev:
+    l2tp_session_dec_refcount(session);
+    free_netdev(dev);
+err_sess:
+    kfree(session);
+err:
+    return rc;
+}


 static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
@@ -371,25 +377,18 @@ static int __init l2tp_eth_init(void)

     err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
     if (err)
-        goto out;
-
-    err = register_pernet_device(&l2tp_eth_net_ops);
-    if (err)
-        goto out_unreg;
+        goto err;

     pr_info("L2TP ethernet pseudowire support (L2TPv3)\n");

     return 0;

-out_unreg:
-    l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
-out:
+err:
     return err;
 }

 static void __exit l2tp_eth_exit(void)
 {
-    unregister_pernet_device(&l2tp_eth_net_ops);
     l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
 }

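The l2tp_eth diff above revolves around one change: spriv->dev becomes an RCU-managed pointer, published and cleared under the rtnl lock and read under rcu_read_lock() with a device reference taken before the read-side section ends. A reduced sketch of that reader/writer pattern, using placeholder names (sess_priv, sess_publish, sess_recv are not symbols from this patch):

    #include <linux/netdevice.h>
    #include <linux/rcupdate.h>
    #include <linux/skbuff.h>

    struct sess_priv {
        struct net_device __rcu *dev;
    };

    static void sess_publish(struct sess_priv *p, struct net_device *dev)
    {
        rcu_assign_pointer(p->dev, dev);    /* pairs with rcu_dereference() */
    }

    static void sess_recv(struct sess_priv *p, struct sk_buff *skb)
    {
        struct net_device *dev;

        rcu_read_lock();
        dev = rcu_dereference(p->dev);
        if (!dev) {                 /* device already unregistered */
            rcu_read_unlock();
            kfree_skb(skb);
            return;
        }
        dev_hold(dev);              /* keep dev alive past the RCU section */
        rcu_read_unlock();

        dev_forward_skb(dev, skb);
        dev_put(dev);
    }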
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4d322c1b7233..ff61124fdf59 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
     unsigned char *ptr, *optr;
     struct l2tp_session *session;
     struct l2tp_tunnel *tunnel = NULL;
+    struct iphdr *iph;
     int length;

     if (!pskb_may_pull(skb, 4))
@@ -143,7 +144,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
     }

     /* Ok, this is a data packet. Lookup the session. */
-    session = l2tp_session_get(net, NULL, session_id, true);
+    session = l2tp_session_get(net, NULL, session_id);
     if (!session)
         goto discard;

@@ -178,24 +179,17 @@ pass_up:
     goto discard;

     tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
-    tunnel = l2tp_tunnel_find(net, tunnel_id);
-    if (tunnel) {
-        sk = tunnel->sock;
-        sock_hold(sk);
-    } else {
-        struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
-
-        read_lock_bh(&l2tp_ip_lock);
-        sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
-                                   inet_iif(skb), tunnel_id);
-        if (!sk) {
-            read_unlock_bh(&l2tp_ip_lock);
-            goto discard;
-        }
+    iph = (struct iphdr *)skb_network_header(skb);

-        sock_hold(sk);
+    read_lock_bh(&l2tp_ip_lock);
+    sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
+                               tunnel_id);
+    if (!sk) {
         read_unlock_bh(&l2tp_ip_lock);
+        goto discard;
     }
+    sock_hold(sk);
+    read_unlock_bh(&l2tp_ip_lock);

     if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
         goto discard_put;
@@ -205,8 +199,6 @@ pass_up:
     return sk_receive_skb(sk, skb, 1);

 discard_sess:
-    if (session->deref)
-        session->deref(session);
     l2tp_session_dec_refcount(session);
     goto discard;

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 88b397c30d86..192344688c06 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
     unsigned char *ptr, *optr;
     struct l2tp_session *session;
     struct l2tp_tunnel *tunnel = NULL;
+    struct ipv6hdr *iph;
     int length;

     if (!pskb_may_pull(skb, 4))
@@ -156,7 +157,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
     }

     /* Ok, this is a data packet. Lookup the session. */
-    session = l2tp_session_get(net, NULL, session_id, true);
+    session = l2tp_session_get(net, NULL, session_id);
     if (!session)
         goto discard;

@@ -192,24 +193,17 @@ pass_up:
     goto discard;

     tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
-    tunnel = l2tp_tunnel_find(net, tunnel_id);
-    if (tunnel) {
-        sk = tunnel->sock;
-        sock_hold(sk);
-    } else {
-        struct ipv6hdr *iph = ipv6_hdr(skb);
-
-        read_lock_bh(&l2tp_ip6_lock);
-        sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
-                                    inet6_iif(skb), tunnel_id);
-        if (!sk) {
-            read_unlock_bh(&l2tp_ip6_lock);
-            goto discard;
-        }
+    iph = ipv6_hdr(skb);

-        sock_hold(sk);
+    read_lock_bh(&l2tp_ip6_lock);
+    sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+                                inet6_iif(skb), tunnel_id);
+    if (!sk) {
         read_unlock_bh(&l2tp_ip6_lock);
+        goto discard;
     }
+    sock_hold(sk);
+    read_unlock_bh(&l2tp_ip6_lock);

     if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
         goto discard_put;
@@ -219,8 +213,6 @@ pass_up:
     return sk_receive_skb(sk, skb, 1);

 discard_sess:
-    if (session->deref)
-        session->deref(session);
     l2tp_session_dec_refcount(session);
     goto discard;

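Both the IPv4 and IPv6 receive paths above now resolve the destination socket through the bind table alone, taking the socket reference while still holding the lock that protects the table. The essentials of that lookup-then-hold sequence, reduced to a sketch (lookup_bound_sk stands in for the real __l2tp_ip_bind_lookup/__l2tp_ip6_bind_lookup helpers):

    #include <linux/spinlock.h>
    #include <linux/types.h>
    #include <net/sock.h>

    static DEFINE_RWLOCK(bind_lock);

    static struct sock *lookup_bound_sk(u32 tunnel_id)
    {
        /* Stand-in for the real bind-table walk; returns a matching
         * socket or NULL. */
        return NULL;
    }

    static struct sock *get_bound_sk(u32 tunnel_id)
    {
        struct sock *sk;

        read_lock_bh(&bind_lock);
        sk = lookup_bound_sk(tunnel_id);
        if (sk)
            sock_hold(sk);  /* take the ref before dropping the lock */
        read_unlock_bh(&bind_lock);

        return sk;          /* caller must sock_put() when done */
    }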
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 7135f4645d3a..a1f24fb2be98 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -48,8 +48,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
 /* Accessed under genl lock */
 static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];

-static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
-                                                bool do_ref)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
 {
     u32 tunnel_id;
     u32 session_id;
@@ -60,15 +59,14 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,

     if (info->attrs[L2TP_ATTR_IFNAME]) {
         ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
-        session = l2tp_session_get_by_ifname(net, ifname, do_ref);
+        session = l2tp_session_get_by_ifname(net, ifname);
     } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
                (info->attrs[L2TP_ATTR_CONN_ID])) {
         tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
         session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
         tunnel = l2tp_tunnel_get(net, tunnel_id);
         if (tunnel) {
-            session = l2tp_session_get(net, tunnel, session_id,
-                                       do_ref);
+            session = l2tp_session_get(net, tunnel, session_id);
             l2tp_tunnel_dec_refcount(tunnel);
         }
     }
@@ -282,7 +280,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
     l2tp_tunnel_notify(&l2tp_nl_family, info,
                        tunnel, L2TP_CMD_TUNNEL_DELETE);

-    (void) l2tp_tunnel_delete(tunnel);
+    l2tp_tunnel_delete(tunnel);

     l2tp_tunnel_dec_refcount(tunnel);

@@ -406,7 +404,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
         if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
             nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
             goto nla_put_failure;
-        /* NOBREAK */
+        /* fall through */
     case L2TP_ENCAPTYPE_IP:
 #if IS_ENABLED(CONFIG_IPV6)
         if (np) {
@@ -649,7 +647,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
                          &cfg);

     if (ret >= 0) {
-        session = l2tp_session_get(net, tunnel, session_id, false);
+        session = l2tp_session_get(net, tunnel, session_id);
         if (session) {
             ret = l2tp_session_notify(&l2tp_nl_family, info, session,
                                       L2TP_CMD_SESSION_CREATE);
@@ -669,7 +667,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
     struct l2tp_session *session;
     u16 pw_type;

-    session = l2tp_nl_session_get(info, true);
+    session = l2tp_nl_session_get(info);
     if (session == NULL) {
         ret = -ENODEV;
         goto out;
@@ -683,8 +681,6 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
     if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
         ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);

-    if (session->deref)
-        session->deref(session);
     l2tp_session_dec_refcount(session);

 out:
@@ -696,7 +692,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
     int ret = 0;
     struct l2tp_session *session;

-    session = l2tp_nl_session_get(info, false);
+    session = l2tp_nl_session_get(info);
     if (session == NULL) {
         ret = -ENODEV;
         goto out;
@@ -828,7 +824,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
     struct sk_buff *msg;
     int ret;

-    session = l2tp_nl_session_get(info, false);
+    session = l2tp_nl_session_get(info);
     if (session == NULL) {
         ret = -ENODEV;
         goto err;
@@ -874,7 +870,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
         goto out;
     }

-    session = l2tp_session_get_nth(tunnel, si, false);
+    session = l2tp_session_get_nth(tunnel, si);
     if (session == NULL) {
         ti++;
         tunnel = NULL;
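One small but deliberate cleanup in the netlink diff is the switch-case comment change from /* NOBREAK */ to /* fall through */: the latter spelling is the one static checkers and gcc's -Wimplicit-fallthrough recognise as an intentional fall-through annotation. A trivial userspace illustration of the annotated pattern (hypothetical example, not code from this patch):

    #include <stdio.h>

    enum encap { ENCAP_UDP, ENCAP_IP };

    /* UDP encapsulation emits its own attributes, then deliberately
     * falls through to emit the attributes shared with plain IP.
     */
    static void emit_attrs(enum encap e)
    {
        switch (e) {
        case ENCAP_UDP:
            puts("udp sport/dport");
            /* fall through */
        case ENCAP_IP:
            puts("ip saddr/daddr");
            break;
        }
    }

    int main(void)
    {
        emit_attrs(ENCAP_UDP);  /* prints both lines */
        emit_attrs(ENCAP_IP);   /* prints only the IP line */
        return 0;
    }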
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 50e3ee9a9d61..b412fc3351dc 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -122,10 +122,11 @@
 struct pppol2tp_session {
     int owner;                      /* pid that opened the socket */

-    struct sock *sock;              /* Pointer to the session
+    struct mutex sk_lock;           /* Protects .sk */
+    struct sock __rcu *sk;          /* Pointer to the session
                                      * PPPoX socket */
-    struct sock *tunnel_sock;       /* Pointer to the tunnel UDP
-                                     * socket */
+    struct sock *__sk;              /* Copy of .sk, for cleanup */
+    struct rcu_head rcu;            /* For asynchronous release */
     int flags;                      /* accessed by PPPIOCGFLAGS.
                                      * Unused. */
 };
@@ -138,6 +139,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = {

 static const struct proto_ops pppol2tp_ops;

+/* Retrieves the pppol2tp socket associated to a session.
+ * A reference is held on the returned socket, so this function must be paired
+ * with sock_put().
+ */
+static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
+{
+    struct pppol2tp_session *ps = l2tp_session_priv(session);
+    struct sock *sk;
+
+    rcu_read_lock();
+    sk = rcu_dereference(ps->sk);
+    if (sk)
+        sock_hold(sk);
+    rcu_read_unlock();
+
+    return sk;
+}
+
 /* Helpers to obtain tunnel/session contexts from sockets.
  */
 static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
@@ -224,7 +243,8 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
     /* If the socket is bound, send it in to PPP's input queue. Otherwise
      * queue it on the session socket.
      */
-    sk = ps->sock;
+    rcu_read_lock();
+    sk = rcu_dereference(ps->sk);
     if (sk == NULL)
         goto no_sock;

@@ -247,30 +267,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
             kfree_skb(skb);
         }
     }
+    rcu_read_unlock();

     return;

 no_sock:
+    rcu_read_unlock();
     l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
     kfree_skb(skb);
 }

-static void pppol2tp_session_sock_hold(struct l2tp_session *session)
-{
-    struct pppol2tp_session *ps = l2tp_session_priv(session);
-
-    if (ps->sock)
-        sock_hold(ps->sock);
-}
-
-static void pppol2tp_session_sock_put(struct l2tp_session *session)
-{
-    struct pppol2tp_session *ps = l2tp_session_priv(session);
-
-    if (ps->sock)
-        sock_put(ps->sock);
-}
-
 /************************************************************************
  * Transmit handling
  ***********************************************************************/
@@ -287,7 +293,6 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
     int error;
     struct l2tp_session *session;
     struct l2tp_tunnel *tunnel;
-    struct pppol2tp_session *ps;
     int uhlen;

     error = -ENOTCONN;
@@ -300,10 +305,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
     if (session == NULL)
         goto error;

-    ps = l2tp_session_priv(session);
-    tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
-    if (tunnel == NULL)
-        goto error_put_sess;
+    tunnel = session->tunnel;

     uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;

@@ -314,7 +316,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
                        2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
                        0, GFP_KERNEL);
     if (!skb)
-        goto error_put_sess_tun;
+        goto error_put_sess;

     /* Reserve space for headers. */
     skb_reserve(skb, NET_SKB_PAD);
@@ -332,20 +334,17 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
     error = memcpy_from_msg(skb_put(skb, total_len), m, total_len);
     if (error < 0) {
         kfree_skb(skb);
-        goto error_put_sess_tun;
+        goto error_put_sess;
     }

     local_bh_disable();
     l2tp_xmit_skb(session, skb, session->hdr_len);
     local_bh_enable();

-    sock_put(ps->tunnel_sock);
     sock_put(sk);

     return total_len;

-error_put_sess_tun:
-    sock_put(ps->tunnel_sock);
 error_put_sess:
     sock_put(sk);
 error:
@@ -369,10 +368,8 @@ error:
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
     struct sock *sk = (struct sock *) chan->private;
-    struct sock *sk_tun;
     struct l2tp_session *session;
     struct l2tp_tunnel *tunnel;
-    struct pppol2tp_session *ps;
     int uhlen, headroom;

     if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -383,13 +380,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
     if (session == NULL)
         goto abort;

-    ps = l2tp_session_priv(session);
-    sk_tun = ps->tunnel_sock;
-    if (sk_tun == NULL)
-        goto abort_put_sess;
-    tunnel = l2tp_sock_to_tunnel(sk_tun);
-    if (tunnel == NULL)
-        goto abort_put_sess;
+    tunnel = session->tunnel;

     uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
     headroom = NET_SKB_PAD +
@@ -398,7 +389,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
                session->hdr_len +    /* L2TP header */
                2;                    /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
     if (skb_cow_head(skb, headroom))
-        goto abort_put_sess_tun;
+        goto abort_put_sess;

     /* Setup PPP header */
     __skb_push(skb, 2);
@@ -409,12 +400,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
     l2tp_xmit_skb(session, skb, session->hdr_len);
     local_bh_enable();

-    sock_put(sk_tun);
     sock_put(sk);
+
     return 1;

-abort_put_sess_tun:
-    sock_put(sk_tun);
 abort_put_sess:
     sock_put(sk);
 abort:
@@ -431,16 +420,15 @@ abort:
 */
 static void pppol2tp_session_close(struct l2tp_session *session)
 {
-    struct pppol2tp_session *ps = l2tp_session_priv(session);
-    struct sock *sk = ps->sock;
-    struct socket *sock = sk->sk_socket;
+    struct sock *sk;

     BUG_ON(session->magic != L2TP_SESSION_MAGIC);

-    if (sock) {
-        inet_shutdown(sock, SEND_SHUTDOWN);
-        /* Don't let the session go away before our socket does */
-        l2tp_session_inc_refcount(session);
+    sk = pppol2tp_session_get_sock(session);
+    if (sk) {
+        if (sk->sk_socket)
+            inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
+        sock_put(sk);
     }
 }

@@ -461,6 +449,14 @@ static void pppol2tp_session_destruct(struct sock *sk)
     }
 }

+static void pppol2tp_put_sk(struct rcu_head *head)
+{
+    struct pppol2tp_session *ps;
+
+    ps = container_of(head, typeof(*ps), rcu);
+    sock_put(ps->__sk);
+}
+
 /* Called when the PPPoX socket (session) is closed.
  */
 static int pppol2tp_release(struct socket *sock)
@@ -486,11 +482,23 @@ static int pppol2tp_release(struct socket *sock)

     session = pppol2tp_sock_to_session(sk);

-    /* Purge any queued data */
     if (session != NULL) {
-        __l2tp_session_unhash(session);
-        l2tp_session_queue_purge(session);
-        sock_put(sk);
+        struct pppol2tp_session *ps;
+
+        l2tp_session_delete(session);
+
+        ps = l2tp_session_priv(session);
+        mutex_lock(&ps->sk_lock);
+        ps->__sk = rcu_dereference_protected(ps->sk,
+                                             lockdep_is_held(&ps->sk_lock));
+        RCU_INIT_POINTER(ps->sk, NULL);
+        mutex_unlock(&ps->sk_lock);
+        call_rcu(&ps->rcu, pppol2tp_put_sk);
+
+        /* Rely on the sock_put() call at the end of the function for
+         * dropping the reference held by pppol2tp_sock_to_session().
+         * The last reference will be dropped by pppol2tp_put_sk().
+         */
     }
     release_sock(sk);

@@ -557,16 +565,46 @@ out:
 static void pppol2tp_show(struct seq_file *m, void *arg)
 {
     struct l2tp_session *session = arg;
-    struct pppol2tp_session *ps = l2tp_session_priv(session);
+    struct sock *sk;
+
+    sk = pppol2tp_session_get_sock(session);
+    if (sk) {
+        struct pppox_sock *po = pppox_sk(sk);

-    if (ps) {
-        struct pppox_sock *po = pppox_sk(ps->sock);
-        if (po)
-            seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+        seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+        sock_put(sk);
     }
 }
 #endif

+static void pppol2tp_session_init(struct l2tp_session *session)
+{
+    struct pppol2tp_session *ps;
+    struct dst_entry *dst;
+
+    session->recv_skb = pppol2tp_recv;
+    session->session_close = pppol2tp_session_close;
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
+    session->show = pppol2tp_show;
+#endif
+
+    ps = l2tp_session_priv(session);
+    mutex_init(&ps->sk_lock);
+    ps->owner = current->pid;
+
+    /* If PMTU discovery was enabled, use the MTU that was discovered */
+    dst = sk_dst_get(session->tunnel->sock);
+    if (dst) {
+        u32 pmtu = dst_mtu(dst);
+
+        if (pmtu) {
+            session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+            session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+        }
+        dst_release(dst);
+    }
+}
+
 /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
  */
 static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
@@ -578,12 +616,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
     struct l2tp_session *session = NULL;
     struct l2tp_tunnel *tunnel;
     struct pppol2tp_session *ps;
-    struct dst_entry *dst;
     struct l2tp_session_cfg cfg = { 0, };
     int error = 0;
     u32 tunnel_id, peer_tunnel_id;
     u32 session_id, peer_session_id;
     bool drop_refcnt = false;
+    bool drop_tunnel = false;
     int ver = 2;
     int fd;

@@ -652,7 +690,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
     if (tunnel_id == 0)
         goto end;

-    tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
+    tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id);
+    if (tunnel)
+        drop_tunnel = true;

     /* Special case: create tunnel context if session_id and
      * peer_session_id is 0. Otherwise look up tunnel using supplied
@@ -685,7 +725,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
     if (tunnel->peer_tunnel_id == 0)
         tunnel->peer_tunnel_id = peer_tunnel_id;

-    session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+    session = l2tp_session_get(sock_net(sk), tunnel, session_id);
     if (session) {
         drop_refcnt = true;
         ps = l2tp_session_priv(session);
@@ -693,13 +733,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
         /* Using a pre-existing session is fine as long as it hasn't
          * been connected yet.
          */
-        if (ps->sock) {
-            error = -EEXIST;
-            goto end;
-        }
-
-        /* consistency checks */
-        if (ps->tunnel_sock != tunnel->sock) {
+        mutex_lock(&ps->sk_lock);
+        if (rcu_dereference_protected(ps->sk,
+                                      lockdep_is_held(&ps->sk_lock))) {
+            mutex_unlock(&ps->sk_lock);
             error = -EEXIST;
             goto end;
         }
@@ -715,35 +752,19 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
             error = PTR_ERR(session);
             goto end;
         }
-    }

-    /* Associate session with its PPPoL2TP socket */
-    ps = l2tp_session_priv(session);
-    ps->owner = current->pid;
-    ps->sock = sk;
-    ps->tunnel_sock = tunnel->sock;
-
-    session->recv_skb = pppol2tp_recv;
-    session->session_close = pppol2tp_session_close;
-#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
-    session->show = pppol2tp_show;
-#endif
-
-    /* We need to know each time a skb is dropped from the reorder
-     * queue.
-     */
-    session->ref = pppol2tp_session_sock_hold;
-    session->deref = pppol2tp_session_sock_put;
-
-    /* If PMTU discovery was enabled, use the MTU that was discovered */
-    dst = sk_dst_get(tunnel->sock);
-    if (dst != NULL) {
-        u32 pmtu = dst_mtu(dst);
+        pppol2tp_session_init(session);
+        ps = l2tp_session_priv(session);
+        l2tp_session_inc_refcount(session);

-        if (pmtu != 0)
-            session->mtu = session->mru = pmtu -
-                PPPOL2TP_HEADER_OVERHEAD;
-        dst_release(dst);
+        mutex_lock(&ps->sk_lock);
+        error = l2tp_session_register(session, tunnel);
+        if (error < 0) {
+            mutex_unlock(&ps->sk_lock);
+            kfree(session);
+            goto end;
+        }
+        drop_refcnt = true;
     }

@@ -768,12 +789,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
     po->chan.mtu = session->mtu;

     error = ppp_register_net_channel(sock_net(sk), &po->chan);
-    if (error)
+    if (error) {
+        mutex_unlock(&ps->sk_lock);
         goto end;
+    }

 out_no_ppp:
     /* This is how we get the session context from the socket. */
     sk->sk_user_data = session;
+    rcu_assign_pointer(ps->sk, sk);
+    mutex_unlock(&ps->sk_lock);
+
+    /* Keep the reference we've grabbed on the session: sk doesn't expect
+     * the session to disappear. pppol2tp_session_destruct() is responsible
+     * for dropping it.
+     */
+    drop_refcnt = false;
+
     sk->sk_state = PPPOX_CONNECTED;
     l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
               session->name);
@@ -781,6 +813,8 @@ out_no_ppp:
 end:
     if (drop_refcnt)
         l2tp_session_dec_refcount(session);
+    if (drop_tunnel)
+        l2tp_tunnel_dec_refcount(tunnel);
     release_sock(sk);

     return error;
@@ -795,12 +829,11 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
 {
     int error;
     struct l2tp_session *session;
-    struct pppol2tp_session *ps;

     /* Error if tunnel socket is not prepped */
     if (!tunnel->sock) {
         error = -ENOENT;
-        goto out;
+        goto err;
     }

     /* Default MTU values. */
@@ -815,18 +848,20 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
                                peer_session_id, cfg);
     if (IS_ERR(session)) {
         error = PTR_ERR(session);
-        goto out;
+        goto err;
     }

-    ps = l2tp_session_priv(session);
-    ps->tunnel_sock = tunnel->sock;
+    pppol2tp_session_init(session);

-    l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
-              session->name);
+    error = l2tp_session_register(session, tunnel);
+    if (error < 0)
+        goto err_sess;

-    error = 0;
+    return 0;

-out:
+err_sess:
+    kfree(session);
+err:
     return error;
 }

@@ -857,9 +892,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
         goto end;

     pls = l2tp_session_priv(session);
-    tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
-    if (tunnel == NULL)
-        goto end_put_sess;
+    tunnel = session->tunnel;

     inet = inet_sk(tunnel->sock);
     if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
@@ -939,8 +972,6 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
     *usockaddr_len = len;
     error = 0;

-    sock_put(pls->tunnel_sock);
-end_put_sess:
     sock_put(sk);
 end:
     return error;
@@ -987,8 +1018,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
              "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
              session->name, cmd, arg);

-    sk = ps->sock;
-    sock_hold(sk);
+    sk = pppol2tp_session_get_sock(session);
+    if (!sk)
+        return -EBADR;

     switch (cmd) {
     case SIOCGIFMTU:
@@ -1135,13 +1167,11 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
             /* resend to session ioctl handler */
             struct l2tp_session *session =
                 l2tp_session_get(sock_net(sk), tunnel,
-                                 stats.session_id, true);
+                                 stats.session_id);

             if (session) {
                 err = pppol2tp_session_ioctl(session, cmd,
                                              arg);
-                if (session->deref)
-                    session->deref(session);
                 l2tp_session_dec_refcount(session);
             } else {
                 err = -EBADR;
@@ -1180,7 +1210,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
     struct sock *sk = sock->sk;
     struct l2tp_session *session;
     struct l2tp_tunnel *tunnel;
-    struct pppol2tp_session *ps;
     int err;

     if (!sk)
@@ -1204,16 +1233,10 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
     /* Special case: if session's session_id is zero, treat ioctl as a
      * tunnel ioctl
      */
-    ps = l2tp_session_priv(session);
     if ((session->session_id == 0) &&
         (session->peer_session_id == 0)) {
-        err = -EBADF;
-        tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
-        if (tunnel == NULL)
-            goto end_put_sess;
-
+        tunnel = session->tunnel;
         err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
-        sock_put(ps->tunnel_sock);
         goto end_put_sess;
     }

@@ -1265,7 +1288,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
                                        int optname, int val)
 {
     int err = 0;
-    struct pppol2tp_session *ps = l2tp_session_priv(session);

     switch (optname) {
     case PPPOL2TP_SO_RECVSEQ:
@@ -1286,8 +1308,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
         }
         session->send_seq = !!val;
         {
-            struct sock *ssk = ps->sock;
-            struct pppox_sock *po = pppox_sk(ssk);
+            struct pppox_sock *po = pppox_sk(sk);
+
             po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
                 PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
         }
@@ -1340,7 +1362,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
     struct sock *sk = sock->sk;
     struct l2tp_session *session;
     struct l2tp_tunnel *tunnel;
-    struct pppol2tp_session *ps;
     int val;
     int err;

@@ -1365,20 +1386,14 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,

     /* Special case: if session_id == 0x0000, treat as operation on tunnel
      */
-    ps = l2tp_session_priv(session);
     if ((session->session_id == 0) &&
         (session->peer_session_id == 0)) {
-        err = -EBADF;
-        tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
-        if (tunnel == NULL)
-            goto end_put_sess;
-
+        tunnel = session->tunnel;
         err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
-        sock_put(ps->tunnel_sock);
-    } else
+    } else {
         err = pppol2tp_session_setsockopt(sk, session, optname, val);
+    }

-end_put_sess:
     sock_put(sk);
 end:
     return err;
@@ -1466,7 +1481,6 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
     struct l2tp_tunnel *tunnel;
     int val, len;
     int err;
-    struct pppol2tp_session *ps;

     if (level != SOL_PPPOL2TP)
         return -EINVAL;
@@ -1490,16 +1504,10 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
         goto end;

     /* Special case: if session_id == 0x0000, treat as operation on tunnel */
-    ps = l2tp_session_priv(session);
     if ((session->session_id == 0) &&
         (session->peer_session_id == 0)) {
-        err = -EBADF;
-        tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
-        if (tunnel == NULL)
-            goto end_put_sess;
-
+        tunnel = session->tunnel;
         err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
-        sock_put(ps->tunnel_sock);
         if (err)
             goto end_put_sess;
     } else {
@@ -1558,7 +1566,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
1558 1566
1559static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) 1567static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
1560{ 1568{
1561 pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); 1569 pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
1562 pd->session_idx++; 1570 pd->session_idx++;
1563 1571
1564 if (pd->session == NULL) { 1572 if (pd->session == NULL) {
@@ -1626,8 +1634,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1626{ 1634{
1627 struct l2tp_session *session = v; 1635 struct l2tp_session *session = v;
1628 struct l2tp_tunnel *tunnel = session->tunnel; 1636 struct l2tp_tunnel *tunnel = session->tunnel;
1629 struct pppol2tp_session *ps = l2tp_session_priv(session); 1637 unsigned char state;
1630 struct pppox_sock *po = pppox_sk(ps->sock); 1638 char user_data_ok;
1639 struct sock *sk;
1631 u32 ip = 0; 1640 u32 ip = 0;
1632 u16 port = 0; 1641 u16 port = 0;
1633 1642
@@ -1637,6 +1646,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1637 port = ntohs(inet->inet_sport); 1646 port = ntohs(inet->inet_sport);
1638 } 1647 }
1639 1648
1649 sk = pppol2tp_session_get_sock(session);
1650 if (sk) {
1651 state = sk->sk_state;
1652 user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N';
1653 } else {
1654 state = 0;
1655 user_data_ok = 'N';
1656 }
1657
1640 seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " 1658 seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
1641 "%04X/%04X %d %c\n", 1659 "%04X/%04X %d %c\n",
1642 session->name, ip, port, 1660 session->name, ip, port,
@@ -1644,9 +1662,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1644 session->session_id, 1662 session->session_id,
1645 tunnel->peer_tunnel_id, 1663 tunnel->peer_tunnel_id,
1646 session->peer_session_id, 1664 session->peer_session_id,
1647 ps->sock->sk_state, 1665 state, user_data_ok);
1648 (session == ps->sock->sk_user_data) ?
1649 'Y' : 'N');
1650 seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n", 1666 seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n",
1651 session->mtu, session->mru, 1667 session->mtu, session->mru,
1652 session->recv_seq ? 'R' : '-', 1668 session->recv_seq ? 'R' : '-',
@@ -1663,8 +1679,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1663 atomic_long_read(&session->stats.rx_bytes), 1679 atomic_long_read(&session->stats.rx_bytes),
1664 atomic_long_read(&session->stats.rx_errors)); 1680 atomic_long_read(&session->stats.rx_errors));
1665 1681
1666 if (po) 1682 if (sk) {
1683 struct pppox_sock *po = pppox_sk(sk);
1684
1667 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); 1685 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
1686 sock_put(sk);
1687 }
1668} 1688}
1669 1689
1670static int pppol2tp_seq_show(struct seq_file *m, void *v) 1690static int pppol2tp_seq_show(struct seq_file *m, void *v)
@@ -1689,8 +1709,6 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
1689 pppol2tp_seq_tunnel_show(m, pd->tunnel); 1709 pppol2tp_seq_tunnel_show(m, pd->tunnel);
1690 } else { 1710 } else {
1691 pppol2tp_seq_session_show(m, pd->session); 1711 pppol2tp_seq_session_show(m, pd->session);
1692 if (pd->session->deref)
1693 pd->session->deref(pd->session);
1694 l2tp_session_dec_refcount(pd->session); 1712 l2tp_session_dec_refcount(pd->session);
1695 } 1713 }
1696 1714
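
Note on the pppol2tp hunks above: every call site converges on one reference pattern. Instead of peeking at ps->sock or resolving the tunnel via l2tp_sock_to_tunnel(ps->tunnel_sock), callers take a counted reference with pppol2tp_session_get_sock() (added by this series) and reach the tunnel directly through session->tunnel. A minimal sketch of that pattern; example_session_op() is hypothetical:

static int example_session_op(struct l2tp_session *session)
{
	struct sock *sk = pppol2tp_session_get_sock(session);
	int err = 0;

	if (!sk)
		return -EBADR;	/* session already being torn down */

	/* ... operate on sk; the tunnel is session->tunnel, no extra ref ... */

	sock_put(sk);		/* drop the reference taken above */
	return err;
}
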
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index e15314e3b464..db6e0afe3a20 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -127,8 +127,8 @@ static struct lapb_cb *lapb_create_cb(void)
127 skb_queue_head_init(&lapb->write_queue); 127 skb_queue_head_init(&lapb->write_queue);
128 skb_queue_head_init(&lapb->ack_queue); 128 skb_queue_head_init(&lapb->ack_queue);
129 129
130 init_timer(&lapb->t1timer); 130 timer_setup(&lapb->t1timer, NULL, 0);
131 init_timer(&lapb->t2timer); 131 timer_setup(&lapb->t2timer, NULL, 0);
132 132
133 lapb->t1 = LAPB_DEFAULT_T1; 133 lapb->t1 = LAPB_DEFAULT_T1;
134 lapb->t2 = LAPB_DEFAULT_T2; 134 lapb->t2 = LAPB_DEFAULT_T2;
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 1a5535bc3b8d..5d4ae01951b5 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -35,15 +35,14 @@
35#include <linux/interrupt.h> 35#include <linux/interrupt.h>
36#include <net/lapb.h> 36#include <net/lapb.h>
37 37
38static void lapb_t1timer_expiry(unsigned long); 38static void lapb_t1timer_expiry(struct timer_list *);
39static void lapb_t2timer_expiry(unsigned long); 39static void lapb_t2timer_expiry(struct timer_list *);
40 40
41void lapb_start_t1timer(struct lapb_cb *lapb) 41void lapb_start_t1timer(struct lapb_cb *lapb)
42{ 42{
43 del_timer(&lapb->t1timer); 43 del_timer(&lapb->t1timer);
44 44
45 lapb->t1timer.data = (unsigned long)lapb; 45 lapb->t1timer.function = lapb_t1timer_expiry;
46 lapb->t1timer.function = &lapb_t1timer_expiry;
47 lapb->t1timer.expires = jiffies + lapb->t1; 46 lapb->t1timer.expires = jiffies + lapb->t1;
48 47
49 add_timer(&lapb->t1timer); 48 add_timer(&lapb->t1timer);
@@ -53,8 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
53{ 52{
54 del_timer(&lapb->t2timer); 53 del_timer(&lapb->t2timer);
55 54
56 lapb->t2timer.data = (unsigned long)lapb; 55 lapb->t2timer.function = lapb_t2timer_expiry;
57 lapb->t2timer.function = &lapb_t2timer_expiry;
58 lapb->t2timer.expires = jiffies + lapb->t2; 56 lapb->t2timer.expires = jiffies + lapb->t2;
59 57
60 add_timer(&lapb->t2timer); 58 add_timer(&lapb->t2timer);
@@ -75,9 +73,9 @@ int lapb_t1timer_running(struct lapb_cb *lapb)
75 return timer_pending(&lapb->t1timer); 73 return timer_pending(&lapb->t1timer);
76} 74}
77 75
78static void lapb_t2timer_expiry(unsigned long param) 76static void lapb_t2timer_expiry(struct timer_list *t)
79{ 77{
80 struct lapb_cb *lapb = (struct lapb_cb *)param; 78 struct lapb_cb *lapb = from_timer(lapb, t, t2timer);
81 79
82 if (lapb->condition & LAPB_ACK_PENDING_CONDITION) { 80 if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
83 lapb->condition &= ~LAPB_ACK_PENDING_CONDITION; 81 lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
@@ -85,9 +83,9 @@ static void lapb_t2timer_expiry(unsigned long param)
85 } 83 }
86} 84}
87 85
88static void lapb_t1timer_expiry(unsigned long param) 86static void lapb_t1timer_expiry(struct timer_list *t)
89{ 87{
90 struct lapb_cb *lapb = (struct lapb_cb *)param; 88 struct lapb_cb *lapb = from_timer(lapb, t, t1timer);
91 89
92 switch (lapb->state) { 90 switch (lapb->state) {
93 91
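
The lapb changes are an instance of the tree-wide timer API conversion: callbacks now receive a struct timer_list * and recover their container with from_timer(), a container_of() keyed on the timer member, instead of casting an unsigned long .data cookie. lapb_iface.c can pass NULL to timer_setup() because the callback is assigned later in lapb_start_t1timer()/lapb_start_t2timer(). A minimal sketch of the idiom, with a hypothetical struct foo:

#include <linux/jiffies.h>
#include <linux/timer.h>

struct foo {				/* hypothetical container */
	struct timer_list my_timer;
	int ticks;
};

static void foo_expiry(struct timer_list *t)
{
	struct foo *foo = from_timer(foo, t, my_timer);

	foo->ticks++;			/* timeout handling goes here */
}

static void foo_start(struct foo *foo)
{
	/* replaces init_timer()/setup_timer() plus the .data cookie */
	timer_setup(&foo->my_timer, foo_expiry, 0);
	mod_timer(&foo->my_timer, jiffies + HZ);
}
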
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index ea225bd2672c..f59648018060 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1318,9 +1318,8 @@ static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb)
1318 return 0; 1318 return 0;
1319} 1319}
1320 1320
1321static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) 1321static void llc_conn_tmr_common_cb(struct sock *sk, u8 type)
1322{ 1322{
1323 struct sock *sk = (struct sock *)timeout_data;
1324 struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); 1323 struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
1325 1324
1326 bh_lock_sock(sk); 1325 bh_lock_sock(sk);
@@ -1334,24 +1333,32 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type)
1334 bh_unlock_sock(sk); 1333 bh_unlock_sock(sk);
1335} 1334}
1336 1335
1337void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) 1336void llc_conn_pf_cycle_tmr_cb(struct timer_list *t)
1338{ 1337{
1339 llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR); 1338 struct llc_sock *llc = from_timer(llc, t, pf_cycle_timer.timer);
1339
1340 llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_P_TMR);
1340} 1341}
1341 1342
1342void llc_conn_busy_tmr_cb(unsigned long timeout_data) 1343void llc_conn_busy_tmr_cb(struct timer_list *t)
1343{ 1344{
1344 llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR); 1345 struct llc_sock *llc = from_timer(llc, t, busy_state_timer.timer);
1346
1347 llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_BUSY_TMR);
1345} 1348}
1346 1349
1347void llc_conn_ack_tmr_cb(unsigned long timeout_data) 1350void llc_conn_ack_tmr_cb(struct timer_list *t)
1348{ 1351{
1349 llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR); 1352 struct llc_sock *llc = from_timer(llc, t, ack_timer.timer);
1353
1354 llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_ACK_TMR);
1350} 1355}
1351 1356
1352void llc_conn_rej_tmr_cb(unsigned long timeout_data) 1357void llc_conn_rej_tmr_cb(struct timer_list *t)
1353{ 1358{
1354 llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR); 1359 struct llc_sock *llc = from_timer(llc, t, rej_sent_timer.timer);
1360
1361 llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_REJ_TMR);
1355} 1362}
1356 1363
1357int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb) 1364int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb)
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5e91b47f0d2a..9177dbb16dce 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -902,20 +902,16 @@ static void llc_sk_init(struct sock *sk)
902 llc->inc_cntr = llc->dec_cntr = 2; 902 llc->inc_cntr = llc->dec_cntr = 2;
903 llc->dec_step = llc->connect_step = 1; 903 llc->dec_step = llc->connect_step = 1;
904 904
905 setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 905 timer_setup(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 0);
906 (unsigned long)sk);
907 llc->ack_timer.expire = sysctl_llc2_ack_timeout; 906 llc->ack_timer.expire = sysctl_llc2_ack_timeout;
908 907
909 setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 908 timer_setup(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 0);
910 (unsigned long)sk);
911 llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; 909 llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout;
912 910
913 setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 911 timer_setup(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 0);
914 (unsigned long)sk);
915 llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; 912 llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout;
916 913
917 setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 914 timer_setup(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 0);
918 (unsigned long)sk);
919 llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; 915 llc->busy_state_timer.expire = sysctl_llc2_busy_timeout;
920 916
921 llc->n2 = 2; /* max retransmit */ 917 llc->n2 = 2; /* max retransmit */
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index dd3e83328ad5..82cb93f66b9b 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -193,7 +193,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
193 */ 193 */
194 rcv = rcu_dereference(sap->rcv_func); 194 rcv = rcu_dereference(sap->rcv_func);
195 dest = llc_pdu_type(skb); 195 dest = llc_pdu_type(skb);
196 sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; 196 sap_handler = dest ? READ_ONCE(llc_type_handlers[dest - 1]) : NULL;
197 if (unlikely(!sap_handler)) { 197 if (unlikely(!sap_handler)) {
198 if (rcv) 198 if (rcv)
199 rcv(skb, dev, pt, orig_dev); 199 rcv(skb, dev, pt, orig_dev);
@@ -214,7 +214,7 @@ drop:
214 kfree_skb(skb); 214 kfree_skb(skb);
215 goto out; 215 goto out;
216handle_station: 216handle_station:
217 sta_handler = ACCESS_ONCE(llc_station_handler); 217 sta_handler = READ_ONCE(llc_station_handler);
218 if (!sta_handler) 218 if (!sta_handler)
219 goto drop; 219 goto drop;
220 sta_handler(skb); 220 sta_handler(skb);
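
The two llc_input.c hunks belong to the ACCESS_ONCE() removal: READ_ONCE() performs the same single, non-torn load of a pointer that other CPUs may update concurrently. A self-contained sketch of the dispatch shape, with hypothetical names:

#include <linux/compiler.h>
#include <linux/skbuff.h>

typedef void (*sta_handler_t)(struct sk_buff *skb);
static sta_handler_t station_handler;	/* published/cleared elsewhere */

static void example_dispatch(struct sk_buff *skb)
{
	sta_handler_t h = READ_ONCE(station_handler);	/* one load only */

	if (h)
		h(skb);
	else
		kfree_skb(skb);
}
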
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 799bafc2af39..8443a6d841b0 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * sysctl_net_llc.c: sysctl interface to LLC net subsystem. 3 * sysctl_net_llc.c: sysctl interface to LLC net subsystem.
3 * 4 *
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 282912245938..e3589ade62e0 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-$(CONFIG_MAC80211) += mac80211.o 2obj-$(CONFIG_MAC80211) += mac80211.o
2 3
3# mac80211 objects 4# mac80211 objects
@@ -6,6 +7,7 @@ mac80211-y := \
6 driver-ops.o \ 7 driver-ops.o \
7 sta_info.o \ 8 sta_info.o \
8 wep.o \ 9 wep.o \
10 aead_api.o \
9 wpa.o \ 11 wpa.o \
10 scan.o offchannel.o \ 12 scan.o offchannel.o \
11 ht.o agg-tx.o agg-rx.o \ 13 ht.o agg-tx.o agg-rx.o \
@@ -15,8 +17,6 @@ mac80211-y := \
15 rate.o \ 17 rate.o \
16 michael.o \ 18 michael.o \
17 tkip.o \ 19 tkip.o \
18 aes_ccm.o \
19 aes_gcm.o \
20 aes_cmac.o \ 20 aes_cmac.o \
21 aes_gmac.o \ 21 aes_gmac.o \
22 fils_aead.o \ 22 fils_aead.o \
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aead_api.c
index a4e0d59a40dd..160f9df30402 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aead_api.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright 2003-2004, Instant802 Networks, Inc. 2 * Copyright 2003-2004, Instant802 Networks, Inc.
3 * Copyright 2005-2006, Devicescape Software, Inc. 3 * Copyright 2005-2006, Devicescape Software, Inc.
4 * Copyright 2014-2015, Qualcomm Atheros, Inc.
4 * 5 *
5 * Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> 6 * Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * 7 *
@@ -12,30 +13,29 @@
12#include <linux/kernel.h> 13#include <linux/kernel.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/scatterlist.h>
15#include <crypto/aead.h> 17#include <crypto/aead.h>
16 18
17#include <net/mac80211.h> 19#include "aead_api.h"
18#include "key.h"
19#include "aes_ccm.h"
20 20
21int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, 21int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
22 u8 *data, size_t data_len, u8 *mic, 22 u8 *data, size_t data_len, u8 *mic)
23 size_t mic_len)
24{ 23{
24 size_t mic_len = crypto_aead_authsize(tfm);
25 struct scatterlist sg[3]; 25 struct scatterlist sg[3];
26 struct aead_request *aead_req; 26 struct aead_request *aead_req;
27 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); 27 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
28 u8 *__aad; 28 u8 *__aad;
29 29
30 aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); 30 aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
31 if (!aead_req) 31 if (!aead_req)
32 return -ENOMEM; 32 return -ENOMEM;
33 33
34 __aad = (u8 *)aead_req + reqsize; 34 __aad = (u8 *)aead_req + reqsize;
35 memcpy(__aad, aad, CCM_AAD_LEN); 35 memcpy(__aad, aad, aad_len);
36 36
37 sg_init_table(sg, 3); 37 sg_init_table(sg, 3);
38 sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); 38 sg_set_buf(&sg[0], __aad, aad_len);
39 sg_set_buf(&sg[1], data, data_len); 39 sg_set_buf(&sg[1], data, data_len);
40 sg_set_buf(&sg[2], mic, mic_len); 40 sg_set_buf(&sg[2], mic, mic_len);
41 41
@@ -49,10 +49,10 @@ int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
49 return 0; 49 return 0;
50} 50}
51 51
52int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, 52int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
53 u8 *data, size_t data_len, u8 *mic, 53 u8 *data, size_t data_len, u8 *mic)
54 size_t mic_len)
55{ 54{
55 size_t mic_len = crypto_aead_authsize(tfm);
56 struct scatterlist sg[3]; 56 struct scatterlist sg[3];
57 struct aead_request *aead_req; 57 struct aead_request *aead_req;
58 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); 58 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
@@ -62,15 +62,15 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
62 if (data_len == 0) 62 if (data_len == 0)
63 return -EINVAL; 63 return -EINVAL;
64 64
65 aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); 65 aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
66 if (!aead_req) 66 if (!aead_req)
67 return -ENOMEM; 67 return -ENOMEM;
68 68
69 __aad = (u8 *)aead_req + reqsize; 69 __aad = (u8 *)aead_req + reqsize;
70 memcpy(__aad, aad, CCM_AAD_LEN); 70 memcpy(__aad, aad, aad_len);
71 71
72 sg_init_table(sg, 3); 72 sg_init_table(sg, 3);
73 sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); 73 sg_set_buf(&sg[0], __aad, aad_len);
74 sg_set_buf(&sg[1], data, data_len); 74 sg_set_buf(&sg[1], data, data_len);
75 sg_set_buf(&sg[2], mic, mic_len); 75 sg_set_buf(&sg[2], mic, mic_len);
76 76
@@ -84,14 +84,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
84 return err; 84 return err;
85} 85}
86 86
87struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], 87struct crypto_aead *
88 size_t key_len, 88aead_key_setup_encrypt(const char *alg, const u8 key[],
89 size_t mic_len) 89 size_t key_len, size_t mic_len)
90{ 90{
91 struct crypto_aead *tfm; 91 struct crypto_aead *tfm;
92 int err; 92 int err;
93 93
94 tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); 94 tfm = crypto_alloc_aead(alg, 0, CRYPTO_ALG_ASYNC);
95 if (IS_ERR(tfm)) 95 if (IS_ERR(tfm))
96 return tfm; 96 return tfm;
97 97
@@ -109,7 +109,7 @@ free_aead:
109 return ERR_PTR(err); 109 return ERR_PTR(err);
110} 110}
111 111
112void ieee80211_aes_key_free(struct crypto_aead *tfm) 112void aead_key_free(struct crypto_aead *tfm)
113{ 113{
114 crypto_free_aead(tfm); 114 crypto_free_aead(tfm);
115} 115}
diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h
new file mode 100644
index 000000000000..5e39ea843bbf
--- /dev/null
+++ b/net/mac80211/aead_api.h
@@ -0,0 +1,27 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation.
5 */
6
7#ifndef _AEAD_API_H
8#define _AEAD_API_H
9
10#include <crypto/aead.h>
11#include <linux/crypto.h>
12
13struct crypto_aead *
14aead_key_setup_encrypt(const char *alg, const u8 key[],
15 size_t key_len, size_t mic_len);
16
17int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
18 size_t aad_len, u8 *data,
19 size_t data_len, u8 *mic);
20
21int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
22 size_t aad_len, u8 *data,
23 size_t data_len, u8 *mic);
24
25void aead_key_free(struct crypto_aead *tfm);
26
27#endif /* _AEAD_API_H */
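
aead_api.c/aead_api.h fold the near-identical CCM and GCM code into one helper set parameterized by algorithm name and MIC length; aes_ccm.h and aes_gcm.h shrink to inline wrappers in the next hunks. Note that AAD handling moved to the callers: they now pass aad + 2 and the length read from the leading __be16, where the old per-cipher files copied a fixed CCM_AAD_LEN/GCM_AAD_LEN buffer and unpacked it internally. A hedged usage sketch; buffers and lengths are placeholders:

#include <linux/err.h>
#include "aead_api.h"

static int example_seal(const u8 *key, size_t key_len, size_t mic_len,
			u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic)
{
	struct crypto_aead *tfm;
	int err;

	tfm = aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = aead_encrypt(tfm, b_0, aad + 2, be16_to_cpup((__be16 *)aad),
			   data, data_len, mic);

	aead_key_free(tfm);
	return err;
}
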
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index fcd3254c5cf0..e9b7ca0bde5b 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -10,19 +10,39 @@
10#ifndef AES_CCM_H 10#ifndef AES_CCM_H
11#define AES_CCM_H 11#define AES_CCM_H
12 12
13#include <linux/crypto.h> 13#include "aead_api.h"
14 14
15#define CCM_AAD_LEN 32 15#define CCM_AAD_LEN 32
16 16
17struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], 17static inline struct crypto_aead *
18 size_t key_len, 18ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len)
19 size_t mic_len); 19{
20int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, 20 return aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len);
21 u8 *data, size_t data_len, u8 *mic, 21}
22 size_t mic_len); 22
23int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, 23static inline int
24 u8 *data, size_t data_len, u8 *mic, 24ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm,
25 size_t mic_len); 25 u8 *b_0, u8 *aad, u8 *data,
26void ieee80211_aes_key_free(struct crypto_aead *tfm); 26 size_t data_len, u8 *mic)
27{
28 return aead_encrypt(tfm, b_0, aad + 2,
29 be16_to_cpup((__be16 *)aad),
30 data, data_len, mic);
31}
32
33static inline int
34ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm,
35 u8 *b_0, u8 *aad, u8 *data,
36 size_t data_len, u8 *mic)
37{
38 return aead_decrypt(tfm, b_0, aad + 2,
39 be16_to_cpup((__be16 *)aad),
40 data, data_len, mic);
41}
42
43static inline void ieee80211_aes_key_free(struct crypto_aead *tfm)
44{
45 return aead_key_free(tfm);
46}
27 47
28#endif /* AES_CCM_H */ 48#endif /* AES_CCM_H */
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
deleted file mode 100644
index 8a4397cc1b08..000000000000
--- a/net/mac80211/aes_gcm.c
+++ /dev/null
@@ -1,109 +0,0 @@
1/*
2 * Copyright 2014-2015, Qualcomm Atheros, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/kernel.h>
10#include <linux/types.h>
11#include <linux/err.h>
12#include <crypto/aead.h>
13
14#include <net/mac80211.h>
15#include "key.h"
16#include "aes_gcm.h"
17
18int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
19 u8 *data, size_t data_len, u8 *mic)
20{
21 struct scatterlist sg[3];
22 struct aead_request *aead_req;
23 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
24 u8 *__aad;
25
26 aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
27 if (!aead_req)
28 return -ENOMEM;
29
30 __aad = (u8 *)aead_req + reqsize;
31 memcpy(__aad, aad, GCM_AAD_LEN);
32
33 sg_init_table(sg, 3);
34 sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
35 sg_set_buf(&sg[1], data, data_len);
36 sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
37
38 aead_request_set_tfm(aead_req, tfm);
39 aead_request_set_crypt(aead_req, sg, sg, data_len, j_0);
40 aead_request_set_ad(aead_req, sg[0].length);
41
42 crypto_aead_encrypt(aead_req);
43 kzfree(aead_req);
44 return 0;
45}
46
47int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
48 u8 *data, size_t data_len, u8 *mic)
49{
50 struct scatterlist sg[3];
51 struct aead_request *aead_req;
52 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
53 u8 *__aad;
54 int err;
55
56 if (data_len == 0)
57 return -EINVAL;
58
59 aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
60 if (!aead_req)
61 return -ENOMEM;
62
63 __aad = (u8 *)aead_req + reqsize;
64 memcpy(__aad, aad, GCM_AAD_LEN);
65
66 sg_init_table(sg, 3);
67 sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
68 sg_set_buf(&sg[1], data, data_len);
69 sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
70
71 aead_request_set_tfm(aead_req, tfm);
72 aead_request_set_crypt(aead_req, sg, sg,
73 data_len + IEEE80211_GCMP_MIC_LEN, j_0);
74 aead_request_set_ad(aead_req, sg[0].length);
75
76 err = crypto_aead_decrypt(aead_req);
77 kzfree(aead_req);
78
79 return err;
80}
81
82struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
83 size_t key_len)
84{
85 struct crypto_aead *tfm;
86 int err;
87
88 tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
89 if (IS_ERR(tfm))
90 return tfm;
91
92 err = crypto_aead_setkey(tfm, key, key_len);
93 if (err)
94 goto free_aead;
95 err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN);
96 if (err)
97 goto free_aead;
98
99 return tfm;
100
101free_aead:
102 crypto_free_aead(tfm);
103 return ERR_PTR(err);
104}
105
106void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
107{
108 crypto_free_aead(tfm);
109}
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
index 55aed5352494..d2b096033009 100644
--- a/net/mac80211/aes_gcm.h
+++ b/net/mac80211/aes_gcm.h
@@ -9,16 +9,38 @@
9#ifndef AES_GCM_H 9#ifndef AES_GCM_H
10#define AES_GCM_H 10#define AES_GCM_H
11 11
12#include <linux/crypto.h> 12#include "aead_api.h"
13 13
14#define GCM_AAD_LEN 32 14#define GCM_AAD_LEN 32
15 15
16int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, 16static inline int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm,
17 u8 *data, size_t data_len, u8 *mic); 17 u8 *j_0, u8 *aad, u8 *data,
18int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, 18 size_t data_len, u8 *mic)
19 u8 *data, size_t data_len, u8 *mic); 19{
20struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], 20 return aead_encrypt(tfm, j_0, aad + 2,
21 size_t key_len); 21 be16_to_cpup((__be16 *)aad),
22void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm); 22 data, data_len, mic);
23}
24
25static inline int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm,
26 u8 *j_0, u8 *aad, u8 *data,
27 size_t data_len, u8 *mic)
28{
29 return aead_decrypt(tfm, j_0, aad + 2,
30 be16_to_cpup((__be16 *)aad),
31 data, data_len, mic);
32}
33
34static inline struct crypto_aead *
35ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], size_t key_len)
36{
37 return aead_key_setup_encrypt("gcm(aes)", key,
38 key_len, IEEE80211_GCMP_MIC_LEN);
39}
40
41static inline void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
42{
43 return aead_key_free(tfm);
44}
23 45
24#endif /* AES_GCM_H */ 46#endif /* AES_GCM_H */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 2849a1fc41c5..d444752dbf40 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -151,21 +151,17 @@ EXPORT_SYMBOL(ieee80211_stop_rx_ba_session);
151 * After accepting the AddBA Request we activated a timer, 151 * After accepting the AddBA Request we activated a timer,
152 * resetting it after each frame that arrives from the originator. 152 * resetting it after each frame that arrives from the originator.
153 */ 153 */
154static void sta_rx_agg_session_timer_expired(unsigned long data) 154static void sta_rx_agg_session_timer_expired(struct timer_list *t)
155{ 155{
156 /* not an elegant detour, but there is no choice as the timer passes 156 struct tid_ampdu_rx *tid_rx_timer =
157 * only one argument, and various sta_info are needed here, so init 157 from_timer(tid_rx_timer, t, session_timer);
158 * flow in sta_info_create gives the TID as data, while the timer_to_id 158 struct sta_info *sta = tid_rx_timer->sta;
159 * array gives the sta through container_of */ 159 u8 tid = tid_rx_timer->tid;
160 u8 *ptid = (u8 *)data;
161 u8 *timer_to_id = ptid - *ptid;
162 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
163 timer_to_tid[0]);
164 struct tid_ampdu_rx *tid_rx; 160 struct tid_ampdu_rx *tid_rx;
165 unsigned long timeout; 161 unsigned long timeout;
166 162
167 rcu_read_lock(); 163 rcu_read_lock();
168 tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[*ptid]); 164 tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
169 if (!tid_rx) { 165 if (!tid_rx) {
170 rcu_read_unlock(); 166 rcu_read_unlock();
171 return; 167 return;
@@ -180,21 +176,18 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
180 rcu_read_unlock(); 176 rcu_read_unlock();
181 177
182 ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", 178 ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n",
183 sta->sta.addr, (u16)*ptid); 179 sta->sta.addr, tid);
184 180
185 set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); 181 set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired);
186 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); 182 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
187} 183}
188 184
189static void sta_rx_agg_reorder_timer_expired(unsigned long data) 185static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
190{ 186{
191 u8 *ptid = (u8 *)data; 187 struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, reorder_timer);
192 u8 *timer_to_id = ptid - *ptid;
193 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
194 timer_to_tid[0]);
195 188
196 rcu_read_lock(); 189 rcu_read_lock();
197 ieee80211_release_reorder_timeout(sta, *ptid); 190 ieee80211_release_reorder_timeout(tid_rx->sta, tid_rx->tid);
198 rcu_read_unlock(); 191 rcu_read_unlock();
199} 192}
200 193
@@ -356,14 +349,12 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
356 spin_lock_init(&tid_agg_rx->reorder_lock); 349 spin_lock_init(&tid_agg_rx->reorder_lock);
357 350
358 /* rx timer */ 351 /* rx timer */
359 setup_deferrable_timer(&tid_agg_rx->session_timer, 352 timer_setup(&tid_agg_rx->session_timer,
360 sta_rx_agg_session_timer_expired, 353 sta_rx_agg_session_timer_expired, TIMER_DEFERRABLE);
361 (unsigned long)&sta->timer_to_tid[tid]);
362 354
363 /* rx reorder timer */ 355 /* rx reorder timer */
364 setup_timer(&tid_agg_rx->reorder_timer, 356 timer_setup(&tid_agg_rx->reorder_timer,
365 sta_rx_agg_reorder_timer_expired, 357 sta_rx_agg_reorder_timer_expired, 0);
366 (unsigned long)&sta->timer_to_tid[tid]);
367 358
368 /* prepare reordering buffer */ 359 /* prepare reordering buffer */
369 tid_agg_rx->reorder_buf = 360 tid_agg_rx->reorder_buf =
@@ -399,6 +390,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
399 tid_agg_rx->auto_seq = auto_seq; 390 tid_agg_rx->auto_seq = auto_seq;
400 tid_agg_rx->started = false; 391 tid_agg_rx->started = false;
401 tid_agg_rx->reorder_buf_filtered = 0; 392 tid_agg_rx->reorder_buf_filtered = 0;
393 tid_agg_rx->tid = tid;
394 tid_agg_rx->sta = sta;
402 status = WLAN_STATUS_SUCCESS; 395 status = WLAN_STATUS_SUCCESS;
403 396
404 /* activate it for RX */ 397 /* activate it for RX */
@@ -459,7 +452,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
459} 452}
460 453
461void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, 454void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
462 const u8 *addr, unsigned int bit) 455 const u8 *addr, unsigned int tid)
463{ 456{
464 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); 457 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
465 struct ieee80211_local *local = sdata->local; 458 struct ieee80211_local *local = sdata->local;
@@ -470,7 +463,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
470 if (!sta) 463 if (!sta)
471 goto unlock; 464 goto unlock;
472 465
473 set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl); 466 set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl);
474 ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); 467 ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
475 unlock: 468 unlock:
476 rcu_read_unlock(); 469 rcu_read_unlock();
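
The agg-rx callbacks show why struct tid_ampdu_rx grew sta and tid fields: the timed object now carries its owners, replacing the old trick of passing &sta->timer_to_tid[tid] and recovering the sta_info by pointer arithmetic. Sketch of the shape; struct and handler names are hypothetical:

#include <linux/timer.h>

struct sta_info;					/* mac80211-private */
static void handle_timeout(struct sta_info *sta, u8 tid);	/* placeholder */

struct tid_session {
	struct timer_list session_timer;
	struct sta_info *sta;		/* back-pointer, set at setup */
	u8 tid;
};

static void tid_session_expired(struct timer_list *t)
{
	struct tid_session *s = from_timer(s, t, session_timer);

	handle_timeout(s->sta, s->tid);
}
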
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index bef516ec47f9..5f8ab5be369f 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -330,6 +330,11 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
330 330
331 spin_lock_bh(&sta->lock); 331 spin_lock_bh(&sta->lock);
332 332
333 /* free struct pending for start, if present */
334 tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
335 kfree(tid_tx);
336 sta->ampdu_mlme.tid_start_tx[tid] = NULL;
337
333 tid_tx = rcu_dereference_protected_tid_tx(sta, tid); 338 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
334 if (!tid_tx) { 339 if (!tid_tx) {
335 spin_unlock_bh(&sta->lock); 340 spin_unlock_bh(&sta->lock);
@@ -422,15 +427,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
422 * add Block Ack response will arrive from the recipient. 427 * add Block Ack response will arrive from the recipient.
423 * If this timer expires sta_addba_resp_timer_expired will be executed. 428 * If this timer expires sta_addba_resp_timer_expired will be executed.
424 */ 429 */
425static void sta_addba_resp_timer_expired(unsigned long data) 430static void sta_addba_resp_timer_expired(struct timer_list *t)
426{ 431{
427 /* not an elegant detour, but there is no choice as the timer passes 432 struct tid_ampdu_tx *tid_tx_timer =
428 * only one argument, and both sta_info and TID are needed, so init 433 from_timer(tid_tx_timer, t, addba_resp_timer);
429 * flow in sta_info_create gives the TID as data, while the timer_to_id 434 struct sta_info *sta = tid_tx_timer->sta;
430 * array gives the sta through container_of */ 435 u8 tid = tid_tx_timer->tid;
431 u16 tid = *(u8 *)data;
432 struct sta_info *sta = container_of((void *)data,
433 struct sta_info, timer_to_tid[tid]);
434 struct tid_ampdu_tx *tid_tx; 436 struct tid_ampdu_tx *tid_tx;
435 437
436 /* check if the TID waits for addBA response */ 438 /* check if the TID waits for addBA response */
@@ -525,21 +527,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
525 * After accepting the AddBA Response we activated a timer, 527 * After accepting the AddBA Response we activated a timer,
526 * resetting it after each frame that we send. 528 * resetting it after each frame that we send.
527 */ 529 */
528static void sta_tx_agg_session_timer_expired(unsigned long data) 530static void sta_tx_agg_session_timer_expired(struct timer_list *t)
529{ 531{
530 /* not an elegant detour, but there is no choice as the timer passes 532 struct tid_ampdu_tx *tid_tx_timer =
531 * only one argument, and various sta_info are needed here, so init 533 from_timer(tid_tx_timer, t, session_timer);
532 * flow in sta_info_create gives the TID as data, while the timer_to_id 534 struct sta_info *sta = tid_tx_timer->sta;
533 * array gives the sta through container_of */ 535 u8 tid = tid_tx_timer->tid;
534 u8 *ptid = (u8 *)data;
535 u8 *timer_to_id = ptid - *ptid;
536 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
537 timer_to_tid[0]);
538 struct tid_ampdu_tx *tid_tx; 536 struct tid_ampdu_tx *tid_tx;
539 unsigned long timeout; 537 unsigned long timeout;
540 538
541 rcu_read_lock(); 539 rcu_read_lock();
542 tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[*ptid]); 540 tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
543 if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { 541 if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
544 rcu_read_unlock(); 542 rcu_read_unlock();
545 return; 543 return;
@@ -555,9 +553,9 @@ static void sta_tx_agg_session_timer_expired(unsigned long data)
555 rcu_read_unlock(); 553 rcu_read_unlock();
556 554
557 ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", 555 ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n",
558 sta->sta.addr, (u16)*ptid); 556 sta->sta.addr, tid);
559 557
560 ieee80211_stop_tx_ba_session(&sta->sta, *ptid); 558 ieee80211_stop_tx_ba_session(&sta->sta, tid);
561} 559}
562 560
563int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, 561int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
@@ -670,16 +668,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
670 __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); 668 __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
671 669
672 tid_tx->timeout = timeout; 670 tid_tx->timeout = timeout;
671 tid_tx->sta = sta;
672 tid_tx->tid = tid;
673 673
674 /* response timer */ 674 /* response timer */
675 setup_timer(&tid_tx->addba_resp_timer, 675 timer_setup(&tid_tx->addba_resp_timer, sta_addba_resp_timer_expired, 0);
676 sta_addba_resp_timer_expired,
677 (unsigned long)&sta->timer_to_tid[tid]);
678 676
679 /* tx timer */ 677 /* tx timer */
680 setup_deferrable_timer(&tid_tx->session_timer, 678 timer_setup(&tid_tx->session_timer,
681 sta_tx_agg_session_timer_expired, 679 sta_tx_agg_session_timer_expired, TIMER_DEFERRABLE);
682 (unsigned long)&sta->timer_to_tid[tid]);
683 680
684 /* assign a dialog token */ 681 /* assign a dialog token */
685 sta->ampdu_mlme.dialog_token_allocator++; 682 sta->ampdu_mlme.dialog_token_allocator++;
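
The first agg-tx.c hunk also frees a start request that was queued but never acted on: a tid_tx sitting in tid_start_tx[] when the session is stopped would otherwise be leaked. The shape of the fix, as in the hunk:

	/* under sta->lock */
	kfree(sta->ampdu_mlme.tid_start_tx[tid]);	/* kfree() is NULL-safe */
	sta->ampdu_mlme.tid_start_tx[tid] = NULL;
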
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a354f1939e49..fb15d3b97cb2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
2727 if (!ieee80211_sdata_running(sdata)) 2727 if (!ieee80211_sdata_running(sdata))
2728 return -ENETDOWN; 2728 return -ENETDOWN;
2729 2729
2730 if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
2731 ret = drv_set_bitrate_mask(local, sdata, mask);
2732 if (ret)
2733 return ret;
2734 }
2735
2736 /* 2730 /*
2737 * If active validate the setting and reject it if it doesn't leave 2731 * If active validate the setting and reject it if it doesn't leave
2738 * at least one basic rate usable, since we really have to be able 2732 * at least one basic rate usable, since we really have to be able
@@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
2748 return -EINVAL; 2742 return -EINVAL;
2749 } 2743 }
2750 2744
2745 if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
2746 ret = drv_set_bitrate_mask(local, sdata, mask);
2747 if (ret)
2748 return ret;
2749 }
2750
2751 for (i = 0; i < NUM_NL80211_BANDS; i++) { 2751 for (i = 0; i < NUM_NL80211_BANDS; i++) {
2752 struct ieee80211_supported_band *sband = wiphy->bands[i]; 2752 struct ieee80211_supported_band *sband = wiphy->bands[i];
2753 int j; 2753 int j;
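
The cfg.c reorder makes ieee80211_set_bitrate_mask() validate before committing: a mask that cannot leave at least one usable basic rate is now rejected before drv_set_bitrate_mask() can push anything into the driver. The generic validate-then-commit shape, all names illustrative:

struct drv;
struct mask;
int validate_mask(const struct mask *mask);
int commit_mask(struct drv *drv, const struct mask *mask);

static int example_set_mask(struct drv *drv, const struct mask *mask)
{
	int err = validate_mask(mask);	/* no side effects yet */

	if (err)
		return err;

	return commit_mask(drv, mask);	/* driver sees only valid input */
}
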
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 1956b3115dd5..d90a8f9cc3fd 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __MAC80211_DEBUG_H 2#ifndef __MAC80211_DEBUG_H
2#define __MAC80211_DEBUG_H 3#define __MAC80211_DEBUG_H
3#include <net/cfg80211.h> 4#include <net/cfg80211.h>
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 60c35afee29d..d2c424787463 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __MAC80211_DEBUGFS_H 2#ifndef __MAC80211_DEBUGFS_H
2#define __MAC80211_DEBUGFS_H 3#define __MAC80211_DEBUGFS_H
3 4
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
index 32adc77e9c77..1cd7b8bff56c 100644
--- a/net/mac80211/debugfs_key.h
+++ b/net/mac80211/debugfs_key.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __MAC80211_DEBUGFS_KEY_H 2#ifndef __MAC80211_DEBUGFS_KEY_H
2#define __MAC80211_DEBUGFS_KEY_H 3#define __MAC80211_DEBUGFS_KEY_H
3 4
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index 9f5501a9a795..a7e9d8d518f9 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* routines exported for debugfs handling */ 2/* routines exported for debugfs handling */
2 3
3#ifndef __IEEE80211_DEBUGFS_NETDEV_H 4#ifndef __IEEE80211_DEBUGFS_NETDEV_H
diff --git a/net/mac80211/debugfs_sta.h b/net/mac80211/debugfs_sta.h
index 8b608903259f..d2e7c27ad6d1 100644
--- a/net/mac80211/debugfs_sta.h
+++ b/net/mac80211/debugfs_sta.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __MAC80211_DEBUGFS_STA_H 2#ifndef __MAC80211_DEBUGFS_STA_H
2#define __MAC80211_DEBUGFS_STA_H 3#define __MAC80211_DEBUGFS_STA_H
3 4
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 09f77e4a8a79..c7f93fd9ca7a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2* Portions of this file 3* Portions of this file
3* Copyright(c) 2016 Intel Deutschland GmbH 4* Copyright(c) 2016 Intel Deutschland GmbH
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index d6d0b4201e40..41f5e48f8021 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -290,13 +290,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
290{ 290{
291 int i; 291 int i;
292 292
293 mutex_lock(&sta->ampdu_mlme.mtx);
293 for (i = 0; i < IEEE80211_NUM_TIDS; i++) { 294 for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
294 __ieee80211_stop_tx_ba_session(sta, i, reason); 295 ___ieee80211_stop_tx_ba_session(sta, i, reason);
295 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, 296 ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
296 WLAN_REASON_QSTA_LEAVE_QBSS, 297 WLAN_REASON_QSTA_LEAVE_QBSS,
297 reason != AGG_STOP_DESTROY_STA && 298 reason != AGG_STOP_DESTROY_STA &&
298 reason != AGG_STOP_PEER_REQUEST); 299 reason != AGG_STOP_PEER_REQUEST);
299 } 300 }
301 mutex_unlock(&sta->ampdu_mlme.mtx);
300 302
301 /* stopping might queue the work again - so cancel only afterwards */ 303 /* stopping might queue the work again - so cancel only afterwards */
302 cancel_work_sync(&sta->ampdu_mlme.work); 304 cancel_work_sync(&sta->ampdu_mlme.work);
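
The ht.c hunk leans on the usual mac80211 layering: undecorated helpers take their own locks, while the ___-prefixed variants expect the caller to already hold ampdu_mlme.mtx, so the teardown loop can take the mutex once around all TIDs instead of once per iteration. Shape of the idiom, with illustrative names:

#include <linux/mutex.h>

#define N_ITEMS 16
struct owner { struct mutex mtx; };
static void ___teardown_one(struct owner *o, int i);	/* expects o->mtx held */

static void example_teardown_all(struct owner *o)
{
	int i;

	mutex_lock(&o->mtx);
	for (i = 0; i < N_ITEMS; i++)
		___teardown_one(o, i);
	mutex_unlock(&o->mtx);
}
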
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index e9c6aa3ed05b..db07e0de9a03 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1711,10 +1711,10 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
1711 sdata_unlock(sdata); 1711 sdata_unlock(sdata);
1712} 1712}
1713 1713
1714static void ieee80211_ibss_timer(unsigned long data) 1714static void ieee80211_ibss_timer(struct timer_list *t)
1715{ 1715{
1716 struct ieee80211_sub_if_data *sdata = 1716 struct ieee80211_sub_if_data *sdata =
1717 (struct ieee80211_sub_if_data *) data; 1717 from_timer(sdata, t, u.ibss.timer);
1718 1718
1719 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 1719 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
1720} 1720}
@@ -1723,8 +1723,7 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
1723{ 1723{
1724 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 1724 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
1725 1725
1726 setup_timer(&ifibss->timer, ieee80211_ibss_timer, 1726 timer_setup(&ifibss->timer, ieee80211_ibss_timer, 0);
1727 (unsigned long) sdata);
1728 INIT_LIST_HEAD(&ifibss->incomplete_stations); 1727 INIT_LIST_HEAD(&ifibss->incomplete_stations);
1729 spin_lock_init(&ifibss->incomplete_lock); 1728 spin_lock_init(&ifibss->incomplete_lock);
1730 INIT_WORK(&ifibss->csa_connection_drop_work, 1729 INIT_WORK(&ifibss->csa_connection_drop_work,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9675814f64db..885d00b41911 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1057,6 +1057,7 @@ struct tpt_led_trigger {
1057 const struct ieee80211_tpt_blink *blink_table; 1057 const struct ieee80211_tpt_blink *blink_table;
1058 unsigned int blink_table_len; 1058 unsigned int blink_table_len;
1059 struct timer_list timer; 1059 struct timer_list timer;
1060 struct ieee80211_local *local;
1060 unsigned long prev_traffic; 1061 unsigned long prev_traffic;
1061 unsigned long tx_bytes, rx_bytes; 1062 unsigned long tx_bytes, rx_bytes;
1062 unsigned int active, want; 1063 unsigned int active, want;
@@ -1932,7 +1933,7 @@ static inline int ieee80211_ac_from_tid(int tid)
1932 1933
1933void ieee80211_dynamic_ps_enable_work(struct work_struct *work); 1934void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
1934void ieee80211_dynamic_ps_disable_work(struct work_struct *work); 1935void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
1935void ieee80211_dynamic_ps_timer(unsigned long data); 1936void ieee80211_dynamic_ps_timer(struct timer_list *t);
1936void ieee80211_send_nullfunc(struct ieee80211_local *local, 1937void ieee80211_send_nullfunc(struct ieee80211_local *local,
1937 struct ieee80211_sub_if_data *sdata, 1938 struct ieee80211_sub_if_data *sdata,
1938 bool powersave); 1939 bool powersave);
@@ -2009,6 +2010,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
2009 struct txq_info *txq, int tid); 2010 struct txq_info *txq, int tid);
2010void ieee80211_txq_purge(struct ieee80211_local *local, 2011void ieee80211_txq_purge(struct ieee80211_local *local,
2011 struct txq_info *txqi); 2012 struct txq_info *txqi);
2013void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
2014 struct ieee80211_sub_if_data *sdata);
2012void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, 2015void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
2013 u16 transaction, u16 auth_alg, u16 status, 2016 u16 transaction, u16 auth_alg, u16 status,
2014 const u8 *extra, size_t extra_len, const u8 *bssid, 2017 const u8 *extra, size_t extra_len, const u8 *bssid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f75029abf728..13b16f90e1cf 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -793,9 +793,7 @@ static int ieee80211_open(struct net_device *dev)
793static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, 793static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
794 bool going_down) 794 bool going_down)
795{ 795{
796 struct ieee80211_sub_if_data *txq_sdata = sdata;
797 struct ieee80211_local *local = sdata->local; 796 struct ieee80211_local *local = sdata->local;
798 struct fq *fq = &local->fq;
799 unsigned long flags; 797 unsigned long flags;
800 struct sk_buff *skb, *tmp; 798 struct sk_buff *skb, *tmp;
801 u32 hw_reconf_flags = 0; 799 u32 hw_reconf_flags = 0;
@@ -939,9 +937,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
939 937
940 switch (sdata->vif.type) { 938 switch (sdata->vif.type) {
941 case NL80211_IFTYPE_AP_VLAN: 939 case NL80211_IFTYPE_AP_VLAN:
942 txq_sdata = container_of(sdata->bss,
943 struct ieee80211_sub_if_data, u.ap);
944
945 mutex_lock(&local->mtx); 940 mutex_lock(&local->mtx);
946 list_del(&sdata->u.vlan.list); 941 list_del(&sdata->u.vlan.list);
947 mutex_unlock(&local->mtx); 942 mutex_unlock(&local->mtx);
@@ -998,8 +993,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
998 skb_queue_purge(&sdata->skb_queue); 993 skb_queue_purge(&sdata->skb_queue);
999 } 994 }
1000 995
1001 sdata->bss = NULL;
1002
1003 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 996 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
1004 for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { 997 for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
1005 skb_queue_walk_safe(&local->pending[i], skb, tmp) { 998 skb_queue_walk_safe(&local->pending[i], skb, tmp) {
@@ -1012,22 +1005,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
1012 } 1005 }
1013 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 1006 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
1014 1007
1015 if (txq_sdata->vif.txq) { 1008 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
1016 struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq); 1009 ieee80211_txq_remove_vlan(local, sdata);
1017
1018 /*
1019 * FIXME FIXME
1020 *
1021 * We really shouldn't purge the *entire* txqi since that
1022 * contains frames for the other AP_VLANs (and possibly
1023 * the AP itself) as well, but there's no API in FQ now
1024 * to be able to filter.
1025 */
1026 1010
1027 spin_lock_bh(&fq->lock); 1011 sdata->bss = NULL;
1028 ieee80211_txq_purge(local, txqi);
1029 spin_unlock_bh(&fq->lock);
1030 }
1031 1012
1032 if (local->open_count == 0) 1013 if (local->open_count == 0)
1033 ieee80211_clear_tx_pending(local); 1014 ieee80211_clear_tx_pending(local);
@@ -1772,7 +1753,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
1772 sizeof(void *)); 1753 sizeof(void *));
1773 int txq_size = 0; 1754 int txq_size = 0;
1774 1755
1775 if (local->ops->wake_tx_queue) 1756 if (local->ops->wake_tx_queue &&
1757 type != NL80211_IFTYPE_AP_VLAN &&
1758 type != NL80211_IFTYPE_MONITOR)
1776 txq_size += sizeof(struct txq_info) + 1759 txq_size += sizeof(struct txq_info) +
1777 local->hw.txq_data_size; 1760 local->hw.txq_data_size;
1778 1761
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index a98fc2b5e0dc..938049395f90 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -4,7 +4,7 @@
4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
5 * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> 5 * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
6 * Copyright 2013-2014 Intel Mobile Communications GmbH 6 * Copyright 2013-2014 Intel Mobile Communications GmbH
7 * Copyright 2015 Intel Deutschland GmbH 7 * Copyright 2015-2017 Intel Deutschland GmbH
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/export.h> 20#include <linux/export.h>
21#include <net/mac80211.h> 21#include <net/mac80211.h>
22#include <crypto/algapi.h>
22#include <asm/unaligned.h> 23#include <asm/unaligned.h>
23#include "ieee80211_i.h" 24#include "ieee80211_i.h"
24#include "driver-ops.h" 25#include "driver-ops.h"
@@ -609,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key)
609 ieee80211_key_free_common(key); 610 ieee80211_key_free_common(key);
610} 611}
611 612
613static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
614 struct ieee80211_key *old,
615 struct ieee80211_key *new)
616{
617 u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
618 u8 *tk_old, *tk_new;
619
620 if (!old || new->conf.keylen != old->conf.keylen)
621 return false;
622
623 tk_old = old->conf.key;
624 tk_new = new->conf.key;
625
626 /*
627 * In station mode, don't compare the TX MIC key, as it's never used
628 * and offloaded rekeying may not care to send it to the host. This
629 * is the case in iwlwifi, for example.
630 */
631 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
632 new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
633 new->conf.keylen == WLAN_KEY_LEN_TKIP &&
634 !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
635 memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
636 memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
637 memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
638 memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
639 tk_old = tkip_old;
640 tk_new = tkip_new;
641 }
642
643 return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
644}
645
612int ieee80211_key_link(struct ieee80211_key *key, 646int ieee80211_key_link(struct ieee80211_key *key,
613 struct ieee80211_sub_if_data *sdata, 647 struct ieee80211_sub_if_data *sdata,
614 struct sta_info *sta) 648 struct sta_info *sta)
@@ -620,9 +654,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
620 654
621 pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; 655 pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
622 idx = key->conf.keyidx; 656 idx = key->conf.keyidx;
623 key->local = sdata->local;
624 key->sdata = sdata;
625 key->sta = sta;
626 657
627 mutex_lock(&sdata->local->key_mtx); 658 mutex_lock(&sdata->local->key_mtx);
628 659
@@ -633,6 +664,20 @@ int ieee80211_key_link(struct ieee80211_key *key,
633 else 664 else
634 old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); 665 old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
635 666
667 /*
668 * Silently accept key re-installation without really installing the
669 * new version of the key to avoid nonce reuse or replay issues.
670 */
671 if (ieee80211_key_identical(sdata, old_key, key)) {
672 ieee80211_key_free_unused(key);
673 ret = 0;
674 goto out;
675 }
676
677 key->local = sdata->local;
678 key->sdata = sdata;
679 key->sta = sta;
680
636 increment_tailroom_need_count(sdata); 681 increment_tailroom_need_count(sdata);
637 682
638 ieee80211_key_replace(sdata, sta, pairwise, old_key, key); 683 ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
@@ -648,6 +693,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
648 ret = 0; 693 ret = 0;
649 } 694 }
650 695
696 out:
651 mutex_unlock(&sdata->local->key_mtx); 697 mutex_unlock(&sdata->local->key_mtx);
652 698
653 return ret; 699 return ret;
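
The new ieee80211_key_identical() in key.c compares key material with crypto_memneq(), whose running time is independent of where the buffers first differ, so a peer replaying an install cannot learn key bytes through timing; matching keys are then accepted without reinstalling, avoiding the nonce reuse the in-diff comment describes. The comparison idiom in minimal form:

#include <linux/types.h>
#include <crypto/algapi.h>

static bool keys_identical(const u8 *a, const u8 *b, size_t len)
{
	return !crypto_memneq(a, b, len);	/* crypto_memneq() is 0 when equal */
}
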
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 0505845b7ab8..ba0b507ea691 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -248,10 +248,10 @@ static unsigned long tpt_trig_traffic(struct ieee80211_local *local,
248 return DIV_ROUND_UP(delta, 1024 / 8); 248 return DIV_ROUND_UP(delta, 1024 / 8);
249} 249}
250 250
251static void tpt_trig_timer(unsigned long data) 251static void tpt_trig_timer(struct timer_list *t)
252{ 252{
253 struct ieee80211_local *local = (void *)data; 253 struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer);
254 struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; 254 struct ieee80211_local *local = tpt_trig->local;
255 struct led_classdev *led_cdev; 255 struct led_classdev *led_cdev;
256 unsigned long on, off, tpt; 256 unsigned long on, off, tpt;
257 int i; 257 int i;
@@ -306,8 +306,9 @@ __ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
306 tpt_trig->blink_table = blink_table; 306 tpt_trig->blink_table = blink_table;
307 tpt_trig->blink_table_len = blink_table_len; 307 tpt_trig->blink_table_len = blink_table_len;
308 tpt_trig->want = flags; 308 tpt_trig->want = flags;
309 tpt_trig->local = local;
309 310
310 setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local); 311 timer_setup(&tpt_trig->timer, tpt_trig_timer, 0);
311 312
312 local->tpt_led_trigger = tpt_trig; 313 local->tpt_led_trigger = tpt_trig;
313 314
@@ -326,7 +327,7 @@ static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
326 tpt_trig_traffic(local, tpt_trig); 327 tpt_trig_traffic(local, tpt_trig);
327 tpt_trig->running = true; 328 tpt_trig->running = true;
328 329
329 tpt_trig_timer((unsigned long)local); 330 tpt_trig_timer(&tpt_trig->timer);
330 mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); 331 mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ));
331} 332}
332 333
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8aa1f5b6a051..e054a2fd8d38 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -633,8 +633,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
633 ieee80211_dynamic_ps_enable_work); 633 ieee80211_dynamic_ps_enable_work);
634 INIT_WORK(&local->dynamic_ps_disable_work, 634 INIT_WORK(&local->dynamic_ps_disable_work,
635 ieee80211_dynamic_ps_disable_work); 635 ieee80211_dynamic_ps_disable_work);
636 setup_timer(&local->dynamic_ps_timer, 636 timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0);
637 ieee80211_dynamic_ps_timer, (unsigned long) local);
638 637
639 INIT_WORK(&local->sched_scan_stopped_work, 638 INIT_WORK(&local->sched_scan_stopped_work,
640 ieee80211_sched_scan_stopped_work); 639 ieee80211_sched_scan_stopped_work);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index a550c707cd8a..5e27364e10ac 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -37,9 +37,10 @@ void ieee80211s_stop(void)
37 kmem_cache_destroy(rm_cache); 37 kmem_cache_destroy(rm_cache);
38} 38}
39 39
40static void ieee80211_mesh_housekeeping_timer(unsigned long data) 40static void ieee80211_mesh_housekeeping_timer(struct timer_list *t)
41{ 41{
42 struct ieee80211_sub_if_data *sdata = (void *) data; 42 struct ieee80211_sub_if_data *sdata =
43 from_timer(sdata, t, u.mesh.housekeeping_timer);
43 struct ieee80211_local *local = sdata->local; 44 struct ieee80211_local *local = sdata->local;
44 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 45 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
45 46
@@ -528,18 +529,18 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
528 return 0; 529 return 0;
529} 530}
530 531
531static void ieee80211_mesh_path_timer(unsigned long data) 532static void ieee80211_mesh_path_timer(struct timer_list *t)
532{ 533{
533 struct ieee80211_sub_if_data *sdata = 534 struct ieee80211_sub_if_data *sdata =
534 (struct ieee80211_sub_if_data *) data; 535 from_timer(sdata, t, u.mesh.mesh_path_timer);
535 536
536 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 537 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
537} 538}
538 539
539static void ieee80211_mesh_path_root_timer(unsigned long data) 540static void ieee80211_mesh_path_root_timer(struct timer_list *t)
540{ 541{
541 struct ieee80211_sub_if_data *sdata = 542 struct ieee80211_sub_if_data *sdata =
542 (struct ieee80211_sub_if_data *) data; 543 from_timer(sdata, t, u.mesh.mesh_path_root_timer);
543 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 544 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
544 545
545 set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); 546 set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
@@ -675,8 +676,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
675 enum nl80211_band band; 676 enum nl80211_band band;
676 u8 *pos; 677 u8 *pos;
677 struct ieee80211_sub_if_data *sdata; 678 struct ieee80211_sub_if_data *sdata;
678 int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + 679 int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
679 sizeof(mgmt->u.beacon);
680 680
681 sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); 681 sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
682 rcu_read_lock(); 682 rcu_read_lock();
@@ -1443,9 +1443,8 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
1443 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 1443 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
1444 static u8 zero_addr[ETH_ALEN] = {}; 1444 static u8 zero_addr[ETH_ALEN] = {};
1445 1445
1446 setup_timer(&ifmsh->housekeeping_timer, 1446 timer_setup(&ifmsh->housekeeping_timer,
1447 ieee80211_mesh_housekeeping_timer, 1447 ieee80211_mesh_housekeeping_timer, 0);
1448 (unsigned long) sdata);
1449 1448
1450 ifmsh->accepting_plinks = true; 1449 ifmsh->accepting_plinks = true;
1451 atomic_set(&ifmsh->mpaths, 0); 1450 atomic_set(&ifmsh->mpaths, 0);
@@ -1459,12 +1458,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
1459 1458
1460 mesh_pathtbl_init(sdata); 1459 mesh_pathtbl_init(sdata);
1461 1460
1462 setup_timer(&ifmsh->mesh_path_timer, 1461 timer_setup(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, 0);
1463 ieee80211_mesh_path_timer, 1462 timer_setup(&ifmsh->mesh_path_root_timer,
1464 (unsigned long) sdata); 1463 ieee80211_mesh_path_root_timer, 0);
1465 setup_timer(&ifmsh->mesh_path_root_timer,
1466 ieee80211_mesh_path_root_timer,
1467 (unsigned long) sdata);
1468 INIT_LIST_HEAD(&ifmsh->preq_queue.list); 1464 INIT_LIST_HEAD(&ifmsh->preq_queue.list);
1469 skb_queue_head_init(&ifmsh->ps.bc_buf); 1465 skb_queue_head_init(&ifmsh->ps.bc_buf);
1470 spin_lock_init(&ifmsh->mesh_preq_queue_lock); 1466 spin_lock_init(&ifmsh->mesh_preq_queue_lock);
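
mesh.c also begins a second recurring cleanup (repeated in mesh_hwmp.c, mesh_plink.c and util.c): header lengths computed as offsetof(...) + sizeof(...) collapse into offsetofend(). The two spellings are equivalent; a sketch with a hypothetical struct:

#include <linux/stddef.h>       /* offsetofend() */
#include <linux/types.h>

struct mgmt_like {
        u8 frame_control[2];
        u8 addrs[18];
        union {
                u8 beacon[12];
                u8 action[24];
        } u;
};

/* offsetofend(T, m) is offsetof(T, m) + sizeof(m): the offset of the
 * first byte after m, i.e. a header length that runs up to and
 * including that member
 */
static const int beacon_hdr_len = offsetofend(struct mgmt_like, u.beacon);
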
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7e5f271e3c30..ee56f18cad3f 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -275,6 +275,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
275 u8 *hw_addr, struct ieee802_11_elems *ie); 275 u8 *hw_addr, struct ieee802_11_elems *ie);
276bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); 276bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
277u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); 277u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
278void mesh_plink_timer(struct timer_list *t);
278void mesh_plink_broken(struct sta_info *sta); 279void mesh_plink_broken(struct sta_info *sta);
279u32 mesh_plink_deactivate(struct sta_info *sta); 280u32 mesh_plink_deactivate(struct sta_info *sta);
280u32 mesh_plink_open(struct sta_info *sta); 281u32 mesh_plink_open(struct sta_info *sta);
@@ -295,7 +296,7 @@ void mesh_path_tx_pending(struct mesh_path *mpath);
295int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); 296int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata);
296void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata); 297void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata);
297int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); 298int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr);
298void mesh_path_timer(unsigned long data); 299void mesh_path_timer(struct timer_list *t);
299void mesh_path_flush_by_nexthop(struct sta_info *sta); 300void mesh_path_flush_by_nexthop(struct sta_info *sta);
300void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, 301void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
301 struct sk_buff *skb); 302 struct sk_buff *skb);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d8bbd0d2225a..4f7826d7b47c 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
111 struct sk_buff *skb; 111 struct sk_buff *skb;
112 struct ieee80211_mgmt *mgmt; 112 struct ieee80211_mgmt *mgmt;
113 u8 *pos, ie_len; 113 u8 *pos, ie_len;
114 int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + 114 int hdr_len = offsetofend(struct ieee80211_mgmt,
115 sizeof(mgmt->u.action.u.mesh_action); 115 u.action.u.mesh_action);
116 116
117 skb = dev_alloc_skb(local->tx_headroom + 117 skb = dev_alloc_skb(local->tx_headroom +
118 hdr_len + 118 hdr_len +
@@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
242 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 242 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
243 struct ieee80211_mgmt *mgmt; 243 struct ieee80211_mgmt *mgmt;
244 u8 *pos, ie_len; 244 u8 *pos, ie_len;
245 int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + 245 int hdr_len = offsetofend(struct ieee80211_mgmt,
246 sizeof(mgmt->u.action.u.mesh_action); 246 u.action.u.mesh_action);
247 247
248 if (time_before(jiffies, ifmsh->next_perr)) 248 if (time_before(jiffies, ifmsh->next_perr))
249 return -EAGAIN; 249 return -EAGAIN;
@@ -1194,9 +1194,9 @@ endlookup:
1194 return err; 1194 return err;
1195} 1195}
1196 1196
1197void mesh_path_timer(unsigned long data) 1197void mesh_path_timer(struct timer_list *t)
1198{ 1198{
1199 struct mesh_path *mpath = (void *) data; 1199 struct mesh_path *mpath = from_timer(mpath, t, timer);
1200 struct ieee80211_sub_if_data *sdata = mpath->sdata; 1200 struct ieee80211_sub_if_data *sdata = mpath->sdata;
1201 int ret; 1201 int ret;
1202 1202
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 97269caafecd..86c8dfef56a4 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -399,8 +399,7 @@ struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata,
399 skb_queue_head_init(&new_mpath->frame_queue); 399 skb_queue_head_init(&new_mpath->frame_queue);
400 new_mpath->exp_time = jiffies; 400 new_mpath->exp_time = jiffies;
401 spin_lock_init(&new_mpath->state_lock); 401 spin_lock_init(&new_mpath->state_lock);
402 setup_timer(&new_mpath->timer, mesh_path_timer, 402 timer_setup(&new_mpath->timer, mesh_path_timer, 0);
403 (unsigned long) new_mpath);
404 403
405 return new_mpath; 404 return new_mpath;
406} 405}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f69c6c38ca43..e2d00cce3c17 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
220 bool include_plid = false; 220 bool include_plid = false;
221 u16 peering_proto = 0; 221 u16 peering_proto = 0;
222 u8 *pos, ie_len = 4; 222 u8 *pos, ie_len = 4;
223 int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + 223 int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
224 sizeof(mgmt->u.action.u.self_prot);
225 int err = -ENOMEM; 224 int err = -ENOMEM;
226 225
227 skb = dev_alloc_skb(local->tx_headroom + 226 skb = dev_alloc_skb(local->tx_headroom +
@@ -604,8 +603,9 @@ out:
604 ieee80211_mbss_info_change_notify(sdata, changed); 603 ieee80211_mbss_info_change_notify(sdata, changed);
605} 604}
606 605
607static void mesh_plink_timer(unsigned long data) 606void mesh_plink_timer(struct timer_list *t)
608{ 607{
608 struct mesh_sta *mesh = from_timer(mesh, t, plink_timer);
609 struct sta_info *sta; 609 struct sta_info *sta;
610 u16 reason = 0; 610 u16 reason = 0;
611 struct ieee80211_sub_if_data *sdata; 611 struct ieee80211_sub_if_data *sdata;
@@ -617,7 +617,7 @@ static void mesh_plink_timer(unsigned long data)
617 * del_timer_sync() this timer after having made sure 617 * del_timer_sync() this timer after having made sure
618 * it cannot be readded (by deleting the plink.) 618 * it cannot be readded (by deleting the plink.)
619 */ 619 */
620 sta = (struct sta_info *) data; 620 sta = mesh->plink_sta;
621 621
622 if (sta->sdata->local->quiescing) 622 if (sta->sdata->local->quiescing)
623 return; 623 return;
@@ -697,11 +697,8 @@ static void mesh_plink_timer(unsigned long data)
697 697
698static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout) 698static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout)
699{ 699{
700 sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout);
701 sta->mesh->plink_timer.data = (unsigned long) sta;
702 sta->mesh->plink_timer.function = mesh_plink_timer;
703 sta->mesh->plink_timeout = timeout; 700 sta->mesh->plink_timeout = timeout;
704 add_timer(&sta->mesh->plink_timer); 701 mod_timer(&sta->mesh->plink_timer, jiffies + msecs_to_jiffies(timeout));
705} 702}
706 703
707static bool llid_in_use(struct ieee80211_sub_if_data *sdata, 704static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
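
Besides the signature change, mesh_plink_timer_set() drops its open-coded arming: with timer_setup() having bound the callback once at allocation, there is nothing left to assign, and mod_timer() replaces add_timer(), which is only valid on a timer that is not already pending, while mod_timer() safely re-arms either way. A sketch, assuming a hypothetical container:

#include <linux/timer.h>
#include <linux/jiffies.h>

struct plink_like {
        struct timer_list timer;        /* bound once via timer_setup() */
};

static void plink_timer_set(struct plink_like *p, unsigned int timeout_ms)
{
        /* one call replaces expires/data/function + add_timer() */
        mod_timer(&p->timer, jiffies + msecs_to_jiffies(timeout_ms));
}

The new mesh->plink_sta back-pointer exists for the same reason as tpt_trig->local earlier: from_timer() can only recover the mesh_sta that embeds the timer, while the callback needs the owning sta_info.
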
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3b8e2709d8de..04460440d731 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,7 +145,6 @@ static u32
145ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, 145ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
146 struct ieee80211_supported_band *sband, 146 struct ieee80211_supported_band *sband,
147 struct ieee80211_channel *channel, 147 struct ieee80211_channel *channel,
148 const struct ieee80211_ht_cap *ht_cap,
149 const struct ieee80211_ht_operation *ht_oper, 148 const struct ieee80211_ht_operation *ht_oper,
150 const struct ieee80211_vht_operation *vht_oper, 149 const struct ieee80211_vht_operation *vht_oper,
151 struct cfg80211_chan_def *chandef, bool tracking) 150 struct cfg80211_chan_def *chandef, bool tracking)
@@ -163,20 +162,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
163 chandef->center_freq1 = channel->center_freq; 162 chandef->center_freq1 = channel->center_freq;
164 chandef->center_freq2 = 0; 163 chandef->center_freq2 = 0;
165 164
166 if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) { 165 if (!ht_oper || !sta_ht_cap.ht_supported) {
167 ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; 166 ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
168 goto out; 167 goto out;
169 } 168 }
170 169
171 chandef->width = NL80211_CHAN_WIDTH_20; 170 chandef->width = NL80211_CHAN_WIDTH_20;
172 171
173 if (!(ht_cap->cap_info &
174 cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
175 ret = IEEE80211_STA_DISABLE_40MHZ;
176 vht_chandef = *chandef;
177 goto out;
178 }
179
180 ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, 172 ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
181 channel->band); 173 channel->band);
182 /* check that channel matches the right operating channel */ 174 /* check that channel matches the right operating channel */
@@ -344,7 +336,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
344 336
345 /* calculate new channel (type) based on HT/VHT operation IEs */ 337 /* calculate new channel (type) based on HT/VHT operation IEs */
346 flags = ieee80211_determine_chantype(sdata, sband, chan, 338 flags = ieee80211_determine_chantype(sdata, sband, chan,
347 ht_cap, ht_oper, vht_oper, 339 ht_oper, vht_oper,
348 &chandef, true); 340 &chandef, true);
349 341
350 /* 342 /*
@@ -780,11 +772,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
780 WLAN_EID_SUPPORTED_REGULATORY_CLASSES, 772 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
781 WLAN_EID_HT_CAPABILITY, 773 WLAN_EID_HT_CAPABILITY,
782 WLAN_EID_BSS_COEX_2040, 774 WLAN_EID_BSS_COEX_2040,
775 /* luckily this is almost always there */
783 WLAN_EID_EXT_CAPABILITY, 776 WLAN_EID_EXT_CAPABILITY,
784 WLAN_EID_QOS_TRAFFIC_CAPA, 777 WLAN_EID_QOS_TRAFFIC_CAPA,
785 WLAN_EID_TIM_BCAST_REQ, 778 WLAN_EID_TIM_BCAST_REQ,
786 WLAN_EID_INTERWORKING, 779 WLAN_EID_INTERWORKING,
787 /* 60GHz doesn't happen right now */ 780 /* 60 GHz (Multi-band, DMG, MMS) can't happen */
788 WLAN_EID_VHT_CAPABILITY, 781 WLAN_EID_VHT_CAPABILITY,
789 WLAN_EID_OPMODE_NOTIF, 782 WLAN_EID_OPMODE_NOTIF,
790 }; 783 };
@@ -811,22 +804,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
811 /* if present, add any custom IEs that go before VHT */ 804 /* if present, add any custom IEs that go before VHT */
812 if (assoc_data->ie_len) { 805 if (assoc_data->ie_len) {
813 static const u8 before_vht[] = { 806 static const u8 before_vht[] = {
814 WLAN_EID_SSID, 807 /*
815 WLAN_EID_SUPP_RATES, 808 * no need to list the ones split off before HT
816 WLAN_EID_EXT_SUPP_RATES, 809 * or generated here
817 WLAN_EID_PWR_CAPABILITY, 810 */
818 WLAN_EID_SUPPORTED_CHANNELS,
819 WLAN_EID_RSN,
820 WLAN_EID_QOS_CAPA,
821 WLAN_EID_RRM_ENABLED_CAPABILITIES,
822 WLAN_EID_MOBILITY_DOMAIN,
823 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
824 WLAN_EID_HT_CAPABILITY,
825 WLAN_EID_BSS_COEX_2040, 811 WLAN_EID_BSS_COEX_2040,
826 WLAN_EID_EXT_CAPABILITY, 812 WLAN_EID_EXT_CAPABILITY,
827 WLAN_EID_QOS_TRAFFIC_CAPA, 813 WLAN_EID_QOS_TRAFFIC_CAPA,
828 WLAN_EID_TIM_BCAST_REQ, 814 WLAN_EID_TIM_BCAST_REQ,
829 WLAN_EID_INTERWORKING, 815 WLAN_EID_INTERWORKING,
816 /* 60 GHz (Multi-band, DMG, MMS) can't happen */
830 }; 817 };
831 818
832 /* RIC already taken above, so no need to handle here anymore */ 819 /* RIC already taken above, so no need to handle here anymore */
@@ -1079,10 +1066,10 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
1079} 1066}
1080EXPORT_SYMBOL(ieee80211_chswitch_done); 1067EXPORT_SYMBOL(ieee80211_chswitch_done);
1081 1068
1082static void ieee80211_chswitch_timer(unsigned long data) 1069static void ieee80211_chswitch_timer(struct timer_list *t)
1083{ 1070{
1084 struct ieee80211_sub_if_data *sdata = 1071 struct ieee80211_sub_if_data *sdata =
1085 (struct ieee80211_sub_if_data *) data; 1072 from_timer(sdata, t, u.mgd.chswitch_timer);
1086 1073
1087 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work); 1074 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
1088} 1075}
@@ -1590,9 +1577,9 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
1590 } 1577 }
1591} 1578}
1592 1579
1593void ieee80211_dynamic_ps_timer(unsigned long data) 1580void ieee80211_dynamic_ps_timer(struct timer_list *t)
1594{ 1581{
1595 struct ieee80211_local *local = (void *) data; 1582 struct ieee80211_local *local = from_timer(local, t, dynamic_ps_timer);
1596 1583
1597 ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); 1584 ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work);
1598} 1585}
@@ -3724,10 +3711,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
3724 sdata_unlock(sdata); 3711 sdata_unlock(sdata);
3725} 3712}
3726 3713
3727static void ieee80211_sta_timer(unsigned long data) 3714static void ieee80211_sta_timer(struct timer_list *t)
3728{ 3715{
3729 struct ieee80211_sub_if_data *sdata = 3716 struct ieee80211_sub_if_data *sdata =
3730 (struct ieee80211_sub_if_data *) data; 3717 from_timer(sdata, t, u.mgd.timer);
3731 3718
3732 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 3719 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
3733} 3720}
@@ -4004,10 +3991,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
4004 sdata_unlock(sdata); 3991 sdata_unlock(sdata);
4005} 3992}
4006 3993
4007static void ieee80211_sta_bcn_mon_timer(unsigned long data) 3994static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
4008{ 3995{
4009 struct ieee80211_sub_if_data *sdata = 3996 struct ieee80211_sub_if_data *sdata =
4010 (struct ieee80211_sub_if_data *) data; 3997 from_timer(sdata, t, u.mgd.bcn_mon_timer);
4011 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 3998 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
4012 3999
4013 if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) 4000 if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn)
@@ -4018,10 +4005,10 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data)
4018 &sdata->u.mgd.beacon_connection_loss_work); 4005 &sdata->u.mgd.beacon_connection_loss_work);
4019} 4006}
4020 4007
4021static void ieee80211_sta_conn_mon_timer(unsigned long data) 4008static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
4022{ 4009{
4023 struct ieee80211_sub_if_data *sdata = 4010 struct ieee80211_sub_if_data *sdata =
4024 (struct ieee80211_sub_if_data *) data; 4011 from_timer(sdata, t, u.mgd.conn_mon_timer);
4025 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 4012 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
4026 struct ieee80211_local *local = sdata->local; 4013 struct ieee80211_local *local = sdata->local;
4027 4014
@@ -4152,14 +4139,10 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
4152 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work); 4139 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work);
4153 INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work, 4140 INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work,
4154 ieee80211_tdls_peer_del_work); 4141 ieee80211_tdls_peer_del_work);
4155 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 4142 timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0);
4156 (unsigned long) sdata); 4143 timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0);
4157 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 4144 timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0);
4158 (unsigned long) sdata); 4145 timer_setup(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, 0);
4159 setup_timer(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer,
4160 (unsigned long) sdata);
4161 setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer,
4162 (unsigned long) sdata);
4163 INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk, 4146 INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk,
4164 ieee80211_sta_handle_tspec_ac_params_wk); 4147 ieee80211_sta_handle_tspec_ac_params_wk);
4165 4148
@@ -4317,7 +4300,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
4317 4300
4318 ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, 4301 ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
4319 cbss->channel, 4302 cbss->channel,
4320 ht_cap, ht_oper, vht_oper, 4303 ht_oper, vht_oper,
4321 &chandef, false); 4304 &chandef, false);
4322 4305
4323 sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), 4306 sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
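
The slimmed-down before_vht[] allow-lists work because ieee80211_ie_split() only needs the element IDs that can still legally occur between the current parse offset and the VHT elements; anything already split off before HT, or generated by mac80211 itself, cannot be met again. A sketch of the pattern around the hunk above, with a hypothetical helper name:

#include <net/cfg80211.h>       /* ieee80211_ie_split() */

/* copy the user-supplied IEs that must precede the VHT IEs into the
 * frame, returning the new parse offset
 */
static size_t copy_ies_before_vht(struct sk_buff *skb, const u8 *ie,
                                  size_t ie_len, size_t offset)
{
        static const u8 before_vht[] = {
                WLAN_EID_BSS_COEX_2040,
                WLAN_EID_EXT_CAPABILITY,
                WLAN_EID_INTERWORKING,
        };
        size_t noffset = ieee80211_ie_split(ie, ie_len, before_vht,
                                            ARRAY_SIZE(before_vht), offset);

        skb_put_data(skb, ie + offset, noffset - offset);
        return noffset;
}
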
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 88e6ebbbe24f..d351dc1162be 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -150,9 +150,10 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata)
150 sdata_unlock(sdata); 150 sdata_unlock(sdata);
151} 151}
152 152
153static void ieee80211_ocb_housekeeping_timer(unsigned long data) 153static void ieee80211_ocb_housekeeping_timer(struct timer_list *t)
154{ 154{
155 struct ieee80211_sub_if_data *sdata = (void *)data; 155 struct ieee80211_sub_if_data *sdata =
156 from_timer(sdata, t, u.ocb.housekeeping_timer);
156 struct ieee80211_local *local = sdata->local; 157 struct ieee80211_local *local = sdata->local;
157 struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; 158 struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
158 159
@@ -165,9 +166,8 @@ void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata)
165{ 166{
166 struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; 167 struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
167 168
168 setup_timer(&ifocb->housekeeping_timer, 169 timer_setup(&ifocb->housekeeping_timer,
169 ieee80211_ocb_housekeeping_timer, 170 ieee80211_ocb_housekeeping_timer, 0);
170 (unsigned long)sdata);
171 INIT_LIST_HEAD(&ifocb->incomplete_stations); 171 INIT_LIST_HEAD(&ifocb->incomplete_stations);
172 spin_lock_init(&ifocb->incomplete_lock); 172 spin_lock_init(&ifocb->incomplete_lock);
173} 173}
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index a87d195c4a61..38c45e1dafd8 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <net/mac80211.h> 2#include <net/mac80211.h>
2#include <net/rtnetlink.h> 3#include <net/rtnetlink.h>
3 4
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 47d2ed570470..ef2becaade50 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -7,7 +7,7 @@
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2013-2015 Intel Mobile Communications GmbH 9 * Copyright 2013-2015 Intel Mobile Communications GmbH
10 * Copyright 2016 Intel Deutschland GmbH 10 * Copyright 2016-2017 Intel Deutschland GmbH
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
183 return bss; 183 return bss;
184} 184}
185 185
186static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
187 u32 scan_flags, const u8 *da)
188{
189 if (!sdata)
190 return false;
191 /* accept broadcast for OCE */
192 if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
193 is_broadcast_ether_addr(da))
194 return true;
195 if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
196 return true;
197 return ether_addr_equal(da, sdata->vif.addr);
198}
199
186void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) 200void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
187{ 201{
188 struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); 202 struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
@@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
208 if (ieee80211_is_probe_resp(mgmt->frame_control)) { 222 if (ieee80211_is_probe_resp(mgmt->frame_control)) {
209 struct cfg80211_scan_request *scan_req; 223 struct cfg80211_scan_request *scan_req;
210 struct cfg80211_sched_scan_request *sched_scan_req; 224 struct cfg80211_sched_scan_request *sched_scan_req;
225 u32 scan_req_flags = 0, sched_scan_req_flags = 0;
211 226
212 scan_req = rcu_dereference(local->scan_req); 227 scan_req = rcu_dereference(local->scan_req);
213 sched_scan_req = rcu_dereference(local->sched_scan_req); 228 sched_scan_req = rcu_dereference(local->sched_scan_req);
214 229
215 /* ignore ProbeResp to foreign address unless scanning 230 if (scan_req)
216 * with randomised address 231 scan_req_flags = scan_req->flags;
232
233 if (sched_scan_req)
234 sched_scan_req_flags = sched_scan_req->flags;
235
236 /* ignore ProbeResp to foreign address or non-bcast (OCE)
237 * unless scanning with randomised address
217 */ 238 */
218 if (!(sdata1 && 239 if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
219 (ether_addr_equal(mgmt->da, sdata1->vif.addr) || 240 mgmt->da) &&
220 scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) && 241 !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
221 !(sdata2 && 242 mgmt->da))
222 (ether_addr_equal(mgmt->da, sdata2->vif.addr) ||
223 sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)))
224 return; 243 return;
225 244
226 elements = mgmt->u.probe_resp.variable; 245 elements = mgmt->u.probe_resp.variable;
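
Beyond accepting broadcast probe responses for OCE, folding the checks into ieee80211_scan_accept_presp() also snapshots the request flags up front, so the handler no longer reads scan_req->flags or sched_scan_req->flags without first checking the pointer. A trivial sketch of that defensive snapshot, with hypothetical types:

#include <linux/types.h>

struct req_like {
        u32 flags;
};

/* snapshot flags defensively: the request pointer may already be NULL
 * while a probe response is still being processed
 */
static u32 req_flags(const struct req_like *req)
{
        return req ? req->flags : 0;
}
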
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 69615016d5bf..0c5627f8a104 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -329,10 +329,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
329 sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); 329 sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
330 if (!sta->mesh) 330 if (!sta->mesh)
331 goto free; 331 goto free;
332 sta->mesh->plink_sta = sta;
332 spin_lock_init(&sta->mesh->plink_lock); 333 spin_lock_init(&sta->mesh->plink_lock);
333 if (ieee80211_vif_is_mesh(&sdata->vif) && 334 if (ieee80211_vif_is_mesh(&sdata->vif) &&
334 !sdata->u.mesh.user_mpm) 335 !sdata->u.mesh.user_mpm)
335 init_timer(&sta->mesh->plink_timer); 336 timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
337 0);
336 sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; 338 sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
337 } 339 }
338#endif 340#endif
@@ -377,14 +379,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
377 if (sta_prepare_rate_control(local, sta, gfp)) 379 if (sta_prepare_rate_control(local, sta, gfp))
378 goto free_txq; 380 goto free_txq;
379 381
380 for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
381 /*
382 * timer_to_tid must be initialized with identity mapping
383 * to enable session_timer's data differentiation. See
384 * sta_rx_agg_session_timer_expired for usage.
385 */
386 sta->timer_to_tid[i] = i;
387 }
388 for (i = 0; i < IEEE80211_NUM_ACS; i++) { 382 for (i = 0; i < IEEE80211_NUM_ACS; i++) {
389 skb_queue_head_init(&sta->ps_tx_buf[i]); 383 skb_queue_head_init(&sta->ps_tx_buf[i]);
390 skb_queue_head_init(&sta->tx_filtered[i]); 384 skb_queue_head_init(&sta->tx_filtered[i]);
@@ -515,6 +509,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local,
515 return err; 509 return err;
516} 510}
517 511
512static void
513ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
514{
515 struct ieee80211_local *local = sdata->local;
516 bool allow_p2p_go_ps = sdata->vif.p2p;
517 struct sta_info *sta;
518
519 rcu_read_lock();
520 list_for_each_entry_rcu(sta, &local->sta_list, list) {
521 if (sdata != sta->sdata ||
522 !test_sta_flag(sta, WLAN_STA_ASSOC))
523 continue;
524 if (!sta->sta.support_p2p_ps) {
525 allow_p2p_go_ps = false;
526 break;
527 }
528 }
529 rcu_read_unlock();
530
531 if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
532 sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
533 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
534 }
535}
536
518/* 537/*
519 * should be called with sta_mtx locked 538 * should be called with sta_mtx locked
520 * this function replaces the mutex lock 539 * this function replaces the mutex lock
@@ -561,6 +580,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
561 goto out_remove; 580 goto out_remove;
562 581
563 set_sta_flag(sta, WLAN_STA_INSERTED); 582 set_sta_flag(sta, WLAN_STA_INSERTED);
583
584 if (sta->sta_state >= IEEE80211_STA_ASSOC) {
585 ieee80211_recalc_min_chandef(sta->sdata);
586 if (!sta->sta.support_p2p_ps)
587 ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
588 }
589
564 /* accept BA sessions now */ 590 /* accept BA sessions now */
565 clear_sta_flag(sta, WLAN_STA_BLOCK_BA); 591 clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
566 592
@@ -1030,9 +1056,9 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata,
1030 return ret; 1056 return ret;
1031} 1057}
1032 1058
1033static void sta_info_cleanup(unsigned long data) 1059static void sta_info_cleanup(struct timer_list *t)
1034{ 1060{
1035 struct ieee80211_local *local = (struct ieee80211_local *) data; 1061 struct ieee80211_local *local = from_timer(local, t, sta_cleanup);
1036 struct sta_info *sta; 1062 struct sta_info *sta;
1037 bool timer_needed = false; 1063 bool timer_needed = false;
1038 1064
@@ -1064,8 +1090,7 @@ int sta_info_init(struct ieee80211_local *local)
1064 mutex_init(&local->sta_mtx); 1090 mutex_init(&local->sta_mtx);
1065 INIT_LIST_HEAD(&local->sta_list); 1091 INIT_LIST_HEAD(&local->sta_list);
1066 1092
1067 setup_timer(&local->sta_cleanup, sta_info_cleanup, 1093 timer_setup(&local->sta_cleanup, sta_info_cleanup, 0);
1068 (unsigned long)local);
1069 return 0; 1094 return 0;
1070} 1095}
1071 1096
@@ -1788,31 +1813,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
1788} 1813}
1789EXPORT_SYMBOL(ieee80211_sta_set_buffered); 1814EXPORT_SYMBOL(ieee80211_sta_set_buffered);
1790 1815
1791static void
1792ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
1793{
1794 struct ieee80211_local *local = sdata->local;
1795 bool allow_p2p_go_ps = sdata->vif.p2p;
1796 struct sta_info *sta;
1797
1798 rcu_read_lock();
1799 list_for_each_entry_rcu(sta, &local->sta_list, list) {
1800 if (sdata != sta->sdata ||
1801 !test_sta_flag(sta, WLAN_STA_ASSOC))
1802 continue;
1803 if (!sta->sta.support_p2p_ps) {
1804 allow_p2p_go_ps = false;
1805 break;
1806 }
1807 }
1808 rcu_read_unlock();
1809
1810 if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
1811 sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
1812 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
1813 }
1814}
1815
1816int sta_info_move_state(struct sta_info *sta, 1816int sta_info_move_state(struct sta_info *sta,
1817 enum ieee80211_sta_state new_state) 1817 enum ieee80211_sta_state new_state)
1818{ 1818{
@@ -2008,7 +2008,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
2008 2008
2009static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) 2009static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
2010{ 2010{
2011 u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate); 2011 u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
2012 2012
2013 if (rate == STA_STATS_RATE_INVALID) 2013 if (rate == STA_STATS_RATE_INVALID)
2014 return -EINVAL; 2014 return -EINVAL;
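
One incidental sta_info.c change: ACCESS_ONCE() becomes READ_ONCE(), part of the tree-wide retirement of ACCESS_ONCE(). Behaviour here is identical; READ_ONCE()/WRITE_ONCE() additionally cope with types wider than a machine word, which the old volatile-cast macro did not. A sketch:

#include <linux/compiler.h>
#include <linux/types.h>

/* force exactly one load of a value that is updated concurrently
 * without a lock, so the compiler cannot repeat or fuse the read
 */
static u16 sample_last_rate(const u16 *last_rate)
{
        return READ_ONCE(*last_rate);
}
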
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 3acbdfa9f649..cd53619435b6 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -126,6 +126,8 @@ enum ieee80211_agg_stop_reason {
126 AGG_STOP_DESTROY_STA, 126 AGG_STOP_DESTROY_STA,
127}; 127};
128 128
129struct sta_info;
130
129/** 131/**
130 * struct tid_ampdu_tx - TID aggregation information (Tx). 132 * struct tid_ampdu_tx - TID aggregation information (Tx).
131 * 133 *
@@ -133,8 +135,10 @@ enum ieee80211_agg_stop_reason {
133 * @session_timer: check if we keep Tx-ing on the TID (by timeout value) 135 * @session_timer: check if we keep Tx-ing on the TID (by timeout value)
134 * @addba_resp_timer: timer for peer's response to addba request 136 * @addba_resp_timer: timer for peer's response to addba request
135 * @pending: pending frames queue -- use sta's spinlock to protect 137 * @pending: pending frames queue -- use sta's spinlock to protect
138 * @sta: station we are attached to
136 * @dialog_token: dialog token for aggregation session 139 * @dialog_token: dialog token for aggregation session
137 * @timeout: session timeout value to be filled in ADDBA requests 140 * @timeout: session timeout value to be filled in ADDBA requests
141 * @tid: TID number
138 * @state: session state (see above) 142 * @state: session state (see above)
139 * @last_tx: jiffies of last tx activity 143 * @last_tx: jiffies of last tx activity
140 * @stop_initiator: initiator of a session stop 144 * @stop_initiator: initiator of a session stop
@@ -158,6 +162,7 @@ struct tid_ampdu_tx {
158 struct timer_list session_timer; 162 struct timer_list session_timer;
159 struct timer_list addba_resp_timer; 163 struct timer_list addba_resp_timer;
160 struct sk_buff_head pending; 164 struct sk_buff_head pending;
165 struct sta_info *sta;
161 unsigned long state; 166 unsigned long state;
162 unsigned long last_tx; 167 unsigned long last_tx;
163 u16 timeout; 168 u16 timeout;
@@ -169,6 +174,7 @@ struct tid_ampdu_tx {
169 u16 failed_bar_ssn; 174 u16 failed_bar_ssn;
170 bool bar_pending; 175 bool bar_pending;
171 bool amsdu; 176 bool amsdu;
177 u8 tid;
172}; 178};
173 179
174/** 180/**
@@ -181,12 +187,14 @@ struct tid_ampdu_tx {
181 * @reorder_time: jiffies when skb was added 187 * @reorder_time: jiffies when skb was added
182 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) 188 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
183 * @reorder_timer: releases expired frames from the reorder buffer. 189 * @reorder_timer: releases expired frames from the reorder buffer.
190 * @sta: station we are attached to
184 * @last_rx: jiffies of last rx activity 191 * @last_rx: jiffies of last rx activity
185 * @head_seq_num: head sequence number in reordering buffer. 192 * @head_seq_num: head sequence number in reordering buffer.
186 * @stored_mpdu_num: number of MPDUs in reordering buffer 193 * @stored_mpdu_num: number of MPDUs in reordering buffer
187 * @ssn: Starting Sequence Number expected to be aggregated. 194 * @ssn: Starting Sequence Number expected to be aggregated.
188 * @buf_size: buffer size for incoming A-MPDUs 195 * @buf_size: buffer size for incoming A-MPDUs
189 * @timeout: reset timer value (in TUs). 196 * @timeout: reset timer value (in TUs).
197 * @tid: TID number
190 * @rcu_head: RCU head used for freeing this struct 198 * @rcu_head: RCU head used for freeing this struct
191 * @reorder_lock: serializes access to reorder buffer, see below. 199 * @reorder_lock: serializes access to reorder buffer, see below.
192 * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and 200 * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
@@ -208,6 +216,7 @@ struct tid_ampdu_rx {
208 u64 reorder_buf_filtered; 216 u64 reorder_buf_filtered;
209 struct sk_buff_head *reorder_buf; 217 struct sk_buff_head *reorder_buf;
210 unsigned long *reorder_time; 218 unsigned long *reorder_time;
219 struct sta_info *sta;
211 struct timer_list session_timer; 220 struct timer_list session_timer;
212 struct timer_list reorder_timer; 221 struct timer_list reorder_timer;
213 unsigned long last_rx; 222 unsigned long last_rx;
@@ -216,6 +225,7 @@ struct tid_ampdu_rx {
216 u16 ssn; 225 u16 ssn;
217 u16 buf_size; 226 u16 buf_size;
218 u16 timeout; 227 u16 timeout;
228 u8 tid;
219 u8 auto_seq:1, 229 u8 auto_seq:1,
220 removed:1, 230 removed:1,
221 started:1; 231 started:1;
@@ -344,6 +354,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
344 * @plink_state: peer link state 354 * @plink_state: peer link state
345 * @plink_timeout: timeout of peer link 355 * @plink_timeout: timeout of peer link
346 * @plink_timer: peer link watch timer 356 * @plink_timer: peer link watch timer
357 * @plink_sta: peer link watch timer's sta_info
347 * @t_offset: timing offset relative to this host 358 * @t_offset: timing offset relative to this host
348 * @t_offset_setpoint: reference timing offset of this sta to be used when 359 * @t_offset_setpoint: reference timing offset of this sta to be used when
349 * calculating clockdrift 360 * calculating clockdrift
@@ -356,6 +367,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
356 */ 367 */
357struct mesh_sta { 368struct mesh_sta {
358 struct timer_list plink_timer; 369 struct timer_list plink_timer;
370 struct sta_info *plink_sta;
359 371
360 s64 t_offset; 372 s64 t_offset;
361 s64 t_offset_setpoint; 373 s64 t_offset_setpoint;
@@ -398,7 +410,7 @@ struct ieee80211_sta_rx_stats {
398 u64 msdu[IEEE80211_NUM_TIDS + 1]; 410 u64 msdu[IEEE80211_NUM_TIDS + 1];
399}; 411};
400 412
401/** 413/*
402 * The bandwidth threshold below which the per-station CoDel parameters will be 414 * The bandwidth threshold below which the per-station CoDel parameters will be
403 * scaled to be more lenient (to prevent starvation of slow stations). This 415 * scaled to be more lenient (to prevent starvation of slow stations). This
404 * value will be scaled by the number of active stations when it is being 416 * value will be scaled by the number of active stations when it is being
@@ -445,7 +457,6 @@ struct ieee80211_sta_rx_stats {
445 * plus one for non-QoS frames) 457 * plus one for non-QoS frames)
446 * @tid_seq: per-TID sequence numbers for sending to this STA 458 * @tid_seq: per-TID sequence numbers for sending to this STA
447 * @ampdu_mlme: A-MPDU state machine state 459 * @ampdu_mlme: A-MPDU state machine state
448 * @timer_to_tid: identity mapping to ID timers
449 * @mesh: mesh STA information 460 * @mesh: mesh STA information
450 * @debugfs_dir: debug filesystem directory dentry 461 * @debugfs_dir: debug filesystem directory dentry
451 * @dead: set to true when sta is unlinked 462 * @dead: set to true when sta is unlinked
@@ -552,7 +563,6 @@ struct sta_info {
552 * Aggregation information, locked with lock. 563 * Aggregation information, locked with lock.
553 */ 564 */
554 struct sta_ampdu_mlme ampdu_mlme; 565 struct sta_ampdu_mlme ampdu_mlme;
555 u8 timer_to_tid[IEEE80211_NUM_TIDS];
556 566
557#ifdef CONFIG_MAC80211_DEBUGFS 567#ifdef CONFIG_MAC80211_DEBUGFS
558 struct dentry *debugfs_dir; 568 struct dentry *debugfs_dir;
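
The new ->sta and ->tid members of tid_ampdu_tx/tid_ampdu_rx are what replace the deleted timer_to_tid[] identity array: the old session-timer callbacks received a pointer into that array and reconstructed both the station (via container_of()) and the TID (the array value) from it, whereas from_timer() now delivers the aggregation struct itself, so the context is stored where the timer lives. A sketch with simplified, hypothetical types:

#include <linux/timer.h>
#include <linux/printk.h>
#include <linux/types.h>

struct sta_like;                        /* opaque in this sketch */

struct tid_ampdu_like {
        struct timer_list session_timer;
        struct sta_like *sta;           /* back-pointer, set at allocation */
        u8 tid;                         /* replaces timer_to_tid[tid] */
};

static void session_timer_expired(struct timer_list *t)
{
        struct tid_ampdu_like *tid_agg =
                from_timer(tid_agg, t, session_timer);

        /* both pieces of context are now one dereference away */
        pr_debug("BA session timeout: sta %p tid %u\n",
                 tid_agg->sta, tid_agg->tid);
}
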
diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c
index edfe0c170a1c..837857261b66 100644
--- a/net/mac80211/trace.c
+++ b/net/mac80211/trace.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* bug in tracepoint.h, it should include this */ 2/* bug in tracepoint.h, it should include this */
2#include <linux/module.h> 3#include <linux/module.h>
3 4
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3d9ac17af407..591ad02e1fa4 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2* Portions of this file 3* Portions of this file
3* Copyright(c) 2016 Intel Deutschland GmbH 4* Copyright(c) 2016 Intel Deutschland GmbH
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index 768f7c22a190..366b9e6f043e 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifdef CONFIG_MAC80211_MESSAGE_TRACING 2#ifdef CONFIG_MAC80211_MESSAGE_TRACING
2 3
3#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) 4#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 94826680cf2b..7b8154474b9e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1396,6 +1396,40 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
1396 fq_flow_get_default_func); 1396 fq_flow_get_default_func);
1397} 1397}
1398 1398
1399static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
1400 struct fq_flow *flow, struct sk_buff *skb,
1401 void *data)
1402{
1403 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1404
1405 return info->control.vif == data;
1406}
1407
1408void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
1409 struct ieee80211_sub_if_data *sdata)
1410{
1411 struct fq *fq = &local->fq;
1412 struct txq_info *txqi;
1413 struct fq_tin *tin;
1414 struct ieee80211_sub_if_data *ap;
1415
1416 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
1417 return;
1418
1419 ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
1420
1421 if (!ap->vif.txq)
1422 return;
1423
1424 txqi = to_txq_info(ap->vif.txq);
1425 tin = &txqi->tin;
1426
1427 spin_lock_bh(&fq->lock);
1428 fq_tin_filter(fq, tin, fq_vlan_filter_func, &sdata->vif,
1429 fq_skb_free_func);
1430 spin_unlock_bh(&fq->lock);
1431}
1432
1399void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, 1433void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
1400 struct sta_info *sta, 1434 struct sta_info *sta,
1401 struct txq_info *txqi, int tid) 1435 struct txq_info *txqi, int tid)
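
ieee80211_txq_remove_vlan() is new here: AP_VLAN interfaces have no TXQ of their own, their frames sit in the owning AP interface's TXQ, so tearing a VLAN down must fish its frames out of the AP's queue. fq_tin_filter() does the walk, unlinking every skb the predicate accepts and handing it to the free callback; per-call context travels through the opaque data pointer. A sketch of the same call shape with a hypothetical predicate (matching on skb->mark instead of the vif):

#include <linux/netdevice.h>    /* dev_kfree_skb_any() */
#include <net/fq.h>
#include <net/fq_impl.h>

static bool drop_marked(struct fq *fq, struct fq_tin *tin,
                        struct fq_flow *flow, struct sk_buff *skb,
                        void *data)
{
        return skb->mark == *(u32 *)data;
}

static void free_filtered(struct fq *fq, struct fq_tin *tin,
                          struct fq_flow *flow, struct sk_buff *skb)
{
        dev_kfree_skb_any(skb);
}

static void flush_marked(struct fq *fq, struct fq_tin *tin, u32 mark)
{
        /* caller holds fq->lock, as in the hunk above */
        fq_tin_filter(fq, tin, drop_marked, &mark, free_filtered);
}
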
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6aef6793d052..d57e5f6bd8b6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
1392 /* insert custom IEs that go before HT */ 1392 /* insert custom IEs that go before HT */
1393 if (ie && ie_len) { 1393 if (ie && ie_len) {
1394 static const u8 before_ht[] = { 1394 static const u8 before_ht[] = {
1395 WLAN_EID_SSID, 1395 /*
1396 WLAN_EID_SUPP_RATES, 1396 * no need to list the ones split off already
1397 WLAN_EID_REQUEST, 1397 * (or generated here)
1398 WLAN_EID_EXT_SUPP_RATES, 1398 */
1399 WLAN_EID_DS_PARAMS, 1399 WLAN_EID_DS_PARAMS,
1400 WLAN_EID_SUPPORTED_REGULATORY_CLASSES, 1400 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
1401 }; 1401 };
@@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
1424 /* insert custom IEs that go before VHT */ 1424 /* insert custom IEs that go before VHT */
1425 if (ie && ie_len) { 1425 if (ie && ie_len) {
1426 static const u8 before_vht[] = { 1426 static const u8 before_vht[] = {
1427 WLAN_EID_SSID, 1427 /*
1428 WLAN_EID_SUPP_RATES, 1428 * no need to list the ones split off already
1429 WLAN_EID_REQUEST, 1429 * (or generated here)
1430 WLAN_EID_EXT_SUPP_RATES, 1430 */
1431 WLAN_EID_DS_PARAMS,
1432 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
1433 WLAN_EID_HT_CAPABILITY,
1434 WLAN_EID_BSS_COEX_2040, 1431 WLAN_EID_BSS_COEX_2040,
1435 WLAN_EID_EXT_CAPABILITY, 1432 WLAN_EID_EXT_CAPABILITY,
1436 WLAN_EID_SSID_LIST, 1433 WLAN_EID_SSID_LIST,
1437 WLAN_EID_CHANNEL_USAGE, 1434 WLAN_EID_CHANNEL_USAGE,
1438 WLAN_EID_INTERWORKING, 1435 WLAN_EID_INTERWORKING,
1439 WLAN_EID_MESH_ID, 1436 WLAN_EID_MESH_ID,
1440 /* 60 GHz can't happen here right now */ 1437 /* 60 GHz (Multi-band, DMG, MMS) can't happen */
1441 }; 1438 };
1442 noffset = ieee80211_ie_split(ie, ie_len, 1439 noffset = ieee80211_ie_split(ie, ie_len,
1443 before_vht, ARRAY_SIZE(before_vht), 1440 before_vht, ARRAY_SIZE(before_vht),
@@ -2980,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
2980 struct ieee80211_mgmt *mgmt; 2977 struct ieee80211_mgmt *mgmt;
2981 struct ieee80211_local *local = sdata->local; 2978 struct ieee80211_local *local = sdata->local;
2982 int freq; 2979 int freq;
2983 int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) + 2980 int hdr_len = offsetofend(struct ieee80211_mgmt,
2984 sizeof(mgmt->u.action.u.chan_switch); 2981 u.action.u.chan_switch);
2985 u8 *pos; 2982 u8 *pos;
2986 2983
2987 if (sdata->vif.type != NL80211_IFTYPE_ADHOC && 2984 if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 19ec2189d3ac..b9276ac849fa 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
386 386
387 bw = ieee80211_sta_cap_rx_bw(sta); 387 bw = ieee80211_sta_cap_rx_bw(sta);
388 bw = min(bw, sta->cur_max_bandwidth); 388 bw = min(bw, sta->cur_max_bandwidth);
389
390 /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of
391 * IEEE80211-2016 specification makes higher bandwidth operation
392 * possible on the TDLS link if the peers have wider bandwidth
393 * capability.
394 */
395 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
396 test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
397 return bw;
398
389 bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); 399 bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
390 400
391 return bw; 401 return bw;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 0d722ea98a1b..b58722d9de37 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -464,7 +464,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
464 pos += IEEE80211_CCMP_HDR_LEN; 464 pos += IEEE80211_CCMP_HDR_LEN;
465 ccmp_special_blocks(skb, pn, b_0, aad); 465 ccmp_special_blocks(skb, pn, b_0, aad);
466 return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, 466 return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
467 skb_put(skb, mic_len), mic_len); 467 skb_put(skb, mic_len));
468} 468}
469 469
470 470
@@ -543,7 +543,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
543 key->u.ccmp.tfm, b_0, aad, 543 key->u.ccmp.tfm, b_0, aad,
544 skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, 544 skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
545 data_len, 545 data_len,
546 skb->data + skb->len - mic_len, mic_len)) 546 skb->data + skb->len - mic_len))
547 return RX_DROP_UNUSABLE; 547 return RX_DROP_UNUSABLE;
548 } 548 }
549 549
diff --git a/net/mac802154/cfg.h b/net/mac802154/cfg.h
index e2718f981e82..3bb089685500 100644
--- a/net/mac802154/cfg.h
+++ b/net/mac802154/cfg.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* mac802154 configuration hooks for cfg802154 2/* mac802154 configuration hooks for cfg802154
2 */ 3 */
3 4
diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h
index fd9daf2ecec9..d23f0db98015 100644
--- a/net/mac802154/driver-ops.h
+++ b/net/mac802154/driver-ops.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __MAC802154_DRIVER_OPS 2#ifndef __MAC802154_DRIVER_OPS
2#define __MAC802154_DRIVER_OPS 3#define __MAC802154_DRIVER_OPS
3 4
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 1e1c9b20bab7..2fb703d70803 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
623 u8 iv[16]; 623 u8 iv[16];
624 struct scatterlist src; 624 struct scatterlist src;
625 SKCIPHER_REQUEST_ON_STACK(req, key->tfm0); 625 SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
626 int err; 626 int err, datalen;
627 unsigned char *data;
627 628
628 llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); 629 llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
629 sg_init_one(&src, skb->data, skb->len); 630 /* Compute data payload offset and data length */
631 data = skb_mac_header(skb) + skb->mac_len;
632 datalen = skb_tail_pointer(skb) - data;
633 sg_init_one(&src, data, datalen);
634
630 skcipher_request_set_tfm(req, key->tfm0); 635 skcipher_request_set_tfm(req, key->tfm0);
631 skcipher_request_set_callback(req, 0, NULL, NULL); 636 skcipher_request_set_callback(req, 0, NULL, NULL);
632 skcipher_request_set_crypt(req, &src, &src, skb->len, iv); 637 skcipher_request_set_crypt(req, &src, &src, datalen, iv);
633 err = crypto_skcipher_encrypt(req); 638 err = crypto_skcipher_encrypt(req);
634 skcipher_request_zero(req); 639 skcipher_request_zero(req);
635 return err; 640 return err;
@@ -713,7 +718,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
713 if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA) 718 if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
714 return -EINVAL; 719 return -EINVAL;
715 720
716 if (!hdr.fc.security_enabled || hdr.sec.level == 0) { 721 if (!hdr.fc.security_enabled ||
722 (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) {
717 skb_push(skb, hlen); 723 skb_push(skb, hlen);
718 return 0; 724 return 0;
719 } 725 }
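
The llsec change fixes the unauthenticated-encryption path, which ran the cipher over skb->data for skb->len bytes and so no longer lined up with the actual payload; the bounds are now derived from the MAC header instead. The same computation as a stand-alone sketch:

#include <linux/skbuff.h>

/* the encryptable payload starts right after the MAC header and runs
 * to the current tail of the skb
 */
static void llsec_payload_bounds(struct sk_buff *skb,
                                 unsigned char **data, int *datalen)
{
        *data = skb_mac_header(skb) + skb->mac_len;
        *datalen = skb_tail_pointer(skb) - *data;
}
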
diff --git a/net/mac802154/trace.c b/net/mac802154/trace.c
index 863e5e6b983d..c36e3d541a42 100644
--- a/net/mac802154/trace.c
+++ b/net/mac802154/trace.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/module.h> 2#include <linux/module.h>
2 3
3#ifndef __CHECKER__ 4#ifndef __CHECKER__
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 6f30e0c93a16..2c8a43d3607f 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* Based on net/mac80211/trace.h */ 2/* Based on net/mac80211/trace.h */
2 3
3#undef TRACE_SYSTEM 4#undef TRACE_SYSTEM
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 5c467ef97311..801ea9098387 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -24,6 +24,7 @@ config NET_MPLS_GSO
24 24
25config MPLS_ROUTING 25config MPLS_ROUTING
26 tristate "MPLS: routing support" 26 tristate "MPLS: routing support"
27 depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
27 ---help--- 28 ---help---
28 Add support for forwarding of mpls packets. 29 Add support for forwarding of mpls packets.
29 30
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c5b9ce41d66f..8ca9915befc8 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -16,6 +16,7 @@
16#include <net/arp.h> 16#include <net/arp.h>
17#include <net/ip_fib.h> 17#include <net/ip_fib.h>
18#include <net/netevent.h> 18#include <net/netevent.h>
19#include <net/ip_tunnels.h>
19#include <net/netns/generic.h> 20#include <net/netns/generic.h>
20#if IS_ENABLED(CONFIG_IPV6) 21#if IS_ENABLED(CONFIG_IPV6)
21#include <net/ipv6.h> 22#include <net/ipv6.h>
@@ -39,6 +40,36 @@ static int one = 1;
39static int label_limit = (1 << 20) - 1; 40static int label_limit = (1 << 20) - 1;
40static int ttl_max = 255; 41static int ttl_max = 255;
41 42
43#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
44static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
45{
46 return sizeof(struct mpls_shim_hdr);
47}
48
49static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
50 .encap_hlen = ipgre_mpls_encap_hlen,
51};
52
53static int ipgre_tunnel_encap_add_mpls_ops(void)
54{
55 return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
56}
57
58static void ipgre_tunnel_encap_del_mpls_ops(void)
59{
60 ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
61}
62#else
63static int ipgre_tunnel_encap_add_mpls_ops(void)
64{
65 return 0;
66}
67
68static void ipgre_tunnel_encap_del_mpls_ops(void)
69{
70}
71#endif
72
42static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 73static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
43 struct nlmsghdr *nlh, struct net *net, u32 portid, 74 struct nlmsghdr *nlh, struct net *net, u32 portid,
44 unsigned int nlm_flags); 75 unsigned int nlm_flags);
@@ -2485,6 +2516,10 @@ static int __init mpls_init(void)
2485 0); 2516 0);
2486 rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, 2517 rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
2487 mpls_netconf_dump_devconf, 0); 2518 mpls_netconf_dump_devconf, 0);
2519 err = ipgre_tunnel_encap_add_mpls_ops();
2520 if (err)
2521 pr_err("Can't add mpls over gre tunnel ops\n");
2522
2488 err = 0; 2523 err = 0;
2489out: 2524out:
2490 return err; 2525 return err;
@@ -2502,6 +2537,7 @@ static void __exit mpls_exit(void)
2502 dev_remove_pack(&mpls_packet_type); 2537 dev_remove_pack(&mpls_packet_type);
2503 unregister_netdevice_notifier(&mpls_dev_notifier); 2538 unregister_netdevice_notifier(&mpls_dev_notifier);
2504 unregister_pernet_subsys(&mpls_net_ops); 2539 unregister_pernet_subsys(&mpls_net_ops);
2540 ipgre_tunnel_encap_del_mpls_ops();
2505} 2541}
2506module_exit(mpls_exit); 2542module_exit(mpls_exit);
2507 2543
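
For MPLS over GRE, af_mpls.c only has to tell the IP-tunnel core how much extra headroom the encapsulation costs; label forwarding itself reuses the existing paths. A sketch of the registration pattern, assuming the TUNNEL_ENCAP_MPLS type introduced by this series (the shim header is one 4-byte label stack entry):

#include <linux/init.h>
#include <net/ip_tunnels.h>

static size_t my_mpls_encap_hlen(struct ip_tunnel_encap *e)
{
        return 4;       /* one MPLS label stack entry (shim header) */
}

static const struct ip_tunnel_encap_ops my_mpls_iptun_ops = {
        /* only .encap_hlen is needed; build/err handlers stay unset */
        .encap_hlen = my_mpls_encap_hlen,
};

static int __init my_mpls_gre_init(void)
{
        /* slots the ops into the per-type tunnel encap table */
        return ip_tunnel_encap_add_ops(&my_mpls_iptun_ops,
                                       TUNNEL_ENCAP_MPLS);
}

The Kconfig hunk's "depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n" is the usual idiom for an optional dependency: it forbids only the one broken combination of built-in MPLS routing with a modular ip_tunnel core.
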
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index cf65aec2e551..768a302879b4 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef MPLS_INTERNAL_H 2#ifndef MPLS_INTERNAL_H
2#define MPLS_INTERNAL_H 3#define MPLS_INTERNAL_H
3#include <net/mpls.h> 4#include <net/mpls.h>
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index af3d636534ef..d30f7bd741d0 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -286,6 +286,7 @@ struct ncsi_dev_priv {
286 struct work_struct work; /* For channel management */ 286 struct work_struct work; /* For channel management */
287 struct packet_type ptype; /* NCSI packet Rx handler */ 287 struct packet_type ptype; /* NCSI packet Rx handler */
288 struct list_head node; /* Form NCSI device list */ 288 struct list_head node; /* Form NCSI device list */
289#define NCSI_MAX_VLAN_VIDS 15
289 struct list_head vlan_vids; /* List of active VLAN IDs */ 290 struct list_head vlan_vids; /* List of active VLAN IDs */
290}; 291};
291 292
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 6898e7229285..67e708e98ccf 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
73 ncm->data[2] = data; 73 ncm->data[2] = data;
74 ncm->data[4] = ntohl(lsc->oem_status); 74 ncm->data[4] = ntohl(lsc->oem_status);
75 75
76 netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
77 nc->id, data & 0x1 ? "up" : "down");
78
76 chained = !list_empty(&nc->link); 79 chained = !list_empty(&nc->link);
77 state = nc->state; 80 state = nc->state;
78 spin_unlock_irqrestore(&nc->lock, flags); 81 spin_unlock_irqrestore(&nc->lock, flags);
@@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
145 ncm = &nc->modes[NCSI_MODE_LINK]; 148 ncm = &nc->modes[NCSI_MODE_LINK];
146 hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; 149 hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
147 ncm->data[3] = ntohl(hncdsc->status); 150 ncm->data[3] = ntohl(hncdsc->status);
151 netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n",
152 nc->id, ncm->data[3] & 0x3 ? "up" : "down");
148 if (!list_empty(&nc->link) || 153 if (!list_empty(&nc->link) ||
149 nc->state != NCSI_CHANNEL_ACTIVE) { 154 nc->state != NCSI_CHANNEL_ACTIVE) {
150 spin_unlock_irqrestore(&nc->lock, flags); 155 spin_unlock_irqrestore(&nc->lock, flags);
@@ -187,7 +192,7 @@ static struct ncsi_aen_handler {
187} ncsi_aen_handlers[] = { 192} ncsi_aen_handlers[] = {
188 { NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc }, 193 { NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc },
189 { NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr }, 194 { NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr },
190 { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc } 195 { NCSI_PKT_AEN_HNCDSC, 8, ncsi_aen_handler_hncdsc }
191}; 196};
192 197
193int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) 198int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
@@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
212 } 217 }
213 218
214 ret = ncsi_validate_aen_pkt(h, nah->payload); 219 ret = ncsi_validate_aen_pkt(h, nah->payload);
215 if (ret) 220 if (ret) {
221 netdev_warn(ndp->ndev.dev,
222 "NCSI: 'bad' packet ignored for AEN type 0x%x\n",
223 h->type);
216 goto out; 224 goto out;
225 }
217 226
218 ret = nah->handler(ndp, h); 227 ret = nah->handler(ndp, h);
228 if (ret)
229 netdev_err(ndp->ndev.dev,
230 "NCSI: Handler for AEN type 0x%x returned %d\n",
231 h->type, ret);
219out: 232out:
220 consume_skb(skb); 233 consume_skb(skb);
221 return ret; 234 return ret;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 3fd3c39e6278..c989211bbabc 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table)
38 return sizes[table]; 38 return sizes[table];
39} 39}
40 40
41u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) 41static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
42{ 42{
43 struct ncsi_channel_filter *ncf; 43 struct ncsi_channel_filter *ncf;
44 int size; 44 int size;
@@ -184,11 +184,12 @@ report:
184 nd->handler(nd); 184 nd->handler(nd);
185} 185}
186 186
187static void ncsi_channel_monitor(unsigned long data) 187static void ncsi_channel_monitor(struct timer_list *t)
188{ 188{
189 struct ncsi_channel *nc = (struct ncsi_channel *)data; 189 struct ncsi_channel *nc = from_timer(nc, t, monitor.timer);
190 struct ncsi_package *np = nc->package; 190 struct ncsi_package *np = nc->package;
191 struct ncsi_dev_priv *ndp = np->ndp; 191 struct ncsi_dev_priv *ndp = np->ndp;
192 struct ncsi_channel_mode *ncm;
192 struct ncsi_cmd_arg nca; 193 struct ncsi_cmd_arg nca;
193 bool enabled, chained; 194 bool enabled, chained;
194 unsigned int monitor_state; 195 unsigned int monitor_state;
@@ -202,11 +203,15 @@ static void ncsi_channel_monitor(unsigned long data)
202 monitor_state = nc->monitor.state; 203 monitor_state = nc->monitor.state;
203 spin_unlock_irqrestore(&nc->lock, flags); 204 spin_unlock_irqrestore(&nc->lock, flags);
204 205
205 if (!enabled || chained) 206 if (!enabled || chained) {
207 ncsi_stop_channel_monitor(nc);
206 return; 208 return;
209 }
207 if (state != NCSI_CHANNEL_INACTIVE && 210 if (state != NCSI_CHANNEL_INACTIVE &&
208 state != NCSI_CHANNEL_ACTIVE) 211 state != NCSI_CHANNEL_ACTIVE) {
212 ncsi_stop_channel_monitor(nc);
209 return; 213 return;
214 }
210 215
211 switch (monitor_state) { 216 switch (monitor_state) {
212 case NCSI_CHANNEL_MONITOR_START: 217 case NCSI_CHANNEL_MONITOR_START:
@@ -217,28 +222,30 @@ static void ncsi_channel_monitor(unsigned long data)
217 nca.type = NCSI_PKT_CMD_GLS; 222 nca.type = NCSI_PKT_CMD_GLS;
218 nca.req_flags = 0; 223 nca.req_flags = 0;
219 ret = ncsi_xmit_cmd(&nca); 224 ret = ncsi_xmit_cmd(&nca);
220 if (ret) { 225 if (ret)
221 netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", 226 netdev_err(ndp->ndev.dev, "Error %d sending GLS\n",
222 ret); 227 ret);
223 return;
224 }
225
226 break; 228 break;
227 case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: 229 case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
228 break; 230 break;
229 default: 231 default:
230 if (!(ndp->flags & NCSI_DEV_HWA) && 232 netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
231 state == NCSI_CHANNEL_ACTIVE) { 233 nc->id);
234 if (!(ndp->flags & NCSI_DEV_HWA)) {
232 ncsi_report_link(ndp, true); 235 ncsi_report_link(ndp, true);
233 ndp->flags |= NCSI_DEV_RESHUFFLE; 236 ndp->flags |= NCSI_DEV_RESHUFFLE;
234 } 237 }
235 238
239 ncsi_stop_channel_monitor(nc);
240
241 ncm = &nc->modes[NCSI_MODE_LINK];
236 spin_lock_irqsave(&nc->lock, flags); 242 spin_lock_irqsave(&nc->lock, flags);
237 nc->state = NCSI_CHANNEL_INVISIBLE; 243 nc->state = NCSI_CHANNEL_INVISIBLE;
244 ncm->data[2] &= ~0x1;
238 spin_unlock_irqrestore(&nc->lock, flags); 245 spin_unlock_irqrestore(&nc->lock, flags);
239 246
240 spin_lock_irqsave(&ndp->lock, flags); 247 spin_lock_irqsave(&ndp->lock, flags);
241 nc->state = NCSI_CHANNEL_INACTIVE; 248 nc->state = NCSI_CHANNEL_ACTIVE;
242 list_add_tail_rcu(&nc->link, &ndp->channel_queue); 249 list_add_tail_rcu(&nc->link, &ndp->channel_queue);
243 spin_unlock_irqrestore(&ndp->lock, flags); 250 spin_unlock_irqrestore(&ndp->lock, flags);
244 ncsi_process_next_channel(ndp); 251 ncsi_process_next_channel(ndp);
@@ -306,8 +313,7 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id)
306 nc->package = np; 313 nc->package = np;
307 nc->state = NCSI_CHANNEL_INACTIVE; 314 nc->state = NCSI_CHANNEL_INACTIVE;
308 nc->monitor.enabled = false; 315 nc->monitor.enabled = false;
309 setup_timer(&nc->monitor.timer, 316 timer_setup(&nc->monitor.timer, ncsi_channel_monitor, 0);
310 ncsi_channel_monitor, (unsigned long)nc);
311 spin_lock_init(&nc->lock); 317 spin_lock_init(&nc->lock);
312 INIT_LIST_HEAD(&nc->link); 318 INIT_LIST_HEAD(&nc->link);
313 for (index = 0; index < NCSI_CAP_MAX; index++) 319 for (index = 0; index < NCSI_CAP_MAX; index++)
@@ -522,9 +528,9 @@ struct ncsi_dev *ncsi_find_dev(struct net_device *dev)
522 return NULL; 528 return NULL;
523} 529}
524 530
525static void ncsi_request_timeout(unsigned long data) 531static void ncsi_request_timeout(struct timer_list *t)
526{ 532{
527 struct ncsi_request *nr = (struct ncsi_request *)data; 533 struct ncsi_request *nr = from_timer(nr, t, timer);
528 struct ncsi_dev_priv *ndp = nr->ndp; 534 struct ncsi_dev_priv *ndp = nr->ndp;
529 unsigned long flags; 535 unsigned long flags;
530 536
@@ -677,7 +683,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
677 data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index); 683 data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
678 if (!data) { 684 if (!data) {
679 netdev_err(ndp->ndev.dev, 685 netdev_err(ndp->ndev.dev,
680 "ncsi: failed to retrieve filter %d\n", index); 686 "NCSI: failed to retrieve filter %d\n", index);
681 /* Set the VLAN id to 0 - this will still disable the entry in 687 /* Set the VLAN id to 0 - this will still disable the entry in
682 * the filter table, but we won't know what it was. 688 * the filter table, but we won't know what it was.
683 */ 689 */
@@ -687,7 +693,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
687 } 693 }
688 694
689 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 695 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
690 "ncsi: removed vlan tag %u at index %d\n", 696 "NCSI: removed vlan tag %u at index %d\n",
691 vid, index + 1); 697 vid, index + 1);
692 ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index); 698 ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
693 699
@@ -713,7 +719,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
713 if (index < 0) { 719 if (index < 0) {
714 /* New tag to add */ 720 /* New tag to add */
715 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 721 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
716 "ncsi: new vlan id to set: %u\n", 722 "NCSI: new vlan id to set: %u\n",
717 vlan->vid); 723 vlan->vid);
718 break; 724 break;
719 } 725 }
@@ -732,11 +738,15 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
732 if (index < 0) { 738 if (index < 0) {
733 netdev_err(ndp->ndev.dev, 739 netdev_err(ndp->ndev.dev,
734 "Failed to add new VLAN tag, error %d\n", index); 740 "Failed to add new VLAN tag, error %d\n", index);
741 if (index == -ENOSPC)
742 netdev_err(ndp->ndev.dev,
743 "Channel %u already has all VLAN filters set\n",
744 nc->id);
735 return -1; 745 return -1;
736 } 746 }
737 747
738 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 748 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
739 "ncsi: set vid %u in packet, index %u\n", 749 "NCSI: set vid %u in packet, index %u\n",
740 vlan->vid, index + 1); 750 vlan->vid, index + 1);
741 nca->type = NCSI_PKT_CMD_SVF; 751 nca->type = NCSI_PKT_CMD_SVF;
742 nca->words[1] = vlan->vid; 752 nca->words[1] = vlan->vid;
@@ -775,8 +785,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
775 nca.package = np->id; 785 nca.package = np->id;
776 nca.channel = NCSI_RESERVED_CHANNEL; 786 nca.channel = NCSI_RESERVED_CHANNEL;
777 ret = ncsi_xmit_cmd(&nca); 787 ret = ncsi_xmit_cmd(&nca);
778 if (ret) 788 if (ret) {
789 netdev_err(ndp->ndev.dev,
790 "NCSI: Failed to transmit CMD_SP\n");
779 goto error; 791 goto error;
792 }
780 793
781 nd->state = ncsi_dev_state_config_cis; 794 nd->state = ncsi_dev_state_config_cis;
782 break; 795 break;
@@ -788,8 +801,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
788 nca.package = np->id; 801 nca.package = np->id;
789 nca.channel = nc->id; 802 nca.channel = nc->id;
790 ret = ncsi_xmit_cmd(&nca); 803 ret = ncsi_xmit_cmd(&nca);
791 if (ret) 804 if (ret) {
805 netdev_err(ndp->ndev.dev,
806 "NCSI: Failed to transmit CMD_CIS\n");
792 goto error; 807 goto error;
808 }
793 809
794 nd->state = ncsi_dev_state_config_clear_vids; 810 nd->state = ncsi_dev_state_config_clear_vids;
795 break; 811 break;
@@ -886,10 +902,16 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
886 } 902 }
887 903
888 ret = ncsi_xmit_cmd(&nca); 904 ret = ncsi_xmit_cmd(&nca);
889 if (ret) 905 if (ret) {
906 netdev_err(ndp->ndev.dev,
907 "NCSI: Failed to transmit CMD %x\n",
908 nca.type);
890 goto error; 909 goto error;
910 }
891 break; 911 break;
892 case ncsi_dev_state_config_done: 912 case ncsi_dev_state_config_done:
913 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
914 "NCSI: channel %u config done\n", nc->id);
893 spin_lock_irqsave(&nc->lock, flags); 915 spin_lock_irqsave(&nc->lock, flags);
894 if (nc->reconfigure_needed) { 916 if (nc->reconfigure_needed) {
895 /* This channel's configuration has been updated 917 /* This channel's configuration has been updated
@@ -916,6 +938,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
916 } else { 938 } else {
917 hot_nc = NULL; 939 hot_nc = NULL;
918 nc->state = NCSI_CHANNEL_INACTIVE; 940 nc->state = NCSI_CHANNEL_INACTIVE;
941 netdev_warn(ndp->ndev.dev,
942 "NCSI: channel %u link down after config\n",
943 nc->id);
919 } 944 }
920 spin_unlock_irqrestore(&nc->lock, flags); 945 spin_unlock_irqrestore(&nc->lock, flags);
921 946
@@ -928,8 +953,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
928 ncsi_process_next_channel(ndp); 953 ncsi_process_next_channel(ndp);
929 break; 954 break;
930 default: 955 default:
931 netdev_warn(dev, "Wrong NCSI state 0x%x in config\n", 956 netdev_alert(dev, "Wrong NCSI state 0x%x in config\n",
932 nd->state); 957 nd->state);
933 } 958 }
934 959
935 return; 960 return;
@@ -981,10 +1006,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
981 } 1006 }
982 1007
983 if (!found) { 1008 if (!found) {
1009 netdev_warn(ndp->ndev.dev,
1010 "NCSI: No channel found with link\n");
984 ncsi_report_link(ndp, true); 1011 ncsi_report_link(ndp, true);
985 return -ENODEV; 1012 return -ENODEV;
986 } 1013 }
987 1014
1015 ncm = &found->modes[NCSI_MODE_LINK];
1016 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
1017 "NCSI: Channel %u added to queue (link %s)\n",
1018 found->id, ncm->data[2] & 0x1 ? "up" : "down");
1019
988out: 1020out:
989 spin_lock_irqsave(&ndp->lock, flags); 1021 spin_lock_irqsave(&ndp->lock, flags);
990 list_add_tail_rcu(&found->link, &ndp->channel_queue); 1022 list_add_tail_rcu(&found->link, &ndp->channel_queue);
@@ -998,12 +1030,15 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
998 struct ncsi_package *np; 1030 struct ncsi_package *np;
999 struct ncsi_channel *nc; 1031 struct ncsi_channel *nc;
1000 unsigned int cap; 1032 unsigned int cap;
1033 bool has_channel = false;
1001 1034
1002 /* The hardware arbitration is disabled if any one channel 1035 /* The hardware arbitration is disabled if any one channel
1003 * doesn't explicitly support it. 1036 * doesn't explicitly support it.
1004 */ 1037 */
1005 NCSI_FOR_EACH_PACKAGE(ndp, np) { 1038 NCSI_FOR_EACH_PACKAGE(ndp, np) {
1006 NCSI_FOR_EACH_CHANNEL(np, nc) { 1039 NCSI_FOR_EACH_CHANNEL(np, nc) {
1040 has_channel = true;
1041
1007 cap = nc->caps[NCSI_CAP_GENERIC].cap; 1042 cap = nc->caps[NCSI_CAP_GENERIC].cap;
1008 if (!(cap & NCSI_CAP_GENERIC_HWA) || 1043 if (!(cap & NCSI_CAP_GENERIC_HWA) ||
1009 (cap & NCSI_CAP_GENERIC_HWA_MASK) != 1044 (cap & NCSI_CAP_GENERIC_HWA_MASK) !=
@@ -1014,8 +1049,13 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
1014 } 1049 }
1015 } 1050 }
1016 1051
1017 ndp->flags |= NCSI_DEV_HWA; 1052 if (has_channel) {
1018 return true; 1053 ndp->flags |= NCSI_DEV_HWA;
1054 return true;
1055 }
1056
1057 ndp->flags &= ~NCSI_DEV_HWA;
1058 return false;
1019} 1059}
1020 1060
1021static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp) 1061static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
@@ -1038,6 +1078,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
1038 1078
1039 /* We can have no channels in an extreme case */ 1079 /* We can have no channels in an extreme case */
1040 if (list_empty(&ndp->channel_queue)) { 1080 if (list_empty(&ndp->channel_queue)) {
1081 netdev_err(ndp->ndev.dev,
1082 "NCSI: No available channels for HWA\n");
1041 ncsi_report_link(ndp, false); 1083 ncsi_report_link(ndp, false);
1042 return -ENOENT; 1084 return -ENOENT;
1043 } 1085 }
@@ -1206,6 +1248,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
1206 1248
1207 return; 1249 return;
1208error: 1250error:
1251 netdev_err(ndp->ndev.dev,
1252 "NCSI: Failed to transmit cmd 0x%x during probe\n",
1253 nca.type);
1209 ncsi_report_link(ndp, true); 1254 ncsi_report_link(ndp, true);
1210} 1255}
1211 1256
@@ -1259,10 +1304,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
1259 switch (old_state) { 1304 switch (old_state) {
1260 case NCSI_CHANNEL_INACTIVE: 1305 case NCSI_CHANNEL_INACTIVE:
1261 ndp->ndev.state = ncsi_dev_state_config; 1306 ndp->ndev.state = ncsi_dev_state_config;
1307 netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n",
1308 nc->id);
1262 ncsi_configure_channel(ndp); 1309 ncsi_configure_channel(ndp);
1263 break; 1310 break;
1264 case NCSI_CHANNEL_ACTIVE: 1311 case NCSI_CHANNEL_ACTIVE:
1265 ndp->ndev.state = ncsi_dev_state_suspend; 1312 ndp->ndev.state = ncsi_dev_state_suspend;
1313 netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n",
1314 nc->id);
1266 ncsi_suspend_channel(ndp); 1315 ncsi_suspend_channel(ndp);
1267 break; 1316 break;
1268 default: 1317 default:
@@ -1282,6 +1331,8 @@ out:
1282 return ncsi_choose_active_channel(ndp); 1331 return ncsi_choose_active_channel(ndp);
1283 } 1332 }
1284 1333
1334 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
1335 "NCSI: No more channels to process\n");
1285 ncsi_report_link(ndp, false); 1336 ncsi_report_link(ndp, false);
1286 return -ENODEV; 1337 return -ENODEV;
1287} 1338}
@@ -1373,7 +1424,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
1373 ncsi_dev_state_config || 1424 ncsi_dev_state_config ||
1374 !list_empty(&nc->link)) { 1425 !list_empty(&nc->link)) {
1375 netdev_printk(KERN_DEBUG, nd->dev, 1426 netdev_printk(KERN_DEBUG, nd->dev,
1376 "ncsi: channel %p marked dirty\n", 1427 "NCSI: channel %p marked dirty\n",
1377 nc); 1428 nc);
1378 nc->reconfigure_needed = true; 1429 nc->reconfigure_needed = true;
1379 } 1430 }
@@ -1393,7 +1444,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
1393 spin_unlock_irqrestore(&ndp->lock, flags); 1444 spin_unlock_irqrestore(&ndp->lock, flags);
1394 1445
1395 netdev_printk(KERN_DEBUG, nd->dev, 1446 netdev_printk(KERN_DEBUG, nd->dev,
1396 "ncsi: kicked channel %p\n", nc); 1447 "NCSI: kicked channel %p\n", nc);
1397 n++; 1448 n++;
1398 } 1449 }
1399 } 1450 }
@@ -1403,7 +1454,6 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
1403 1454
1404int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) 1455int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
1405{ 1456{
1406 struct ncsi_channel_filter *ncf;
1407 struct ncsi_dev_priv *ndp; 1457 struct ncsi_dev_priv *ndp;
1408 unsigned int n_vids = 0; 1458 unsigned int n_vids = 0;
1409 struct vlan_vid *vlan; 1459 struct vlan_vid *vlan;
@@ -1415,28 +1465,26 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
1415 1465
1416 nd = ncsi_find_dev(dev); 1466 nd = ncsi_find_dev(dev);
1417 if (!nd) { 1467 if (!nd) {
1418 netdev_warn(dev, "ncsi: No net_device?\n"); 1468 netdev_warn(dev, "NCSI: No net_device?\n");
1419 return 0; 1469 return 0;
1420 } 1470 }
1421 1471
1422 ndp = TO_NCSI_DEV_PRIV(nd); 1472 ndp = TO_NCSI_DEV_PRIV(nd);
1423 ncf = ndp->hot_channel->filters[NCSI_FILTER_VLAN];
1424 1473
1425 /* Add the VLAN id to our internal list */ 1474 /* Add the VLAN id to our internal list */
1426 list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { 1475 list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
1427 n_vids++; 1476 n_vids++;
1428 if (vlan->vid == vid) { 1477 if (vlan->vid == vid) {
1429 netdev_printk(KERN_DEBUG, dev, 1478 netdev_printk(KERN_DEBUG, dev,
1430 "vid %u already registered\n", vid); 1479 "NCSI: vid %u already registered\n", vid);
1431 return 0; 1480 return 0;
1432 } 1481 }
1433 } 1482 }
1434 1483 if (n_vids >= NCSI_MAX_VLAN_VIDS) {
1435 if (n_vids >= ncf->total) { 1484 netdev_warn(dev,
1436 netdev_info(dev, 1485 "tried to add vlan id %u but NCSI max already registered (%u)\n",
1437 "NCSI Channel supports up to %u VLAN tags but %u are already set\n", 1486 vid, NCSI_MAX_VLAN_VIDS);
1438 ncf->total, n_vids); 1487 return -ENOSPC;
1439 return -EINVAL;
1440 } 1488 }
1441 1489
1442 vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); 1490 vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
@@ -1447,7 +1495,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
1447 vlan->vid = vid; 1495 vlan->vid = vid;
1448 list_add_rcu(&vlan->list, &ndp->vlan_vids); 1496 list_add_rcu(&vlan->list, &ndp->vlan_vids);
1449 1497
1450 netdev_printk(KERN_DEBUG, dev, "Added new vid %u\n", vid); 1498 netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid);
1451 1499
1452 found = ncsi_kick_channels(ndp) != 0; 1500 found = ncsi_kick_channels(ndp) != 0;
1453 1501
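
The two hunks above replace the old lookup of ncf->total — the per-channel filter table size, which required an already-configured hot channel — with the fixed NCSI_MAX_VLAN_VIDS cap defined in internal.h, so a VID can be refused with -ENOSPC before any channel state exists. A compilable sketch of that fixed-cap bookkeeping, with a plain array standing in for the kernel's vlan_vids linked list:

#include <errno.h>
#include <stdio.h>

#define NCSI_MAX_VLAN_VIDS 15           /* mirrors the new define */

static unsigned short vids[NCSI_MAX_VLAN_VIDS];
static unsigned int n_vids;

static int add_vid(unsigned short vid)
{
        for (unsigned int i = 0; i < n_vids; i++)
                if (vids[i] == vid)
                        return 0;       /* already registered: no-op */
        if (n_vids >= NCSI_MAX_VLAN_VIDS)
                return -ENOSPC;         /* cap reached, refuse the VID */
        vids[n_vids++] = vid;
        return 0;
}

int main(void)
{
        for (unsigned short v = 1; v <= 16; v++)
                if (add_vid(v) == -ENOSPC)
                        printf("vid %u refused: cap is %d\n",
                               v, NCSI_MAX_VLAN_VIDS);
        return 0;
}
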
@@ -1467,7 +1515,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
1467 1515
1468 nd = ncsi_find_dev(dev); 1516 nd = ncsi_find_dev(dev);
1469 if (!nd) { 1517 if (!nd) {
1470 netdev_warn(dev, "ncsi: no net_device?\n"); 1518 netdev_warn(dev, "NCSI: no net_device?\n");
1471 return 0; 1519 return 0;
1472 } 1520 }
1473 1521
@@ -1477,14 +1525,14 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
1477 list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) 1525 list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
1478 if (vlan->vid == vid) { 1526 if (vlan->vid == vid) {
1479 netdev_printk(KERN_DEBUG, dev, 1527 netdev_printk(KERN_DEBUG, dev,
1480 "vid %u found, removing\n", vid); 1528 "NCSI: vid %u found, removing\n", vid);
1481 list_del_rcu(&vlan->list); 1529 list_del_rcu(&vlan->list);
1482 found = true; 1530 found = true;
1483 kfree(vlan); 1531 kfree(vlan);
1484 } 1532 }
1485 1533
1486 if (!found) { 1534 if (!found) {
1487 netdev_err(dev, "ncsi: vid %u wasn't registered!\n", vid); 1535 netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid);
1488 return -EINVAL; 1536 return -EINVAL;
1489 } 1537 }
1490 1538
@@ -1528,9 +1576,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
1528 for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { 1576 for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) {
1529 ndp->requests[i].id = i; 1577 ndp->requests[i].id = i;
1530 ndp->requests[i].ndp = ndp; 1578 ndp->requests[i].ndp = ndp;
1531 setup_timer(&ndp->requests[i].timer, 1579 timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0);
1532 ncsi_request_timeout,
1533 (unsigned long)&ndp->requests[i]);
1534 } 1580 }
1535 1581
1536 spin_lock_irqsave(&ncsi_dev_lock, flags); 1582 spin_lock_irqsave(&ncsi_dev_lock, flags);
@@ -1567,10 +1613,12 @@ int ncsi_start_dev(struct ncsi_dev *nd)
1567 return 0; 1613 return 0;
1568 } 1614 }
1569 1615
1570 if (ndp->flags & NCSI_DEV_HWA) 1616 if (ndp->flags & NCSI_DEV_HWA) {
1617 netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
1571 ret = ncsi_enable_hwa(ndp); 1618 ret = ncsi_enable_hwa(ndp);
1572 else 1619 } else {
1573 ret = ncsi_choose_active_channel(ndp); 1620 ret = ncsi_choose_active_channel(ndp);
1621 }
1574 1622
1575 return ret; 1623 return ret;
1576} 1624}
@@ -1601,6 +1649,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
1601 } 1649 }
1602 } 1650 }
1603 1651
1652 netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n");
1604 ncsi_report_link(ndp, true); 1653 ncsi_report_link(ndp, true);
1605} 1654}
1606EXPORT_SYMBOL_GPL(ncsi_stop_dev); 1655EXPORT_SYMBOL_GPL(ncsi_stop_dev);
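
Both timer conversions in this file — ncsi_channel_monitor() and ncsi_request_timeout() — follow the same kernel-wide pattern: timer_setup() drops the (unsigned long) cookie, the callback receives the struct timer_list pointer itself, and from_timer() recovers the owning structure, which is just container_of() on the embedded timer. A userspace sketch of that recovery trick, with a stand-in type rather than the real struct ncsi_request:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct timer_list { int pending; };     /* stand-in for the kernel type */

struct request {                        /* stand-in for struct ncsi_request */
        int id;
        struct timer_list timer;        /* timer embedded in its owner */
};

/* New-style callback: no opaque cookie; the owner is recovered from
 * the timer pointer exactly as from_timer(nr, t, timer) expands to. */
static void request_timeout(struct timer_list *t)
{
        struct request *nr = container_of(t, struct request, timer);

        printf("request %d timed out\n", nr->id);
}

int main(void)
{
        struct request nr = { .id = 7 };

        request_timeout(&nr.timer);     /* what the timer core would do */
        return 0;
}
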
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 265b9a892d41..efd933ff5570 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr)
146 146
147 ncm = &nc->modes[NCSI_MODE_ENABLE]; 147 ncm = &nc->modes[NCSI_MODE_ENABLE];
148 if (ncm->enable) 148 if (ncm->enable)
149 return -EBUSY; 149 return 0;
150 150
151 ncm->enable = 1; 151 ncm->enable = 1;
152 return 0; 152 return 0;
@@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr)
173 173
174 ncm = &nc->modes[NCSI_MODE_ENABLE]; 174 ncm = &nc->modes[NCSI_MODE_ENABLE];
175 if (!ncm->enable) 175 if (!ncm->enable)
176 return -EBUSY; 176 return 0;
177 177
178 ncm->enable = 0; 178 ncm->enable = 0;
179 return 0; 179 return 0;
@@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr)
217 217
218 ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; 218 ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
219 if (ncm->enable) 219 if (ncm->enable)
220 return -EBUSY; 220 return 0;
221 221
222 ncm->enable = 1; 222 ncm->enable = 1;
223 return 0; 223 return 0;
@@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
239 239
240 ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; 240 ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
241 if (!ncm->enable) 241 if (!ncm->enable)
242 return -EBUSY; 242 return 0;
243 243
244 ncm->enable = 1; 244 ncm->enable = 1;
245 return 0; 245 return 0;
@@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr)
263 /* Check if the AEN has been enabled */ 263 /* Check if the AEN has been enabled */
264 ncm = &nc->modes[NCSI_MODE_AEN]; 264 ncm = &nc->modes[NCSI_MODE_AEN];
265 if (ncm->enable) 265 if (ncm->enable)
266 return -EBUSY; 266 return 0;
267 267
268 /* Update to AEN configuration */ 268 /* Update to AEN configuration */
269 cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd); 269 cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd);
@@ -382,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
382 /* Check if VLAN mode has been enabled */ 382 /* Check if VLAN mode has been enabled */
383 ncm = &nc->modes[NCSI_MODE_VLAN]; 383 ncm = &nc->modes[NCSI_MODE_VLAN];
384 if (ncm->enable) 384 if (ncm->enable)
385 return -EBUSY; 385 return 0;
386 386
387 /* Update to VLAN mode */ 387 /* Update to VLAN mode */
388 cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd); 388 cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
@@ -409,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr)
409 /* Check if VLAN mode has been enabled */ 409 /* Check if VLAN mode has been enabled */
410 ncm = &nc->modes[NCSI_MODE_VLAN]; 410 ncm = &nc->modes[NCSI_MODE_VLAN];
411 if (!ncm->enable) 411 if (!ncm->enable)
412 return -EBUSY; 412 return 0;
413 413
414 /* Update to VLAN mode */ 414 /* Update to VLAN mode */
415 ncm->enable = 0; 415 ncm->enable = 0;
@@ -455,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
455 455
456 bitmap = &ncf->bitmap; 456 bitmap = &ncf->bitmap;
457 if (cmd->at_e & 0x1) { 457 if (cmd->at_e & 0x1) {
458 if (test_and_set_bit(cmd->index, bitmap)) 458 set_bit(cmd->index, bitmap);
459 return -EBUSY;
460 memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6); 459 memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
461 } else { 460 } else {
462 if (!test_and_clear_bit(cmd->index, bitmap)) 461 clear_bit(cmd->index, bitmap);
463 return -EBUSY;
464
465 memset(ncf->data + 6 * cmd->index, 0, 6); 462 memset(ncf->data + 6 * cmd->index, 0, 6);
466 } 463 }
467 464
@@ -485,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr)
485 /* Check if broadcast filter has been enabled */ 482 /* Check if broadcast filter has been enabled */
486 ncm = &nc->modes[NCSI_MODE_BC]; 483 ncm = &nc->modes[NCSI_MODE_BC];
487 if (ncm->enable) 484 if (ncm->enable)
488 return -EBUSY; 485 return 0;
489 486
490 /* Update to broadcast filter mode */ 487 /* Update to broadcast filter mode */
491 cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd); 488 cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd);
@@ -511,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr)
511 /* Check if broadcast filter isn't enabled */ 508 /* Check if broadcast filter isn't enabled */
512 ncm = &nc->modes[NCSI_MODE_BC]; 509 ncm = &nc->modes[NCSI_MODE_BC];
513 if (!ncm->enable) 510 if (!ncm->enable)
514 return -EBUSY; 511 return 0;
515 512
516 /* Update to broadcast filter mode */ 513 /* Update to broadcast filter mode */
517 ncm->enable = 0; 514 ncm->enable = 0;
@@ -538,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr)
538 /* Check if multicast filter has been enabled */ 535 /* Check if multicast filter has been enabled */
539 ncm = &nc->modes[NCSI_MODE_MC]; 536 ncm = &nc->modes[NCSI_MODE_MC];
540 if (ncm->enable) 537 if (ncm->enable)
541 return -EBUSY; 538 return 0;
542 539
543 /* Update to multicast filter mode */ 540 /* Update to multicast filter mode */
544 cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd); 541 cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd);
@@ -564,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr)
564 /* Check if multicast filter has been enabled */ 561 /* Check if multicast filter has been enabled */
565 ncm = &nc->modes[NCSI_MODE_MC]; 562 ncm = &nc->modes[NCSI_MODE_MC];
566 if (!ncm->enable) 563 if (!ncm->enable)
567 return -EBUSY; 564 return 0;
568 565
569 /* Update to multicast filter mode */ 566 /* Update to multicast filter mode */
570 ncm->enable = 0; 567 ncm->enable = 0;
@@ -591,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
591 /* Check if flow control has been enabled */ 588 /* Check if flow control has been enabled */
592 ncm = &nc->modes[NCSI_MODE_FC]; 589 ncm = &nc->modes[NCSI_MODE_FC];
593 if (ncm->enable) 590 if (ncm->enable)
594 return -EBUSY; 591 return 0;
595 592
596 /* Update to flow control mode */ 593 /* Update to flow control mode */
597 cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd); 594 cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd);
@@ -959,7 +956,7 @@ static struct ncsi_rsp_handler {
959 { NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf }, 956 { NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf },
960 { NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf }, 957 { NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf },
961 { NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc }, 958 { NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc },
962 { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi }, 959 { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi },
963 { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, 960 { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc },
964 { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp }, 961 { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp },
965 { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps }, 962 { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps },
@@ -1032,11 +1029,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
1032 if (payload < 0) 1029 if (payload < 0)
1033 payload = ntohs(hdr->length); 1030 payload = ntohs(hdr->length);
1034 ret = ncsi_validate_rsp_pkt(nr, payload); 1031 ret = ncsi_validate_rsp_pkt(nr, payload);
1035 if (ret) 1032 if (ret) {
1033 netdev_warn(ndp->ndev.dev,
1034 "NCSI: 'bad' packet ignored for type 0x%x\n",
1035 hdr->type);
1036 goto out; 1036 goto out;
1037 }
1037 1038
1038 /* Process the packet */ 1039 /* Process the packet */
1039 ret = nrh->handler(nr); 1040 ret = nrh->handler(nr);
1041 if (ret)
1042 netdev_err(ndp->ndev.dev,
1043 "NCSI: Handler for packet type 0x%x returned %d\n",
1044 hdr->type, ret);
1040out: 1045out:
1041 ncsi_free_request(nr); 1046 ncsi_free_request(nr);
1042 return ret; 1047 return ret;
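
The -EBUSY returns dropped throughout ncsi-rsp.c share one rationale: a response reporting that the channel is already in the requested state is harmless, so treating it as success keeps a retried or duplicated command from failing the state machine (and from tripping the new "Handler ... returned %d" error path). A compilable sketch of the idempotent handler shape, with a stand-in mode struct:

#include <stdio.h>

struct mode { int enable; };

static int handler_enable(struct mode *ncm)
{
        if (ncm->enable)
                return 0;               /* already enabled: success, not -EBUSY */
        ncm->enable = 1;
        return 0;
}

int main(void)
{
        struct mode m = { 0 };

        /* Both calls succeed; the second is a harmless no-op. */
        printf("%d %d\n", handler_enable(&m), handler_enable(&m));
        return 0;
}
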
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index d3891c93edd6..f78ed2470831 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o 2netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
2 3
3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o 4nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 28ec148df02d..a445a6bf4f11 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the ipset modules 3# Makefile for the ipset modules
3# 4#
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 8ad2b52a0b32..5ca18f07683b 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -37,11 +37,11 @@
37#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) 37#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
38 38
39static void 39static void
40mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 40mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
41{ 41{
42 struct mtype *map = set->data; 42 struct mtype *map = set->data;
43 43
44 setup_timer(&map->gc, gc, (unsigned long)set); 44 timer_setup(&map->gc, gc, 0);
45 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); 45 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
46} 46}
47 47
@@ -272,10 +272,10 @@ out:
272} 272}
273 273
274static void 274static void
275mtype_gc(unsigned long ul_set) 275mtype_gc(struct timer_list *t)
276{ 276{
277 struct ip_set *set = (struct ip_set *)ul_set; 277 struct mtype *map = from_timer(map, t, gc);
278 struct mtype *map = set->data; 278 struct ip_set *set = map->set;
279 void *x; 279 void *x;
280 u32 id; 280 u32 id;
281 281
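
The new map->set member exists because from_timer() can only recover the structure that physically embeds the timer — here the map — while the old (unsigned long)set cookie delivered the ip_set directly; every init path below therefore stores a back-pointer. A short sketch of that two-step recovery, with stand-in types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct timer_list { int pending; };
struct ip_set { const char *name; };

struct mtype_map {                      /* stand-in for the bitmap/hash map */
        struct timer_list gc;           /* embedded gc timer */
        struct ip_set *set;             /* the back-pointer this patch adds */
};

static void mtype_gc_sketch(struct timer_list *t)
{
        struct mtype_map *map = container_of(t, struct mtype_map, gc);
        struct ip_set *set = map->set;  /* what the old cookie used to carry */

        printf("gc ran for set %s\n", set->name);
}

int main(void)
{
        struct ip_set set = { .name = "demo" };
        struct mtype_map map = { .set = &set };   /* as in init_map_ip() */

        mtype_gc_sketch(&map.gc);       /* what the timer core would invoke */
        return 0;
}
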
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4783efff0bde..d8975a0b4282 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -48,6 +48,7 @@ struct bitmap_ip {
48 size_t memsize; /* members size */ 48 size_t memsize; /* members size */
49 u8 netmask; /* subnet netmask */ 49 u8 netmask; /* subnet netmask */
50 struct timer_list gc; /* garbage collection */ 50 struct timer_list gc; /* garbage collection */
51 struct ip_set *set; /* attached to this ip_set */
51 unsigned char extensions[0] /* data extensions */ 52 unsigned char extensions[0] /* data extensions */
52 __aligned(__alignof__(u64)); 53 __aligned(__alignof__(u64));
53}; 54};
@@ -232,6 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
232 map->netmask = netmask; 233 map->netmask = netmask;
233 set->timeout = IPSET_NO_TIMEOUT; 234 set->timeout = IPSET_NO_TIMEOUT;
234 235
236 map->set = set;
235 set->data = map; 237 set->data = map;
236 set->family = NFPROTO_IPV4; 238 set->family = NFPROTO_IPV4;
237 239
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 9a065f672d3a..4c279fbd2d5d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,6 +52,7 @@ struct bitmap_ipmac {
52 u32 elements; /* number of max elements in the set */ 52 u32 elements; /* number of max elements in the set */
53 size_t memsize; /* members size */ 53 size_t memsize; /* members size */
54 struct timer_list gc; /* garbage collector */ 54 struct timer_list gc; /* garbage collector */
55 struct ip_set *set; /* attached to this ip_set */
55 unsigned char extensions[0] /* MAC + data extensions */ 56 unsigned char extensions[0] /* MAC + data extensions */
56 __aligned(__alignof__(u64)); 57 __aligned(__alignof__(u64));
57}; 58};
@@ -307,6 +308,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
307 map->elements = elements; 308 map->elements = elements;
308 set->timeout = IPSET_NO_TIMEOUT; 309 set->timeout = IPSET_NO_TIMEOUT;
309 310
311 map->set = set;
310 set->data = map; 312 set->data = map;
311 set->family = NFPROTO_IPV4; 313 set->family = NFPROTO_IPV4;
312 314
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f0c733358a4..7f9bbd7c98b5 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -40,6 +40,7 @@ struct bitmap_port {
40 u32 elements; /* number of max elements in the set */ 40 u32 elements; /* number of max elements in the set */
41 size_t memsize; /* members size */ 41 size_t memsize; /* members size */
42 struct timer_list gc; /* garbage collection */ 42 struct timer_list gc; /* garbage collection */
43 struct ip_set *set; /* attached to this ip_set */
43 unsigned char extensions[0] /* data extensions */ 44 unsigned char extensions[0] /* data extensions */
44 __aligned(__alignof__(u64)); 45 __aligned(__alignof__(u64));
45}; 46};
@@ -214,6 +215,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
214 map->last_port = last_port; 215 map->last_port = last_port;
215 set->timeout = IPSET_NO_TIMEOUT; 216 set->timeout = IPSET_NO_TIMEOUT;
216 217
218 map->set = set;
217 set->data = map; 219 set->data = map;
218 set->family = NFPROTO_UNSPEC; 220 set->family = NFPROTO_UNSPEC;
219 221
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e495b5e484b1..cf84f7b37cd9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1191,14 +1191,17 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
1191 from->family == to->family)) 1191 from->family == to->family))
1192 return -IPSET_ERR_TYPE_MISMATCH; 1192 return -IPSET_ERR_TYPE_MISMATCH;
1193 1193
1194 if (from->ref_netlink || to->ref_netlink) 1194 write_lock_bh(&ip_set_ref_lock);
1195
1196 if (from->ref_netlink || to->ref_netlink) {
1197 write_unlock_bh(&ip_set_ref_lock);
1195 return -EBUSY; 1198 return -EBUSY;
1199 }
1196 1200
1197 strncpy(from_name, from->name, IPSET_MAXNAMELEN); 1201 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
1198 strncpy(from->name, to->name, IPSET_MAXNAMELEN); 1202 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
1199 strncpy(to->name, from_name, IPSET_MAXNAMELEN); 1203 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
1200 1204
1201 write_lock_bh(&ip_set_ref_lock);
1202 swap(from->ref, to->ref); 1205 swap(from->ref, to->ref);
1203 ip_set(inst, from_id) = to; 1206 ip_set(inst, from_id) = to;
1204 ip_set(inst, to_id) = from; 1207 ip_set(inst, to_id) = from;
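
The hunk above is a classic check-then-act fix: with the ref_netlink test performed before taking ip_set_ref_lock, a netlink dump could grab a reference between the check and the swap. Moving the test inside the write lock closes that window. A userspace pthread sketch of the same rule — the busy check must run under the lock that protects the mutation (types are illustrative, not the kernel's):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t ref_lock = PTHREAD_RWLOCK_INITIALIZER;

struct set { int ref_netlink; int ref; };

static int swap_refs(struct set *from, struct set *to)
{
        pthread_rwlock_wrlock(&ref_lock);
        if (from->ref_netlink || to->ref_netlink) {
                pthread_rwlock_unlock(&ref_lock);
                return -1;              /* the -EBUSY path, now race-free */
        }
        int tmp = from->ref;            /* swap under the same lock */
        from->ref = to->ref;
        to->ref = tmp;
        pthread_rwlock_unlock(&ref_lock);
        return 0;
}

int main(void)
{
        struct set a = { 0, 1 }, b = { 0, 2 };

        printf("%d\n", swap_refs(&a, &b));      /* 0: swapped safely */
        return 0;
}
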
@@ -2072,25 +2075,28 @@ static struct pernet_operations ip_set_net_ops = {
2072static int __init 2075static int __init
2073ip_set_init(void) 2076ip_set_init(void)
2074{ 2077{
2075 int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); 2078 int ret = register_pernet_subsys(&ip_set_net_ops);
2079
2080 if (ret) {
2081 pr_err("ip_set: cannot register pernet_subsys.\n");
2082 return ret;
2083 }
2076 2084
2085 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
2077 if (ret != 0) { 2086 if (ret != 0) {
2078 pr_err("ip_set: cannot register with nfnetlink.\n"); 2087 pr_err("ip_set: cannot register with nfnetlink.\n");
2088 unregister_pernet_subsys(&ip_set_net_ops);
2079 return ret; 2089 return ret;
2080 } 2090 }
2091
2081 ret = nf_register_sockopt(&so_set); 2092 ret = nf_register_sockopt(&so_set);
2082 if (ret != 0) { 2093 if (ret != 0) {
2083 pr_err("SO_SET registry failed: %d\n", ret); 2094 pr_err("SO_SET registry failed: %d\n", ret);
2084 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 2095 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2096 unregister_pernet_subsys(&ip_set_net_ops);
2085 return ret; 2097 return ret;
2086 } 2098 }
2087 ret = register_pernet_subsys(&ip_set_net_ops); 2099
2088 if (ret) {
2089 pr_err("ip_set: cannot register pernet_subsys.\n");
2090 nf_unregister_sockopt(&so_set);
2091 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2092 return ret;
2093 }
2094 pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); 2100 pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
2095 return 0; 2101 return 0;
2096} 2102}
@@ -2098,9 +2104,10 @@ ip_set_init(void)
2098static void __exit 2104static void __exit
2099ip_set_fini(void) 2105ip_set_fini(void)
2100{ 2106{
2101 unregister_pernet_subsys(&ip_set_net_ops);
2102 nf_unregister_sockopt(&so_set); 2107 nf_unregister_sockopt(&so_set);
2103 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 2108 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2109
2110 unregister_pernet_subsys(&ip_set_net_ops);
2104 pr_debug("these are the famous last words\n"); 2111 pr_debug("these are the famous last words\n");
2105} 2112}
2106 2113
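
The ip_set_init()/ip_set_fini() reshuffle enforces the usual module rule: the pernet subsystem that later registrations may depend on comes up first, every failure path unwinds exactly what succeeded in reverse order, and teardown mirrors init. A sketch of that shape with hypothetical register_a/b/c stand-ins for the pernet, nfnetlink and sockopt steps:

#include <stdio.h>

static int  register_a(void)   { return 0; }    /* e.g. pernet subsys */
static int  register_b(void)   { return 0; }    /* e.g. nfnetlink     */
static int  register_c(void)   { return 0; }    /* e.g. sockopt       */
static void unregister_a(void) { }
static void unregister_b(void) { }

static int init_sketch(void)
{
        int ret = register_a();
        if (ret)
                return ret;             /* nothing to unwind yet */

        ret = register_b();
        if (ret)
                goto err_a;

        ret = register_c();
        if (ret)
                goto err_b;

        return 0;

err_b:
        unregister_b();                 /* unwind in reverse order */
err_a:
        unregister_a();
        return ret;
}

int main(void)
{
        printf("init: %d\n", init_sketch());
        return 0;
}
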
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index f236c0bc7b3f..efffc8eabafe 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -280,6 +280,7 @@ htable_bits(u32 hashsize)
280struct htype { 280struct htype {
281 struct htable __rcu *table; /* the hash table */ 281 struct htable __rcu *table; /* the hash table */
282 struct timer_list gc; /* garbage collection when timeout enabled */ 282 struct timer_list gc; /* garbage collection when timeout enabled */
283 struct ip_set *set; /* attached to this ip_set */
283 u32 maxelem; /* max elements in the hash */ 284 u32 maxelem; /* max elements in the hash */
284 u32 initval; /* random jhash init value */ 285 u32 initval; /* random jhash init value */
285#ifdef IP_SET_HASH_WITH_MARKMASK 286#ifdef IP_SET_HASH_WITH_MARKMASK
@@ -429,11 +430,11 @@ mtype_destroy(struct ip_set *set)
429} 430}
430 431
431static void 432static void
432mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 433mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
433{ 434{
434 struct htype *h = set->data; 435 struct htype *h = set->data;
435 436
436 setup_timer(&h->gc, gc, (unsigned long)set); 437 timer_setup(&h->gc, gc, 0);
437 mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); 438 mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
438 pr_debug("gc initialized, run in every %u\n", 439 pr_debug("gc initialized, run in every %u\n",
439 IPSET_GC_PERIOD(set->timeout)); 440 IPSET_GC_PERIOD(set->timeout));
@@ -526,10 +527,10 @@ mtype_expire(struct ip_set *set, struct htype *h)
526} 527}
527 528
528static void 529static void
529mtype_gc(unsigned long ul_set) 530mtype_gc(struct timer_list *t)
530{ 531{
531 struct ip_set *set = (struct ip_set *)ul_set; 532 struct htype *h = from_timer(h, t, gc);
532 struct htype *h = set->data; 533 struct ip_set *set = h->set;
533 534
534 pr_debug("called\n"); 535 pr_debug("called\n");
535 spin_lock_bh(&set->lock); 536 spin_lock_bh(&set->lock);
@@ -1041,12 +1042,24 @@ out:
1041static int 1042static int
1042mtype_head(struct ip_set *set, struct sk_buff *skb) 1043mtype_head(struct ip_set *set, struct sk_buff *skb)
1043{ 1044{
1044 const struct htype *h = set->data; 1045 struct htype *h = set->data;
1045 const struct htable *t; 1046 const struct htable *t;
1046 struct nlattr *nested; 1047 struct nlattr *nested;
1047 size_t memsize; 1048 size_t memsize;
1048 u8 htable_bits; 1049 u8 htable_bits;
1049 1050
1051 /* If any members have expired, set->elements will be wrong;
1052 * the mtype_expire function will update it with the right count.
1053 * We do not hold set->lock here, so grab it first.
1054 * set->elements can still be incorrect in the case of a huge set,
1055 * because elements might time out during the listing.
1056 */
1057 if (SET_WITH_TIMEOUT(set)) {
1058 spin_lock_bh(&set->lock);
1059 mtype_expire(set, h);
1060 spin_unlock_bh(&set->lock);
1061 }
1062
1050 rcu_read_lock_bh(); 1063 rcu_read_lock_bh();
1051 t = rcu_dereference_bh_nfnl(h->table); 1064 t = rcu_dereference_bh_nfnl(h->table);
1052 memsize = mtype_ahash_memsize(h, t) + set->ext_size; 1065 memsize = mtype_ahash_memsize(h, t) + set->ext_size;
@@ -1302,6 +1315,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1302 t->htable_bits = hbits; 1315 t->htable_bits = hbits;
1303 RCU_INIT_POINTER(h->table, t); 1316 RCU_INIT_POINTER(h->table, t);
1304 1317
1318 h->set = set;
1305 set->data = h; 1319 set->data = h;
1306#ifndef IP_SET_PROTO_UNDEF 1320#ifndef IP_SET_PROTO_UNDEF
1307 if (set->family == NFPROTO_IPV4) { 1321 if (set->family == NFPROTO_IPV4) {
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 20bfbd315f61..613eb212cb48 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -123,13 +123,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
123 return ret; 123 return ret;
124 124
125 ip &= ip_set_hostmask(h->netmask); 125 ip &= ip_set_hostmask(h->netmask);
126 e.ip = htonl(ip);
127 if (e.ip == 0)
128 return -IPSET_ERR_HASH_ELEM;
126 129
127 if (adt == IPSET_TEST) { 130 if (adt == IPSET_TEST)
128 e.ip = htonl(ip);
129 if (e.ip == 0)
130 return -IPSET_ERR_HASH_ELEM;
131 return adtfn(set, &e, &ext, &ext, flags); 131 return adtfn(set, &e, &ext, &ext, flags);
132 }
133 132
134 ip_to = ip; 133 ip_to = ip;
135 if (tb[IPSET_ATTR_IP_TO]) { 134 if (tb[IPSET_ATTR_IP_TO]) {
@@ -148,17 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
148 147
149 hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); 148 hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
150 149
151 if (retried) 150 if (retried) {
152 ip = ntohl(h->next.ip); 151 ip = ntohl(h->next.ip);
153 for (; !before(ip_to, ip); ip += hosts) {
154 e.ip = htonl(ip); 152 e.ip = htonl(ip);
155 if (e.ip == 0) 153 }
156 return -IPSET_ERR_HASH_ELEM; 154 for (; ip <= ip_to;) {
157 ret = adtfn(set, &e, &ext, &ext, flags); 155 ret = adtfn(set, &e, &ext, &ext, flags);
158
159 if (ret && !ip_set_eexist(ret, flags)) 156 if (ret && !ip_set_eexist(ret, flags))
160 return ret; 157 return ret;
161 158
159 ip += hosts;
160 e.ip = htonl(ip);
161 if (e.ip == 0)
162 return 0;
163
162 ret = 0; 164 ret = 0;
163 } 165 }
164 return ret; 166 return ret;
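
The rewritten hash_ip4_uadt() loop is the template for the !before()/!after() removals in the sibling *_uadt functions below: the wrap-aware helpers are replaced by plain unsigned comparisons, and in this function the increment also detects 32-bit wraparound explicitly, since a range ending at 255.255.255.255 would otherwise loop forever. A minimal userspace sketch of that loop shape, with printf standing in for adtfn():

#include <stdint.h>
#include <stdio.h>

/* Walk [ip, ip_to] in steps of 'hosts', stopping cleanly when the
 * u32 counter wraps past 0xFFFFFFFF instead of spinning forever. */
static void walk_range(uint32_t ip, uint32_t ip_to, uint32_t hosts)
{
        for (; ip <= ip_to;) {
                printf("visit %#x\n", (unsigned int)ip); /* stand-in for adtfn() */
                ip += hosts;
                if (ip == 0)            /* wrapped: range done */
                        return;
        }
}

int main(void)
{
        /* Ends after four visits rather than looping forever. */
        walk_range(0xFFFFFFF0u, 0xFFFFFFFFu, 4);
        return 0;
}
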
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index b64cf14e8352..f3ba8348cf9d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -149,7 +149,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
149 149
150 if (retried) 150 if (retried)
151 ip = ntohl(h->next.ip); 151 ip = ntohl(h->next.ip);
152 for (; !before(ip_to, ip); ip++) { 152 for (; ip <= ip_to; ip++) {
153 e.ip = htonl(ip); 153 e.ip = htonl(ip);
154 ret = adtfn(set, &e, &ext, &ext, flags); 154 ret = adtfn(set, &e, &ext, &ext, flags);
155 155
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index f438740e6c6a..ddb8039ec1d2 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -178,7 +178,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
178 178
179 if (retried) 179 if (retried)
180 ip = ntohl(h->next.ip); 180 ip = ntohl(h->next.ip);
181 for (; !before(ip_to, ip); ip++) { 181 for (; ip <= ip_to; ip++) {
182 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) 182 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
183 : port; 183 : port;
184 for (; p <= port_to; p++) { 184 for (; p <= port_to; p++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 6215fb898c50..a7f4d7a85420 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -185,7 +185,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
185 185
186 if (retried) 186 if (retried)
187 ip = ntohl(h->next.ip); 187 ip = ntohl(h->next.ip);
188 for (; !before(ip_to, ip); ip++) { 188 for (; ip <= ip_to; ip++) {
189 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) 189 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
190 : port; 190 : port;
191 for (; p <= port_to; p++) { 191 for (; p <= port_to; p++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 5ab1b99a53c2..0f164e986bf1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -271,7 +271,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
271 271
272 if (retried) 272 if (retried)
273 ip = ntohl(h->next.ip); 273 ip = ntohl(h->next.ip);
274 for (; !before(ip_to, ip); ip++) { 274 for (; ip <= ip_to; ip++) {
275 e.ip = htonl(ip); 275 e.ip = htonl(ip);
276 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) 276 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
277 : port; 277 : port;
@@ -281,7 +281,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
281 ip == ntohl(h->next.ip) && 281 ip == ntohl(h->next.ip) &&
282 p == ntohs(h->next.port) 282 p == ntohs(h->next.port)
283 ? ntohl(h->next.ip2) : ip2_from; 283 ? ntohl(h->next.ip2) : ip2_from;
284 while (!after(ip2, ip2_to)) { 284 while (ip2 <= ip2_to) {
285 e.ip2 = htonl(ip2); 285 e.ip2 = htonl(ip2);
286 ip2_last = ip_set_range_to_cidr(ip2, ip2_to, 286 ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
287 &cidr); 287 &cidr);
@@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
434 if (unlikely(tb[IPSET_ATTR_IP_TO])) 434 if (unlikely(tb[IPSET_ATTR_IP_TO]))
435 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; 435 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
436 if (unlikely(tb[IPSET_ATTR_CIDR])) { 436 if (unlikely(tb[IPSET_ATTR_CIDR])) {
437 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 437 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
438 438
439 if (cidr != HOST_MASK) 439 if (cidr != HOST_MASK)
440 return -IPSET_ERR_INVALID_CIDR; 440 return -IPSET_ERR_INVALID_CIDR;
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 5d9e895452e7..1c67a1761e45 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -193,7 +193,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
193 } 193 }
194 if (retried) 194 if (retried)
195 ip = ntohl(h->next.ip); 195 ip = ntohl(h->next.ip);
196 while (!after(ip, ip_to)) { 196 while (ip <= ip_to) {
197 e.ip = htonl(ip); 197 e.ip = htonl(ip);
198 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); 198 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
199 ret = adtfn(set, &e, &ext, &ext, flags); 199 ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 44cf11939c91..d417074f1c1a 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -255,7 +255,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
255 255
256 if (retried) 256 if (retried)
257 ip = ntohl(h->next.ip); 257 ip = ntohl(h->next.ip);
258 while (!after(ip, ip_to)) { 258 while (ip <= ip_to) {
259 e.ip = htonl(ip); 259 e.ip = htonl(ip);
260 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); 260 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
261 ret = adtfn(set, &e, &ext, &ext, flags); 261 ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index db614e13b193..7f9ae2e9645b 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -250,13 +250,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
250 if (retried) 250 if (retried)
251 ip = ntohl(h->next.ip[0]); 251 ip = ntohl(h->next.ip[0]);
252 252
253 while (!after(ip, ip_to)) { 253 while (ip <= ip_to) {
254 e.ip[0] = htonl(ip); 254 e.ip[0] = htonl(ip);
255 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); 255 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
256 ip2 = (retried && 256 ip2 = (retried &&
257 ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) 257 ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
258 : ip2_from; 258 : ip2_from;
259 while (!after(ip2, ip2_to)) { 259 while (ip2 <= ip2_to) {
260 e.ip[1] = htonl(ip2); 260 e.ip[1] = htonl(ip2);
261 last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); 261 last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
262 ret = adtfn(set, &e, &ext, &ext, flags); 262 ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 54b64b6cd0cd..e6ef382febe4 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -241,7 +241,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
241 241
242 if (retried) 242 if (retried)
243 ip = ntohl(h->next.ip); 243 ip = ntohl(h->next.ip);
244 while (!after(ip, ip_to)) { 244 while (ip <= ip_to) {
245 e.ip = htonl(ip); 245 e.ip = htonl(ip);
246 last = ip_set_range_to_cidr(ip, ip_to, &cidr); 246 last = ip_set_range_to_cidr(ip, ip_to, &cidr);
247 e.cidr = cidr - 1; 247 e.cidr = cidr - 1;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index aff846960ac4..8602f2595a1a 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -291,7 +291,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
291 if (retried) 291 if (retried)
292 ip = ntohl(h->next.ip[0]); 292 ip = ntohl(h->next.ip[0]);
293 293
294 while (!after(ip, ip_to)) { 294 while (ip <= ip_to) {
295 e.ip[0] = htonl(ip); 295 e.ip[0] = htonl(ip);
296 ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); 296 ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
297 p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) 297 p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
@@ -301,7 +301,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
301 ip2 = (retried && ip == ntohl(h->next.ip[0]) && 301 ip2 = (retried && ip == ntohl(h->next.ip[0]) &&
302 p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) 302 p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
303 : ip2_from; 303 : ip2_from;
304 while (!after(ip2, ip2_to)) { 304 while (ip2 <= ip2_to) {
305 e.ip[1] = htonl(ip2); 305 e.ip[1] = htonl(ip2);
306 ip2_last = ip_set_range_to_cidr(ip2, ip2_to, 306 ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
307 &e.cidr[1]); 307 &e.cidr[1]);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 178d4eba013b..e864681b8dc5 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -44,6 +44,7 @@ struct set_adt_elem {
44struct list_set { 44struct list_set {
45 u32 size; /* size of set list array */ 45 u32 size; /* size of set list array */
46 struct timer_list gc; /* garbage collection */ 46 struct timer_list gc; /* garbage collection */
47 struct ip_set *set; /* attached to this ip_set */
47 struct net *net; /* namespace */ 48 struct net *net; /* namespace */
48 struct list_head members; /* the set members */ 49 struct list_head members; /* the set members */
49}; 50};
@@ -453,7 +454,6 @@ static size_t
453list_set_memsize(const struct list_set *map, size_t dsize) 454list_set_memsize(const struct list_set *map, size_t dsize)
454{ 455{
455 struct set_elem *e; 456 struct set_elem *e;
456 size_t memsize;
457 u32 n = 0; 457 u32 n = 0;
458 458
459 rcu_read_lock(); 459 rcu_read_lock();
@@ -461,9 +461,7 @@ list_set_memsize(const struct list_set *map, size_t dsize)
461 n++; 461 n++;
462 rcu_read_unlock(); 462 rcu_read_unlock();
463 463
464 memsize = sizeof(*map) + n * dsize; 464 return (sizeof(*map) + n * dsize);
465
466 return memsize;
467} 465}
468 466
469static int 467static int
@@ -571,10 +569,10 @@ static const struct ip_set_type_variant set_variant = {
571}; 569};
572 570
573static void 571static void
574list_set_gc(unsigned long ul_set) 572list_set_gc(struct timer_list *t)
575{ 573{
576 struct ip_set *set = (struct ip_set *)ul_set; 574 struct list_set *map = from_timer(map, t, gc);
577 struct list_set *map = set->data; 575 struct ip_set *set = map->set;
578 576
579 spin_lock_bh(&set->lock); 577 spin_lock_bh(&set->lock);
580 set_cleanup_entries(set); 578 set_cleanup_entries(set);
@@ -585,11 +583,11 @@ list_set_gc(unsigned long ul_set)
585} 583}
586 584
587static void 585static void
588list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 586list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
589{ 587{
590 struct list_set *map = set->data; 588 struct list_set *map = set->data;
591 589
592 setup_timer(&map->gc, gc, (unsigned long)set); 590 timer_setup(&map->gc, gc, 0);
593 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); 591 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
594} 592}
595 593
@@ -606,6 +604,7 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
606 604
607 map->size = size; 605 map->size = size;
608 map->net = net; 606 map->net = net;
607 map->set = set;
609 INIT_LIST_HEAD(&map->members); 608 INIT_LIST_HEAD(&map->members);
610 set->data = map; 609 set->data = map;
611 610
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 1c8a42c1056c..d5be9c25fad6 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -3,6 +3,141 @@
3 3
4/* Prefixlen maps for fast conversions, by Jan Engelhardt. */ 4/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
5 5
6#ifdef E
7#undef E
8#endif
9
10#define PREFIXES_MAP \
11 E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \
12 E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \
13 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \
14 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \
15 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \
16 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \
17 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \
18 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \
19 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \
20 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \
21 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \
22 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \
23 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \
24 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \
25 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \
26 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \
27 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \
28 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \
29 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \
30 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \
31 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \
32 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \
33 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \
34 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \
35 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \
36 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \
37 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \
38 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \
39 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \
40 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \
41 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \
42 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \
43 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \
44 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \
45 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \
46 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \
47 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \
48 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \
49 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \
50 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \
51 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \
52 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \
53 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \
54 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \
55 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \
56 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \
57 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \
58 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \
59 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \
60 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \
61 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \
62 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \
63 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \
64 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \
65 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \
66 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \
67 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \
68 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \
69 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \
70 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \
71 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \
72 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \
73 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \
74 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \
75 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \
76 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \
77 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \
78 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \
79 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \
80 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \
81 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \
82 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \
83 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \
84 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \
85 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \
86 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \
87 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \
88 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \
89 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \
90 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \
91 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \
92 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \
93 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \
94 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \
95 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \
96 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \
97 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \
98 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \
99 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \
100 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \
101 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \
102 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \
103 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \
104 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \
105 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \
106 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \
107 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \
108 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \
109 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \
110 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \
111 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \
112 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \
113 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \
114 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \
115 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \
116 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \
117 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \
118 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \
119 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \
120 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \
121 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \
122 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \
123 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \
124 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \
125 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \
126 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \
127 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \
128 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \
129 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \
130 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \
131 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \
132 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \
133 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \
134 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \
135 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \
136 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \
137 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \
138 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \
139 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
140
6#define E(a, b, c, d) \ 141#define E(a, b, c, d) \
7 {.ip6 = { \ 142 {.ip6 = { \
8 htonl(a), htonl(b), \ 143 htonl(a), htonl(b), \
@@ -13,135 +148,7 @@
13 * just use prefixlen_netmask_map[prefixlength].ip. 148 * just use prefixlen_netmask_map[prefixlength].ip.
14 */ 149 */
15const union nf_inet_addr ip_set_netmask_map[] = { 150const union nf_inet_addr ip_set_netmask_map[] = {
16 E(0x00000000, 0x00000000, 0x00000000, 0x00000000), 151 PREFIXES_MAP
17 E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
18 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
19 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
20 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
21 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
22 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
23 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
24 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
25 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
26 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
27 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
28 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
29 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
30 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
31 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
32 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
33 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
34 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
35 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
36 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
37 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
38 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
39 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
40 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
41 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
42 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
43 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
44 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
45 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
46 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
47 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
48 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
49 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
50 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
51 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
52 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
53 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
54 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
55 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
56 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
57 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
58 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
59 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
60 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
61 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
62 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
63 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
64 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
65 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
66 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
67 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
68 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
69 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
70 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
71 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
72 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
73 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
74 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
75 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
76 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
77 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
78 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
79 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
80 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
81 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
82 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
83 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
84 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
85 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
86 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
87 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
88 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
89 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
90 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
91 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
92 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
93 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
94 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
95 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
96 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
97 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
98 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
99 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
100 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
101 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
102 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
103 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
104 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
105 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
106 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
107 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
108 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
109 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
110 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
111 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
112 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
113 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
114 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
115 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
116 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
117 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
118 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
119 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
120 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
121 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
122 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
123 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
124 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
125 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
126 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
127 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
128 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
129 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
130 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
131 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
132 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
133 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
134 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
135 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
136 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
137 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
138 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
139 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
140 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
141 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
142 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
143 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
144 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
145}; 152};
146EXPORT_SYMBOL_GPL(ip_set_netmask_map); 153EXPORT_SYMBOL_GPL(ip_set_netmask_map);
147 154
@@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map);
155 * just use prefixlen_hostmask_map[prefixlength].ip. 162 * just use prefixlen_hostmask_map[prefixlength].ip.
156 */ 163 */
157const union nf_inet_addr ip_set_hostmask_map[] = { 164const union nf_inet_addr ip_set_hostmask_map[] = {
158 E(0x00000000, 0x00000000, 0x00000000, 0x00000000), 165 PREFIXES_MAP
159 E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
160 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
161 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
162 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
163 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
164 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
165 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
166 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
167 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
168 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
169 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
170 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
171 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
172 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
173 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
174 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
175 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
176 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
177 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
178 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
179 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
180 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
181 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
182 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
183 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
184 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
185 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
186 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
187 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
188 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
189 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
190 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
191 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
192 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
193 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
194 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
195 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
196 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
197 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
198 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
199 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
200 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
201 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
202 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
203 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
204 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
205 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
206 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
207 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
208 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
209 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
210 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
211 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
212 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
213 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
214 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
215 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
216 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
217 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
218 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
219 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
220 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
221 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
222 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
223 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
224 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
225 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
226 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
227 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
228 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
229 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
230 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
231 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
232 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
233 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
234 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
235 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
236 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
237 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
238 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
239 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
240 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
241 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
242 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
243 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
244 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
245 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
246 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
247 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
248 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
249 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
250 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
251 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
252 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
253 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
254 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
255 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
256 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
257 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
258 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
259 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
260 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
261 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
262 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
263 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
264 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
265 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
266 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
267 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
268 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
269 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
270 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
271 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
272 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
273 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
274 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
275 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
276 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
277 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
278 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
279 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
280 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
281 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
282 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
283 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
284 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
285 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
286 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
287}; 166};
288EXPORT_SYMBOL_GPL(ip_set_hostmask_map); 167EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
289 168
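
The two hunks above collapse a pair of identical 129-entry tables (one netmask and one hostmask entry for every IPv6 prefix length from /0 to /128) into a single PREFIXES_MAP macro that is expanded inside each array initializer, so the tables can no longer drift apart. A minimal user-space sketch of the same pattern, truncated to /8 and using a made-up struct mask128 in place of union nf_inet_addr:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct mask128 { uint32_t ip6[4]; };

/* Same shape as the kernel macro: each E() becomes one table entry,
 * stored in network byte order. */
#define E(a, b, c, d) { { htonl(a), htonl(b), htonl(c), htonl(d) } }

#define PREFIXES_MAP \
	E(0x00000000, 0, 0, 0), \
	E(0x80000000, 0, 0, 0), \
	E(0xC0000000, 0, 0, 0), \
	E(0xE0000000, 0, 0, 0), \
	E(0xF0000000, 0, 0, 0), \
	E(0xF8000000, 0, 0, 0), \
	E(0xFC000000, 0, 0, 0), \
	E(0xFE000000, 0, 0, 0), \
	E(0xFF000000, 0, 0, 0),

int main(void)
{
	/* Automatic arrays, so htonl() need not be a constant expression. */
	const struct mask128 netmask_map[] = { PREFIXES_MAP };
	const struct mask128 hostmask_map[] = { PREFIXES_MAP };

	printf("/8 mask, first word: 0x%08X\n", ntohl(netmask_map[8].ip6[0]));
	printf("tables agree: %d\n",
	       netmask_map[8].ip6[0] == hostmask_map[8].ip6[0]);
	return 0;
}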
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 67f3f4389602..c552993fa4b9 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the IPVS modules on top of IPv4. 3# Makefile for the IPVS modules on top of IPv4.
3# 4#
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3d2ac71a83ec..3e053cb30070 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -104,7 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
104 spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); 104 spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
105} 105}
106 106
107static void ip_vs_conn_expire(unsigned long data); 107static void ip_vs_conn_expire(struct timer_list *t);
108 108
109/* 109/*
110 * Returns hash value for IPVS connection entry 110 * Returns hash value for IPVS connection entry
@@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
185 hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); 185 hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
186 ret = 1; 186 ret = 1;
187 } else { 187 } else {
188 pr_err("%s(): request for already hashed, called from %pF\n", 188 pr_err("%s(): request for already hashed, called from %pS\n",
189 __func__, __builtin_return_address(0)); 189 __func__, __builtin_return_address(0));
190 ret = 0; 190 ret = 0;
191 } 191 }
@@ -457,7 +457,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
457static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp) 457static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
458{ 458{
459 __ip_vs_conn_put(cp); 459 __ip_vs_conn_put(cp);
460 ip_vs_conn_expire((unsigned long)cp); 460 ip_vs_conn_expire(&cp->timer);
461} 461}
462 462
463/* 463/*
@@ -817,9 +817,9 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
817 kmem_cache_free(ip_vs_conn_cachep, cp); 817 kmem_cache_free(ip_vs_conn_cachep, cp);
818} 818}
819 819
820static void ip_vs_conn_expire(unsigned long data) 820static void ip_vs_conn_expire(struct timer_list *t)
821{ 821{
822 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 822 struct ip_vs_conn *cp = from_timer(cp, t, timer);
823 struct netns_ipvs *ipvs = cp->ipvs; 823 struct netns_ipvs *ipvs = cp->ipvs;
824 824
825 /* 825 /*
@@ -909,7 +909,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
909 } 909 }
910 910
911 INIT_HLIST_NODE(&cp->c_list); 911 INIT_HLIST_NODE(&cp->c_list);
912 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); 912 timer_setup(&cp->timer, ip_vs_conn_expire, 0);
913 cp->ipvs = ipvs; 913 cp->ipvs = ipvs;
914 cp->af = p->af; 914 cp->af = p->af;
915 cp->daf = dest_af; 915 cp->daf = dest_af;
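
The ip_vs_conn changes are part of the tree-wide conversion to the v4.15 timer API: callbacks now take a struct timer_list * and recover their enclosing object with from_timer(), a container_of() wrapper, instead of casting an unsigned long data cookie; timer_setup() replaces setup_timer() accordingly. (The %pF to %pS change in the same file just switches the caller address to plain symbol+offset printing.) A self-contained user-space sketch of the recovery step, with local stand-ins for the kernel macros (GNU C typeof assumed):

#include <stddef.h>
#include <stdio.h>

struct timer_list { unsigned long expires; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
/* from_timer() is container_of() keyed on the embedded timer member. */
#define from_timer(var, timer_ptr, field) \
	container_of(timer_ptr, typeof(*var), field)

struct conn {
	int id;
	struct timer_list timer;	/* embedded, like ip_vs_conn.timer */
};

static void conn_expire(struct timer_list *t)
{
	/* No (struct conn *)data cast: walk back from the timer's address
	 * to the structure that contains it. */
	struct conn *cp = from_timer(cp, t, timer);

	printf("expired conn %d\n", cp->id);
}

int main(void)
{
	struct conn c = { .id = 42 };

	conn_expire(&c.timer);
	return 0;
}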
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 4f940d7eb2f7..fff213eacf2a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 unsigned int hash; 300 unsigned int hash;
301 301
302 if (svc->flags & IP_VS_SVC_F_HASHED) { 302 if (svc->flags & IP_VS_SVC_F_HASHED) {
303 pr_err("%s(): request for already hashed, called from %pF\n", 303 pr_err("%s(): request for already hashed, called from %pS\n",
304 __func__, __builtin_return_address(0)); 304 __func__, __builtin_return_address(0));
305 return 0; 305 return 0;
306 } 306 }
@@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
334static int ip_vs_svc_unhash(struct ip_vs_service *svc) 334static int ip_vs_svc_unhash(struct ip_vs_service *svc)
335{ 335{
336 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 336 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
337 pr_err("%s(): request for unhash flagged, called from %pF\n", 337 pr_err("%s(): request for unhash flagged, called from %pS\n",
338 __func__, __builtin_return_address(0)); 338 __func__, __builtin_return_address(0));
339 return 0; 339 return 0;
340 } 340 }
@@ -1146,9 +1146,9 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1146 return 0; 1146 return 0;
1147} 1147}
1148 1148
1149static void ip_vs_dest_trash_expire(unsigned long data) 1149static void ip_vs_dest_trash_expire(struct timer_list *t)
1150{ 1150{
1151 struct netns_ipvs *ipvs = (struct netns_ipvs *)data; 1151 struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
1152 struct ip_vs_dest *dest, *next; 1152 struct ip_vs_dest *dest, *next;
1153 unsigned long now = jiffies; 1153 unsigned long now = jiffies;
1154 1154
@@ -2034,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2034 seq_puts(seq, 2034 seq_puts(seq,
2035 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 2035 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2036 } else { 2036 } else {
2037 struct net *net = seq_file_net(seq);
2038 struct netns_ipvs *ipvs = net_ipvs(net);
2037 const struct ip_vs_service *svc = v; 2039 const struct ip_vs_service *svc = v;
2038 const struct ip_vs_iter *iter = seq->private; 2040 const struct ip_vs_iter *iter = seq->private;
2039 const struct ip_vs_dest *dest; 2041 const struct ip_vs_dest *dest;
2040 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 2042 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2041 char *sched_name = sched ? sched->name : "none"; 2043 char *sched_name = sched ? sched->name : "none";
2042 2044
2045 if (svc->ipvs != ipvs)
2046 return 0;
2043 if (iter->table == ip_vs_svc_table) { 2047 if (iter->table == ip_vs_svc_table) {
2044#ifdef CONFIG_IP_VS_IPV6 2048#ifdef CONFIG_IP_VS_IPV6
2045 if (svc->af == AF_INET6) 2049 if (svc->af == AF_INET6)
@@ -4019,8 +4023,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
4019 4023
4020 INIT_LIST_HEAD(&ipvs->dest_trash); 4024 INIT_LIST_HEAD(&ipvs->dest_trash);
4021 spin_lock_init(&ipvs->dest_trash_lock); 4025 spin_lock_init(&ipvs->dest_trash_lock);
4022 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 4026 timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
4023 (unsigned long) ipvs);
4024 atomic_set(&ipvs->ftpsvc_counter, 0); 4027 atomic_set(&ipvs->ftpsvc_counter, 0);
4025 atomic_set(&ipvs->nullsvc_counter, 0); 4028 atomic_set(&ipvs->nullsvc_counter, 0);
4026 atomic_set(&ipvs->conn_out_counter, 0); 4029 atomic_set(&ipvs->conn_out_counter, 0);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 457c6c193e13..489055091a9b 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -97,12 +97,12 @@ static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
97} 97}
98 98
99 99
100static void estimation_timer(unsigned long arg) 100static void estimation_timer(struct timer_list *t)
101{ 101{
102 struct ip_vs_estimator *e; 102 struct ip_vs_estimator *e;
103 struct ip_vs_stats *s; 103 struct ip_vs_stats *s;
104 u64 rate; 104 u64 rate;
105 struct netns_ipvs *ipvs = (struct netns_ipvs *)arg; 105 struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
106 106
107 spin_lock(&ipvs->est_lock); 107 spin_lock(&ipvs->est_lock);
108 list_for_each_entry(e, &ipvs->est_list, list) { 108 list_for_each_entry(e, &ipvs->est_list, list) {
@@ -192,7 +192,7 @@ int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
192{ 192{
193 INIT_LIST_HEAD(&ipvs->est_list); 193 INIT_LIST_HEAD(&ipvs->est_list);
194 spin_lock_init(&ipvs->est_lock); 194 spin_lock_init(&ipvs->est_lock);
195 setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs); 195 timer_setup(&ipvs->est_timer, estimation_timer, 0);
196 mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); 196 mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
197 return 0; 197 return 0;
198} 198}
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index b6aa4a970c6e..d625179de485 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -106,6 +106,7 @@ struct ip_vs_lblc_table {
106 struct rcu_head rcu_head; 106 struct rcu_head rcu_head;
107 struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ 107 struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
108 struct timer_list periodic_timer; /* collect stale entries */ 108 struct timer_list periodic_timer; /* collect stale entries */
109 struct ip_vs_service *svc; /* pointer back to service */
109 atomic_t entries; /* number of entries */ 110 atomic_t entries; /* number of entries */
110 int max_size; /* maximum size of entries */ 111 int max_size; /* maximum size of entries */
111 int rover; /* rover for expire check */ 112 int rover; /* rover for expire check */
@@ -294,10 +295,10 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
294 * of the table. 295 * of the table.
295 * The full expiration check is for this purpose now. 296 * The full expiration check is for this purpose now.
296 */ 297 */
297static void ip_vs_lblc_check_expire(unsigned long data) 298static void ip_vs_lblc_check_expire(struct timer_list *t)
298{ 299{
299 struct ip_vs_service *svc = (struct ip_vs_service *) data; 300 struct ip_vs_lblc_table *tbl = from_timer(tbl, t, periodic_timer);
300 struct ip_vs_lblc_table *tbl = svc->sched_data; 301 struct ip_vs_service *svc = tbl->svc;
301 unsigned long now = jiffies; 302 unsigned long now = jiffies;
302 int goal; 303 int goal;
303 int i, j; 304 int i, j;
@@ -369,12 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
369 tbl->rover = 0; 370 tbl->rover = 0;
370 tbl->counter = 1; 371 tbl->counter = 1;
371 tbl->dead = 0; 372 tbl->dead = 0;
373 tbl->svc = svc;
372 374
373 /* 375 /*
374 * Hook periodic timer for garbage collection 376 * Hook periodic timer for garbage collection
375 */ 377 */
376 setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, 378 timer_setup(&tbl->periodic_timer, ip_vs_lblc_check_expire, 0);
377 (unsigned long)svc);
378 mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); 379 mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
379 380
380 return 0; 381 return 0;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c13ff575f9f7..84c57b62a588 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -278,6 +278,7 @@ struct ip_vs_lblcr_table {
278 atomic_t entries; /* number of entries */ 278 atomic_t entries; /* number of entries */
279 int max_size; /* maximum size of entries */ 279 int max_size; /* maximum size of entries */
280 struct timer_list periodic_timer; /* collect stale entries */ 280 struct timer_list periodic_timer; /* collect stale entries */
281 struct ip_vs_service *svc; /* pointer back to service */
281 int rover; /* rover for expire check */ 282 int rover; /* rover for expire check */
282 int counter; /* counter for no expire */ 283 int counter; /* counter for no expire */
283 bool dead; 284 bool dead;
@@ -458,10 +459,10 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
458 * of the table. 459 * of the table.
459 * The full expiration check is for this purpose now. 460 * The full expiration check is for this purpose now.
460 */ 461 */
461static void ip_vs_lblcr_check_expire(unsigned long data) 462static void ip_vs_lblcr_check_expire(struct timer_list *t)
462{ 463{
463 struct ip_vs_service *svc = (struct ip_vs_service *) data; 464 struct ip_vs_lblcr_table *tbl = from_timer(tbl, t, periodic_timer);
464 struct ip_vs_lblcr_table *tbl = svc->sched_data; 465 struct ip_vs_service *svc = tbl->svc;
465 unsigned long now = jiffies; 466 unsigned long now = jiffies;
466 int goal; 467 int goal;
467 int i, j; 468 int i, j;
@@ -532,12 +533,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
532 tbl->rover = 0; 533 tbl->rover = 0;
533 tbl->counter = 1; 534 tbl->counter = 1;
534 tbl->dead = 0; 535 tbl->dead = 0;
536 tbl->svc = svc;
535 537
536 /* 538 /*
537 * Hook periodic timer for garbage collection 539 * Hook periodic timer for garbage collection
538 */ 540 */
539 setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, 541 timer_setup(&tbl->periodic_timer, ip_vs_lblcr_check_expire, 0);
540 (unsigned long)svc);
541 mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); 542 mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
542 543
543 return 0; 544 return 0;
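
In lblc and lblcr the periodic timer is embedded in the hash table rather than in the service, so from_timer() can only recover the table; that is why both schedulers gain a tbl->svc back-pointer, filled in at init time, to reach the service from the callback. The same shape in a runnable sketch with invented names:

#include <stddef.h>
#include <stdio.h>

struct timer_list { unsigned long expires; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct service { const char *name; };

struct table {
	struct timer_list periodic_timer;
	struct service *svc;	/* back-pointer, set when the table is built */
};

static void check_expire(struct timer_list *t)
{
	/* container_of() only reaches the structure embedding the timer;
	 * anything beyond that must be stored as a pointer inside it. */
	struct table *tbl = container_of(t, struct table, periodic_timer);

	printf("gc run for service %s\n", tbl->svc->name);
}

int main(void)
{
	struct service s = { .name = "web" };
	struct table tbl = { .svc = &s };

	check_expire(&tbl.periodic_timer);
	return 0;
}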
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 57c8ee66491e..eff7569824e5 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/ip.h> 3#include <linux/ip.h>
3#include <linux/sctp.h> 4#include <linux/sctp.h>
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 0e5b64a75da0..9ee71cb276d7 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * IPVS An implementation of the IP virtual server support for the 3 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module 4 * LINUX operating system. IPVS is now implemented as a module
@@ -457,7 +458,7 @@ static inline bool in_persistence(struct ip_vs_conn *cp)
457static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, 458static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
458 struct ip_vs_conn *cp, int pkts) 459 struct ip_vs_conn *cp, int pkts)
459{ 460{
460 unsigned long orig = ACCESS_ONCE(cp->sync_endtime); 461 unsigned long orig = READ_ONCE(cp->sync_endtime);
461 unsigned long now = jiffies; 462 unsigned long now = jiffies;
462 unsigned long n = (now + cp->timeout) & ~3UL; 463 unsigned long n = (now + cp->timeout) & ~3UL;
463 unsigned int sync_refresh_period; 464 unsigned int sync_refresh_period;
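
The ip_vs_sync hunk is part of the kernel-wide retirement of ACCESS_ONCE() in favour of READ_ONCE(); both force exactly one load of a shared variable and keep the compiler from caching or refetching it. A rough user-space equivalent of the scalar case (the real kernel macro does more, and typeof is a GNU extension):

#include <stdio.h>

#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

static unsigned long sync_endtime;

int main(void)
{
	sync_endtime = 1234;

	/* One guaranteed load, as in: orig = READ_ONCE(cp->sync_endtime) */
	unsigned long orig = READ_ONCE(sync_endtime);

	printf("%lu\n", orig);
	return 0;
}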
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 90d396814798..4527921b1c3a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -921,6 +921,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
921{ 921{
922 struct sk_buff *new_skb = NULL; 922 struct sk_buff *new_skb = NULL;
923 struct iphdr *old_iph = NULL; 923 struct iphdr *old_iph = NULL;
924 __u8 old_dsfield;
924#ifdef CONFIG_IP_VS_IPV6 925#ifdef CONFIG_IP_VS_IPV6
925 struct ipv6hdr *old_ipv6h = NULL; 926 struct ipv6hdr *old_ipv6h = NULL;
926#endif 927#endif
@@ -945,7 +946,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
945 *payload_len = 946 *payload_len =
946 ntohs(old_ipv6h->payload_len) + 947 ntohs(old_ipv6h->payload_len) +
947 sizeof(*old_ipv6h); 948 sizeof(*old_ipv6h);
948 *dsfield = ipv6_get_dsfield(old_ipv6h); 949 old_dsfield = ipv6_get_dsfield(old_ipv6h);
949 *ttl = old_ipv6h->hop_limit; 950 *ttl = old_ipv6h->hop_limit;
950 if (df) 951 if (df)
951 *df = 0; 952 *df = 0;
@@ -960,12 +961,15 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
960 961
961 /* fix old IP header checksum */ 962 /* fix old IP header checksum */
962 ip_send_check(old_iph); 963 ip_send_check(old_iph);
963 *dsfield = ipv4_get_dsfield(old_iph); 964 old_dsfield = ipv4_get_dsfield(old_iph);
964 *ttl = old_iph->ttl; 965 *ttl = old_iph->ttl;
965 if (payload_len) 966 if (payload_len)
966 *payload_len = ntohs(old_iph->tot_len); 967 *payload_len = ntohs(old_iph->tot_len);
967 } 968 }
968 969
970 /* Implement full-functionality option for ECN encapsulation */
971 *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield);
972
969 return skb; 973 return skb;
970error: 974error:
971 kfree_skb(skb); 975 kfree_skb(skb);
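
The ip_vs_xmit change stops copying the inner DS field verbatim into the tunnel header: the outer value is now derived with INET_ECN_encapsulate(), implementing the RFC 3168 "full-functionality" option in which a CE-marked inner packet gets ECT(0) on the outside instead of leaking CE. A standalone re-implementation of that helper's logic, for illustration only:

#include <stdio.h>

#define ECN_MASK	3	/* low two bits of the DS field */
#define ECN_ECT_0	2
#define ECN_CE		3

/* Mirrors the kernel's INET_ECN_encapsulate(): keep the outer DSCP,
 * copy the inner ECN bits, but translate CE to ECT(0). */
static unsigned char ecn_encapsulate(unsigned char outer, unsigned char inner)
{
	outer &= ~ECN_MASK;
	outer |= (inner & ECN_MASK) == ECN_CE ? ECN_ECT_0
					      : (inner & ECN_MASK);
	return outer;
}

int main(void)
{
	/* The patched call site passes the same value twice: the outer
	 * field starts out as a copy of the inner one. */
	unsigned char inner = 0x07;	/* some DSCP bits + CE */

	printf("outer dsfield = 0x%02x\n", ecn_encapsulate(inner, inner));
	return 0;
}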
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 01130392b7c0..85f643c1e227 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1083,7 +1083,7 @@ static void gc_worker(struct work_struct *work)
1083 next_run = gc_work->next_gc_run; 1083 next_run = gc_work->next_gc_run;
1084 gc_work->last_bucket = i; 1084 gc_work->last_bucket = i;
1085 gc_work->early_drop = false; 1085 gc_work->early_drop = false;
1086 queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); 1086 queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
1087} 1087}
1088 1088
1089static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) 1089static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
@@ -1419,7 +1419,7 @@ repeat:
1419 /* Decide what timeout policy we want to apply to this flow. */ 1419 /* Decide what timeout policy we want to apply to this flow. */
1420 timeouts = nf_ct_timeout_lookup(net, ct, l4proto); 1420 timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
1421 1421
1422 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts); 1422 ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
1423 if (ret <= 0) { 1423 if (ret <= 0) {
1424 /* Invalid: inverse of the return code tells 1424 /* Invalid: inverse of the return code tells
1425 * the netfilter core what to do */ 1425 * the netfilter core what to do */
@@ -1563,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
1563} 1563}
1564EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); 1564EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
1565 1565
1566int nf_ct_port_nlattr_tuple_size(void) 1566unsigned int nf_ct_port_nlattr_tuple_size(void)
1567{ 1567{
1568 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1568 static unsigned int size __read_mostly;
1569
1570 if (!size)
1571 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1572
1573 return size;
1569} 1574}
1570EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); 1575EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1571#endif 1576#endif
@@ -1940,7 +1945,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
1940 return 0; 1945 return 0;
1941} 1946}
1942 1947
1943int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) 1948int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
1944{ 1949{
1945 unsigned int hashsize; 1950 unsigned int hashsize;
1946 int rc; 1951 int rc;
@@ -2084,7 +2089,7 @@ int nf_conntrack_init_start(void)
2084 goto err_proto; 2089 goto err_proto;
2085 2090
2086 conntrack_gc_work_init(&conntrack_gc_work); 2091 conntrack_gc_work_init(&conntrack_gc_work);
2087 queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); 2092 queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
2088 2093
2089 return 0; 2094 return 0;
2090 2095
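
Besides moving conntrack garbage collection onto the power-efficient workqueue, the core hunk makes nf_ct_port_nlattr_tuple_size() compute nla_policy_len() only once: the policy table never changes after boot, so the result is cached in a static on first use. The same compute-once shape in plain C (the stand-in policy_len() is invented):

#include <stdio.h>

/* Stand-in for nla_policy_len(): pretend it walks a policy table. */
static unsigned int policy_len(void)
{
	puts("computing policy length");
	return 8;
}

static unsigned int port_nlattr_tuple_size(void)
{
	/* Computed on first call, served from the cache afterwards; the
	 * kernel version tolerates the benign race of two CPUs writing
	 * the same value. */
	static unsigned int size;

	if (!size)
		size = policy_len();
	return size;
}

int main(void)
{
	printf("%u\n", port_nlattr_tuple_size());	/* computes */
	printf("%u\n", port_nlattr_tuple_size());	/* cached */
	return 0;
}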
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 64778f9a8548..d6748a8a79c5 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -67,9 +67,9 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
67} 67}
68EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report); 68EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
69 69
70static void nf_ct_expectation_timed_out(unsigned long ul_expect) 70static void nf_ct_expectation_timed_out(struct timer_list *t)
71{ 71{
72 struct nf_conntrack_expect *exp = (void *)ul_expect; 72 struct nf_conntrack_expect *exp = from_timer(exp, t, timeout);
73 73
74 spin_lock_bh(&nf_conntrack_expect_lock); 74 spin_lock_bh(&nf_conntrack_expect_lock);
75 nf_ct_unlink_expect(exp); 75 nf_ct_unlink_expect(exp);
@@ -368,8 +368,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
368 /* two references : one for hash insert, one for the timer */ 368 /* two references : one for hash insert, one for the timer */
369 refcount_add(2, &exp->use); 369 refcount_add(2, &exp->use);
370 370
371 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 371 timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0);
372 (unsigned long)exp);
373 helper = rcu_dereference_protected(master_help->helper, 372 helper = rcu_dereference_protected(master_help->helper,
374 lockdep_is_held(&nf_conntrack_expect_lock)); 373 lockdep_is_held(&nf_conntrack_expect_lock));
375 if (helper) { 374 if (helper) {
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 89b2e46925c4..cf1bf2605c10 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -91,41 +91,41 @@ typedef struct field_t {
91} field_t; 91} field_t;
92 92
93/* Bit Stream */ 93/* Bit Stream */
94typedef struct { 94struct bitstr {
95 unsigned char *buf; 95 unsigned char *buf;
96 unsigned char *beg; 96 unsigned char *beg;
97 unsigned char *end; 97 unsigned char *end;
98 unsigned char *cur; 98 unsigned char *cur;
99 unsigned int bit; 99 unsigned int bit;
100} bitstr_t; 100};
101 101
102/* Tool Functions */ 102/* Tool Functions */
103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} 103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} 104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} 105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND) 106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
107static unsigned int get_len(bitstr_t *bs); 107static unsigned int get_len(struct bitstr *bs);
108static unsigned int get_bit(bitstr_t *bs); 108static unsigned int get_bit(struct bitstr *bs);
109static unsigned int get_bits(bitstr_t *bs, unsigned int b); 109static unsigned int get_bits(struct bitstr *bs, unsigned int b);
110static unsigned int get_bitmap(bitstr_t *bs, unsigned int b); 110static unsigned int get_bitmap(struct bitstr *bs, unsigned int b);
111static unsigned int get_uint(bitstr_t *bs, int b); 111static unsigned int get_uint(struct bitstr *bs, int b);
112 112
113/* Decoder Functions */ 113/* Decoder Functions */
114static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level); 114static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level);
115static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level); 115static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level);
116static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level); 116static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level);
117static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level); 117static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level);
118static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level); 118static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level);
119static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level); 119static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
120static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level); 120static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
121static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level); 121static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
122static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level); 122static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
123static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level); 123static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level);
124static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level); 124static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level);
125static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level); 125static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level);
126 126
127/* Decoder Functions Vector */ 127/* Decoder Functions Vector */
128typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int); 128typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int);
129static const decoder_t Decoders[] = { 129static const decoder_t Decoders[] = {
130 decode_nul, 130 decode_nul,
131 decode_bool, 131 decode_bool,
@@ -150,7 +150,7 @@ static const decoder_t Decoders[] = {
150 * Functions 150 * Functions
151 ****************************************************************************/ 151 ****************************************************************************/
152/* Assume bs is aligned && v < 16384 */ 152/* Assume bs is aligned && v < 16384 */
153static unsigned int get_len(bitstr_t *bs) 153static unsigned int get_len(struct bitstr *bs)
154{ 154{
155 unsigned int v; 155 unsigned int v;
156 156
@@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs)
166} 166}
167 167
168/****************************************************************************/ 168/****************************************************************************/
169static unsigned int get_bit(bitstr_t *bs) 169static unsigned int get_bit(struct bitstr *bs)
170{ 170{
171 unsigned int b = (*bs->cur) & (0x80 >> bs->bit); 171 unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
172 172
@@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs)
177 177
178/****************************************************************************/ 178/****************************************************************************/
179/* Assume b <= 8 */ 179/* Assume b <= 8 */
180static unsigned int get_bits(bitstr_t *bs, unsigned int b) 180static unsigned int get_bits(struct bitstr *bs, unsigned int b)
181{ 181{
182 unsigned int v, l; 182 unsigned int v, l;
183 183
@@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b)
203 203
204/****************************************************************************/ 204/****************************************************************************/
205/* Assume b <= 32 */ 205/* Assume b <= 32 */
206static unsigned int get_bitmap(bitstr_t *bs, unsigned int b) 206static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
207{ 207{
208 unsigned int v, l, shift, bytes; 208 unsigned int v, l, shift, bytes;
209 209
@@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
242/**************************************************************************** 242/****************************************************************************
243 * Assume bs is aligned and sizeof(unsigned int) == 4 243 * Assume bs is aligned and sizeof(unsigned int) == 4
244 ****************************************************************************/ 244 ****************************************************************************/
245static unsigned int get_uint(bitstr_t *bs, int b) 245static unsigned int get_uint(struct bitstr *bs, int b)
246{ 246{
247 unsigned int v = 0; 247 unsigned int v = 0;
248 248
@@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b)
264} 264}
265 265
266/****************************************************************************/ 266/****************************************************************************/
267static int decode_nul(bitstr_t *bs, const struct field_t *f, 267static int decode_nul(struct bitstr *bs, const struct field_t *f,
268 char *base, int level) 268 char *base, int level)
269{ 269{
270 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 270 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f,
273} 273}
274 274
275/****************************************************************************/ 275/****************************************************************************/
276static int decode_bool(bitstr_t *bs, const struct field_t *f, 276static int decode_bool(struct bitstr *bs, const struct field_t *f,
277 char *base, int level) 277 char *base, int level)
278{ 278{
279 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 279 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f,
285} 285}
286 286
287/****************************************************************************/ 287/****************************************************************************/
288static int decode_oid(bitstr_t *bs, const struct field_t *f, 288static int decode_oid(struct bitstr *bs, const struct field_t *f,
289 char *base, int level) 289 char *base, int level)
290{ 290{
291 int len; 291 int len;
@@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f,
302} 302}
303 303
304/****************************************************************************/ 304/****************************************************************************/
305static int decode_int(bitstr_t *bs, const struct field_t *f, 305static int decode_int(struct bitstr *bs, const struct field_t *f,
306 char *base, int level) 306 char *base, int level)
307{ 307{
308 unsigned int len; 308 unsigned int len;
@@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f,
346} 346}
347 347
348/****************************************************************************/ 348/****************************************************************************/
349static int decode_enum(bitstr_t *bs, const struct field_t *f, 349static int decode_enum(struct bitstr *bs, const struct field_t *f,
350 char *base, int level) 350 char *base, int level)
351{ 351{
352 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 352 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f,
362} 362}
363 363
364/****************************************************************************/ 364/****************************************************************************/
365static int decode_bitstr(bitstr_t *bs, const struct field_t *f, 365static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
366 char *base, int level) 366 char *base, int level)
367{ 367{
368 unsigned int len; 368 unsigned int len;
@@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
396} 396}
397 397
398/****************************************************************************/ 398/****************************************************************************/
399static int decode_numstr(bitstr_t *bs, const struct field_t *f, 399static int decode_numstr(struct bitstr *bs, const struct field_t *f,
400 char *base, int level) 400 char *base, int level)
401{ 401{
402 unsigned int len; 402 unsigned int len;
@@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f,
414} 414}
415 415
416/****************************************************************************/ 416/****************************************************************************/
417static int decode_octstr(bitstr_t *bs, const struct field_t *f, 417static int decode_octstr(struct bitstr *bs, const struct field_t *f,
418 char *base, int level) 418 char *base, int level)
419{ 419{
420 unsigned int len; 420 unsigned int len;
@@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f,
463} 463}
464 464
465/****************************************************************************/ 465/****************************************************************************/
466static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, 466static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
467 char *base, int level) 467 char *base, int level)
468{ 468{
469 unsigned int len; 469 unsigned int len;
@@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
489} 489}
490 490
491/****************************************************************************/ 491/****************************************************************************/
492static int decode_seq(bitstr_t *bs, const struct field_t *f, 492static int decode_seq(struct bitstr *bs, const struct field_t *f,
493 char *base, int level) 493 char *base, int level)
494{ 494{
495 unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len; 495 unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
@@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f,
606} 606}
607 607
608/****************************************************************************/ 608/****************************************************************************/
609static int decode_seqof(bitstr_t *bs, const struct field_t *f, 609static int decode_seqof(struct bitstr *bs, const struct field_t *f,
610 char *base, int level) 610 char *base, int level)
611{ 611{
612 unsigned int count, effective_count = 0, i, len = 0; 612 unsigned int count, effective_count = 0, i, len = 0;
@@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
696 696
697 697
698/****************************************************************************/ 698/****************************************************************************/
699static int decode_choice(bitstr_t *bs, const struct field_t *f, 699static int decode_choice(struct bitstr *bs, const struct field_t *f,
700 char *base, int level) 700 char *base, int level)
701{ 701{
702 unsigned int type, ext, len = 0; 702 unsigned int type, ext, len = 0;
@@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
772 FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT, 772 FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
773 0, _RasMessage 773 0, _RasMessage
774 }; 774 };
775 bitstr_t bs; 775 struct bitstr bs;
776 776
777 bs.buf = bs.beg = bs.cur = buf; 777 bs.buf = bs.beg = bs.cur = buf;
778 bs.end = buf + sz; 778 bs.end = buf + sz;
@@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
789 FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT, 789 FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
790 0, _H323_UserInformation 790 0, _H323_UserInformation
791 }; 791 };
792 bitstr_t bs; 792 struct bitstr bs;
793 793
794 bs.buf = buf; 794 bs.buf = buf;
795 bs.beg = bs.cur = beg; 795 bs.beg = bs.cur = beg;
@@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
808 FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4, 808 FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
809 DECODE | EXT, 0, _MultimediaSystemControlMessage 809 DECODE | EXT, 0, _MultimediaSystemControlMessage
810 }; 810 };
811 bitstr_t bs; 811 struct bitstr bs;
812 812
813 bs.buf = bs.beg = bs.cur = buf; 813 bs.buf = bs.beg = bs.cur = buf;
814 bs.end = buf + sz; 814 bs.end = buf + sz;
@@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
877 if (sz < 1) 877 if (sz < 1)
878 break; 878 break;
879 len = *p++; 879 len = *p++;
880 sz--;
880 if (sz < len) 881 if (sz < len)
881 break; 882 break;
882 p += len; 883 p += len;
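
Besides replacing the bitstr_t typedef with a plain struct bitstr (kernel style discourages typedefs for structures), the H.323 hunk ends with a genuine bounds fix in DecodeQ931: the length octet just consumed was never subtracted from sz before the sz < len check, so a crafted length could walk past the end of the buffer. A small runnable parser showing the corrected bookkeeping, over a hypothetical tag/length/value layout:

#include <stddef.h>
#include <stdio.h>

/* Every byte consumed must come off the remaining count before that
 * count is used to validate the bytes that follow. */
static size_t skip_tlv(const unsigned char *p, size_t sz)
{
	size_t used = 0;

	while (sz >= 2) {
		unsigned char len;

		p++; sz--; used++;		/* tag octet */
		len = *p++; sz--; used++;	/* length octet: the missing sz-- */
		if (sz < len)			/* compares the true remainder */
			break;
		p += len; sz -= len; used += len;
	}
	return used;
}

int main(void)
{
	/* tag=0x70, len=3, but only 2 value bytes follow: parser stops. */
	const unsigned char msg[] = { 0x70, 0x03, 0xaa, 0xbb };

	printf("consumed %zu of %zu bytes\n",
	       skip_tlv(msg, sizeof(msg)), sizeof(msg));
	return 0;
}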
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index de4053d84364..59c08997bfdf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -533,11 +533,12 @@ nla_put_failure:
533 return -1; 533 return -1;
534} 534}
535 535
536static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) 536#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
537static size_t ctnetlink_proto_size(const struct nf_conn *ct)
537{ 538{
538 const struct nf_conntrack_l3proto *l3proto; 539 const struct nf_conntrack_l3proto *l3proto;
539 const struct nf_conntrack_l4proto *l4proto; 540 const struct nf_conntrack_l4proto *l4proto;
540 size_t len; 541 size_t len, len4 = 0;
541 542
542 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 543 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
543 len = l3proto->nla_size; 544 len = l3proto->nla_size;
@@ -545,9 +546,14 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
545 546
546 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 547 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
547 len += l4proto->nla_size; 548 len += l4proto->nla_size;
549 if (l4proto->nlattr_tuple_size) {
550 len4 = l4proto->nlattr_tuple_size();
551 len4 *= 3u; /* ORIG, REPLY, MASTER */
552 }
548 553
549 return len; 554 return len + len4;
550} 555}
556#endif
551 557
552static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) 558static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
553{ 559{
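
ctnetlink_proto_size() now accounts for the per-tuple attribute space itself, multiplying the protocol's tuple size by three because a message may carry the ORIG, REPLY and MASTER tuples; the matching hunk in nf_conntrack_proto.c below removes the old registration-time accounting. A toy illustration of the arithmetic (sizes invented):

#include <stdio.h>

static unsigned int tuple_attr_size(void)
{
	return 12;	/* stand-in for l4proto->nlattr_tuple_size() */
}

static unsigned int proto_size(unsigned int base)
{
	unsigned int len4 = tuple_attr_size() * 3u; /* ORIG, REPLY, MASTER */

	return base + len4;
}

int main(void)
{
	printf("payload estimate: %u\n", proto_size(40));
	return 0;
}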
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b3e489c859ec..c8e9c9503a08 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -27,6 +27,7 @@
27#include <net/netfilter/nf_conntrack_l3proto.h> 27#include <net/netfilter/nf_conntrack_l3proto.h>
28#include <net/netfilter/nf_conntrack_l4proto.h> 28#include <net/netfilter/nf_conntrack_l4proto.h>
29#include <net/netfilter/nf_conntrack_core.h> 29#include <net/netfilter/nf_conntrack_core.h>
30#include <net/netfilter/nf_log.h>
30 31
31static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; 32static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
32struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; 33struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
@@ -63,6 +64,52 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
63 *header = NULL; 64 *header = NULL;
64 *table = NULL; 65 *table = NULL;
65} 66}
67
68__printf(5, 6)
69void nf_l4proto_log_invalid(const struct sk_buff *skb,
70 struct net *net,
71 u16 pf, u8 protonum,
72 const char *fmt, ...)
73{
74 struct va_format vaf;
75 va_list args;
76
 77	if (net->ct.sysctl_log_invalid != protonum &&
78 net->ct.sysctl_log_invalid != IPPROTO_RAW)
79 return;
80
81 va_start(args, fmt);
82 vaf.fmt = fmt;
83 vaf.va = &args;
84
85 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
86 "nf_ct_proto_%d: %pV ", protonum, &vaf);
87 va_end(args);
88}
89EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
90
91__printf(3, 4)
92void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
93 const struct nf_conn *ct,
94 const char *fmt, ...)
95{
96 struct va_format vaf;
97 struct net *net;
98 va_list args;
99
100 net = nf_ct_net(ct);
101 if (likely(net->ct.sysctl_log_invalid == 0))
102 return;
103
104 va_start(args, fmt);
105 vaf.fmt = fmt;
106 vaf.va = &args;
107
108 nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
109 nf_ct_protonum(ct), "%pV", &vaf);
110 va_end(args);
111}
112EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
66#endif 113#endif
67 114
68const struct nf_conntrack_l4proto * 115const struct nf_conntrack_l4proto *
@@ -125,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
125} 172}
126EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); 173EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
127 174
128int nf_ct_netns_get(struct net *net, u8 nfproto) 175static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
129{ 176{
130 const struct nf_conntrack_l3proto *l3proto; 177 const struct nf_conntrack_l3proto *l3proto;
131 int ret; 178 int ret;
@@ -150,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto)
150 197
151 return ret; 198 return ret;
152} 199}
200
201int nf_ct_netns_get(struct net *net, u8 nfproto)
202{
203 int err;
204
205 if (nfproto == NFPROTO_INET) {
206 err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
207 if (err < 0)
208 goto err1;
209 err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
210 if (err < 0)
211 goto err2;
212 } else {
213 err = nf_ct_netns_do_get(net, nfproto);
214 if (err < 0)
215 goto err1;
216 }
217 return 0;
218
219err2:
220 nf_ct_netns_put(net, NFPROTO_IPV4);
221err1:
222 return err;
223}
153EXPORT_SYMBOL_GPL(nf_ct_netns_get); 224EXPORT_SYMBOL_GPL(nf_ct_netns_get);
154 225
155void nf_ct_netns_put(struct net *net, u8 nfproto) 226static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
156{ 227{
157 const struct nf_conntrack_l3proto *l3proto; 228 const struct nf_conntrack_l3proto *l3proto;
158 229
@@ -171,6 +242,15 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
171 242
172 nf_ct_l3proto_module_put(nfproto); 243 nf_ct_l3proto_module_put(nfproto);
173} 244}
245
246void nf_ct_netns_put(struct net *net, uint8_t nfproto)
247{
248 if (nfproto == NFPROTO_INET) {
249 nf_ct_netns_do_put(net, NFPROTO_IPV4);
250 nf_ct_netns_do_put(net, NFPROTO_IPV6);
251 } else
252 nf_ct_netns_do_put(net, nfproto);
253}
174EXPORT_SYMBOL_GPL(nf_ct_netns_put); 254EXPORT_SYMBOL_GPL(nf_ct_netns_put);
175 255
176const struct nf_conntrack_l4proto * 256const struct nf_conntrack_l4proto *
@@ -351,8 +431,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
351 l4proto->nla_size = 0; 431 l4proto->nla_size = 0;
352 if (l4proto->nlattr_size) 432 if (l4proto->nlattr_size)
353 l4proto->nla_size += l4proto->nlattr_size(); 433 l4proto->nla_size += l4proto->nlattr_size();
354 if (l4proto->nlattr_tuple_size)
355 l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
356 434
357 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 435 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
358 l4proto); 436 l4proto);
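
The two new helpers fold the LOG_INVALID() test into one place: logging fires only when net->ct.sysctl_log_invalid names the protocol, or is IPPROTO_RAW (255, "log all"). A sketch of a caller, assuming a hypothetical tracker (foo_error(), struct foo_hdr and IPPROTO_FOO are made up):

        #define IPPROTO_FOO 253         /* hypothetical; an experimental proto number */

        struct foo_hdr {
                __u8 type;              /* hypothetical wire header */
        };

        static int foo_error(struct net *net, struct sk_buff *skb,
                             unsigned int dataoff, u8 pf)
        {
                struct foo_hdr _fh;
                const struct foo_hdr *fh;

                fh = skb_header_pointer(skb, dataoff, sizeof(_fh), &_fh);
                if (!fh) {
                        /* helper checks the sysctl; no LOG_INVALID() needed */
                        nf_l4proto_log_invalid(skb, net, pf, IPPROTO_FOO,
                                               "short packet");
                        return -NF_ACCEPT;
                }
                return NF_ACCEPT;
        }
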
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 0f5a4d79f6b8..2a446f4a554c 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
428 default: 428 default:
429 dn = dccp_pernet(net); 429 dn = dccp_pernet(net);
430 if (dn->dccp_loose == 0) { 430 if (dn->dccp_loose == 0) {
431 msg = "nf_ct_dccp: not picking up existing connection "; 431 msg = "not picking up existing connection ";
432 goto out_invalid; 432 goto out_invalid;
433 } 433 }
434 case CT_DCCP_REQUEST: 434 case CT_DCCP_REQUEST:
435 break; 435 break;
436 case CT_DCCP_INVALID: 436 case CT_DCCP_INVALID:
437 msg = "nf_ct_dccp: invalid state transition "; 437 msg = "invalid state transition ";
438 goto out_invalid; 438 goto out_invalid;
439 } 439 }
440 440
@@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
447 return true; 447 return true;
448 448
449out_invalid: 449out_invalid:
450 if (LOG_INVALID(net, IPPROTO_DCCP)) 450 nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
451 nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
452 NULL, "%s", msg);
453 return false; 451 return false;
454} 452}
455 453
@@ -469,10 +467,8 @@ static unsigned int *dccp_get_timeouts(struct net *net)
469 467
470static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, 468static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
471 unsigned int dataoff, enum ip_conntrack_info ctinfo, 469 unsigned int dataoff, enum ip_conntrack_info ctinfo,
472 u_int8_t pf,
473 unsigned int *timeouts) 470 unsigned int *timeouts)
474{ 471{
475 struct net *net = nf_ct_net(ct);
476 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 472 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
477 struct dccp_hdr _dh, *dh; 473 struct dccp_hdr _dh, *dh;
478 u_int8_t type, old_state, new_state; 474 u_int8_t type, old_state, new_state;
@@ -534,15 +530,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
534 ct->proto.dccp.last_pkt = type; 530 ct->proto.dccp.last_pkt = type;
535 531
536 spin_unlock_bh(&ct->lock); 532 spin_unlock_bh(&ct->lock);
537 if (LOG_INVALID(net, IPPROTO_DCCP)) 533 nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
538 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
539 "nf_ct_dccp: invalid packet ignored ");
540 return NF_ACCEPT; 534 return NF_ACCEPT;
541 case CT_DCCP_INVALID: 535 case CT_DCCP_INVALID:
542 spin_unlock_bh(&ct->lock); 536 spin_unlock_bh(&ct->lock);
543 if (LOG_INVALID(net, IPPROTO_DCCP)) 537 nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
544 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
545 "nf_ct_dccp: invalid state transition ");
546 return -NF_ACCEPT; 538 return -NF_ACCEPT;
547 } 539 }
548 540
@@ -604,8 +596,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
604 return NF_ACCEPT; 596 return NF_ACCEPT;
605 597
606out_invalid: 598out_invalid:
607 if (LOG_INVALID(net, IPPROTO_DCCP)) 599 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
608 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg);
609 return -NF_ACCEPT; 600 return -NF_ACCEPT;
610} 601}
611 602
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 9cd40700842e..1f86ddf6649a 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -60,7 +60,6 @@ static int generic_packet(struct nf_conn *ct,
60 const struct sk_buff *skb, 60 const struct sk_buff *skb,
61 unsigned int dataoff, 61 unsigned int dataoff,
62 enum ip_conntrack_info ctinfo, 62 enum ip_conntrack_info ctinfo,
63 u_int8_t pf,
64 unsigned int *timeout) 63 unsigned int *timeout)
65{ 64{
66 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 65 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
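
The pf argument is dropped from every ->packet() callback in this series; it existed only to feed nf_log_packet(), and the ct-based logging helper derives net and family from the conntrack itself. The resulting callback shape, as a sketch (demo_packet() is hypothetical):

        static int demo_packet(struct nf_conn *ct, const struct sk_buff *skb,
                               unsigned int dataoff, enum ip_conntrack_info ctinfo,
                               unsigned int *timeout)
        {
                /* no pf parameter: logging helpers get it via nf_ct_l3num(ct) */
                nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
                return NF_ACCEPT;
        }
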
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 09a90484c27d..a2503005d80b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -244,7 +244,6 @@ static int gre_packet(struct nf_conn *ct,
244 const struct sk_buff *skb, 244 const struct sk_buff *skb,
245 unsigned int dataoff, 245 unsigned int dataoff,
246 enum ip_conntrack_info ctinfo, 246 enum ip_conntrack_info ctinfo,
247 u_int8_t pf,
248 unsigned int *timeouts) 247 unsigned int *timeouts)
249{ 248{
250 /* If we've seen traffic both ways, this is a GRE connection. 249 /* If we've seen traffic both ways, this is a GRE connection.
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6303a88af12b..80faf04ddf15 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -306,7 +306,6 @@ static int sctp_packet(struct nf_conn *ct,
306 const struct sk_buff *skb, 306 const struct sk_buff *skb,
307 unsigned int dataoff, 307 unsigned int dataoff,
308 enum ip_conntrack_info ctinfo, 308 enum ip_conntrack_info ctinfo,
309 u_int8_t pf,
310 unsigned int *timeouts) 309 unsigned int *timeouts)
311{ 310{
312 enum sctp_conntrack new_state, old_state; 311 enum sctp_conntrack new_state, old_state;
@@ -522,8 +521,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
522 } 521 }
523 return NF_ACCEPT; 522 return NF_ACCEPT;
524out_invalid: 523out_invalid:
525 if (LOG_INVALID(net, IPPROTO_SCTP)) 524 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
526 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg);
527 return -NF_ACCEPT; 525 return -NF_ACCEPT;
528} 526}
529 527
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index cba1c6ffe51a..b12fc07111d0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -493,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
493 unsigned int index, 493 unsigned int index,
494 const struct sk_buff *skb, 494 const struct sk_buff *skb,
495 unsigned int dataoff, 495 unsigned int dataoff,
496 const struct tcphdr *tcph, 496 const struct tcphdr *tcph)
497 u_int8_t pf)
498{ 497{
499 struct net *net = nf_ct_net(ct); 498 struct net *net = nf_ct_net(ct);
500 struct nf_tcp_net *tn = tcp_pernet(net); 499 struct nf_tcp_net *tn = tcp_pernet(net);
@@ -702,9 +701,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
702 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || 701 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
703 tn->tcp_be_liberal) 702 tn->tcp_be_liberal)
704 res = true; 703 res = true;
705 if (!res && LOG_INVALID(net, IPPROTO_TCP)) 704 if (!res) {
706 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, 705 nf_ct_l4proto_log_invalid(skb, ct,
707 "nf_ct_tcp: %s ", 706 "%s",
708 before(seq, sender->td_maxend + 1) ? 707 before(seq, sender->td_maxend + 1) ?
709 in_recv_win ? 708 in_recv_win ?
710 before(sack, receiver->td_end + 1) ? 709 before(sack, receiver->td_end + 1) ?
@@ -713,6 +712,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
713 : "ACK is over the upper bound (ACKed data not seen yet)" 712 : "ACK is over the upper bound (ACKed data not seen yet)"
714 : "SEQ is under the lower bound (already ACKed data retransmitted)" 713 : "SEQ is under the lower bound (already ACKed data retransmitted)"
715 : "SEQ is over the upper bound (over the window of the receiver)"); 714 : "SEQ is over the upper bound (over the window of the receiver)");
715 }
716 } 716 }
717 717
718 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " 718 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
@@ -738,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
738 [TCPHDR_ACK|TCPHDR_URG] = 1, 738 [TCPHDR_ACK|TCPHDR_URG] = 1,
739}; 739};
740 740
741static void tcp_error_log(const struct sk_buff *skb, struct net *net,
742 u8 pf, const char *msg)
743{
744 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
745}
746
741/* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */ 747/* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */
742static int tcp_error(struct net *net, struct nf_conn *tmpl, 748static int tcp_error(struct net *net, struct nf_conn *tmpl,
743 struct sk_buff *skb, 749 struct sk_buff *skb,
@@ -753,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
753 /* Smaller than minimal TCP header? */ 759 /* Smaller than minimal TCP header? */
754 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 760 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
755 if (th == NULL) { 761 if (th == NULL) {
756 if (LOG_INVALID(net, IPPROTO_TCP)) 762 tcp_error_log(skb, net, pf, "short packet");
757 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
758 "nf_ct_tcp: short packet ");
759 return -NF_ACCEPT; 763 return -NF_ACCEPT;
760 } 764 }
761 765
762 /* Not whole TCP header or malformed packet */ 766 /* Not whole TCP header or malformed packet */
763 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { 767 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
764 if (LOG_INVALID(net, IPPROTO_TCP)) 768 tcp_error_log(skb, net, pf, "truncated packet");
765 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
766 "nf_ct_tcp: truncated/malformed packet ");
767 return -NF_ACCEPT; 769 return -NF_ACCEPT;
768 } 770 }
769 771
@@ -774,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
774 /* FIXME: Source route IP option packets --RR */ 776 /* FIXME: Source route IP option packets --RR */
775 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 777 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
776 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { 778 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
777 if (LOG_INVALID(net, IPPROTO_TCP)) 779 tcp_error_log(skb, net, pf, "bad checksum");
778 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
779 "nf_ct_tcp: bad TCP checksum ");
780 return -NF_ACCEPT; 780 return -NF_ACCEPT;
781 } 781 }
782 782
783 /* Check TCP flags. */ 783 /* Check TCP flags. */
784 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); 784 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
785 if (!tcp_valid_flags[tcpflags]) { 785 if (!tcp_valid_flags[tcpflags]) {
786 if (LOG_INVALID(net, IPPROTO_TCP)) 786 tcp_error_log(skb, net, pf, "invalid tcp flag combination");
787 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
788 "nf_ct_tcp: invalid TCP flag combination ");
789 return -NF_ACCEPT; 787 return -NF_ACCEPT;
790 } 788 }
791 789
@@ -802,7 +800,6 @@ static int tcp_packet(struct nf_conn *ct,
802 const struct sk_buff *skb, 800 const struct sk_buff *skb,
803 unsigned int dataoff, 801 unsigned int dataoff,
804 enum ip_conntrack_info ctinfo, 802 enum ip_conntrack_info ctinfo,
805 u_int8_t pf,
806 unsigned int *timeouts) 803 unsigned int *timeouts)
807{ 804{
808 struct net *net = nf_ct_net(ct); 805 struct net *net = nf_ct_net(ct);
@@ -939,10 +936,8 @@ static int tcp_packet(struct nf_conn *ct,
939 IP_CT_EXP_CHALLENGE_ACK; 936 IP_CT_EXP_CHALLENGE_ACK;
940 } 937 }
941 spin_unlock_bh(&ct->lock); 938 spin_unlock_bh(&ct->lock);
942 if (LOG_INVALID(net, IPPROTO_TCP)) 939 nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
943 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, 940 "state %s ", tcp_conntrack_names[old_state]);
944 "nf_ct_tcp: invalid packet ignored in "
945 "state %s ", tcp_conntrack_names[old_state]);
946 return NF_ACCEPT; 941 return NF_ACCEPT;
947 case TCP_CONNTRACK_MAX: 942 case TCP_CONNTRACK_MAX:
948 /* Special case for SYN proxy: when the SYN to the server or 943 /* Special case for SYN proxy: when the SYN to the server or
@@ -964,9 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
964 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 959 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
965 dir, get_conntrack_index(th), old_state); 960 dir, get_conntrack_index(th), old_state);
966 spin_unlock_bh(&ct->lock); 961 spin_unlock_bh(&ct->lock);
967 if (LOG_INVALID(net, IPPROTO_TCP)) 962 nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
968 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
969 "nf_ct_tcp: invalid state ");
970 return -NF_ACCEPT; 963 return -NF_ACCEPT;
971 case TCP_CONNTRACK_TIME_WAIT: 964 case TCP_CONNTRACK_TIME_WAIT:
972 /* RFC5961 compliance causes the stack to send "challenge-ACK" 965 /* RFC5961 compliance causes the stack to send "challenge-ACK"
@@ -981,9 +974,7 @@ static int tcp_packet(struct nf_conn *ct,
981 /* Detected RFC5961 challenge ACK */ 974 /* Detected RFC5961 challenge ACK */
982 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; 975 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
983 spin_unlock_bh(&ct->lock); 976 spin_unlock_bh(&ct->lock);
984 if (LOG_INVALID(net, IPPROTO_TCP)) 977 nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
985 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
986 "nf_ct_tcp: challenge-ACK ignored ");
987 return NF_ACCEPT; /* Don't change state */ 978 return NF_ACCEPT; /* Don't change state */
988 } 979 }
989 break; 980 break;
@@ -993,9 +984,7 @@ static int tcp_packet(struct nf_conn *ct,
993 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { 984 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
994 /* Invalid RST */ 985 /* Invalid RST */
995 spin_unlock_bh(&ct->lock); 986 spin_unlock_bh(&ct->lock);
996 if (LOG_INVALID(net, IPPROTO_TCP)) 987 nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
997 nf_log_packet(net, pf, 0, skb, NULL, NULL,
998 NULL, "nf_ct_tcp: invalid RST ");
999 return -NF_ACCEPT; 988 return -NF_ACCEPT;
1000 } 989 }
1001 if (index == TCP_RST_SET 990 if (index == TCP_RST_SET
@@ -1022,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct,
1022 } 1011 }
1023 1012
1024 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, 1013 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1025 skb, dataoff, th, pf)) { 1014 skb, dataoff, th)) {
1026 spin_unlock_bh(&ct->lock); 1015 spin_unlock_bh(&ct->lock);
1027 return -NF_ACCEPT; 1016 return -NF_ACCEPT;
1028 } 1017 }
@@ -1288,9 +1277,14 @@ static int tcp_nlattr_size(void)
1288 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1); 1277 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1289} 1278}
1290 1279
1291static int tcp_nlattr_tuple_size(void) 1280static unsigned int tcp_nlattr_tuple_size(void)
1292{ 1281{
1293 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1282 static unsigned int size __read_mostly;
1283
1284 if (!size)
1285 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1286
1287 return size;
1294} 1288}
1295#endif 1289#endif
1296 1290
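
Since nlattr_tuple_size() is now called per message (from ctnetlink_proto_size()) instead of once at registration, the callback memoizes its result; nla_policy_len() walks the whole policy array on each call, but the answer never changes. The idiom in isolation (sketch; a racing first call is benign because both writers store the same value):

        static unsigned int demo_tuple_size(void)
        {
                static unsigned int size __read_mostly;

                if (!size)      /* computed once, then served from the cache */
                        size = nla_policy_len(nf_ct_port_nla_policy,
                                              CTA_PROTO_MAX + 1);
                return size;
        }
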
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8af734cd1a94..3a5f727103af 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -73,7 +73,6 @@ static int udp_packet(struct nf_conn *ct,
73 const struct sk_buff *skb, 73 const struct sk_buff *skb,
74 unsigned int dataoff, 74 unsigned int dataoff,
75 enum ip_conntrack_info ctinfo, 75 enum ip_conntrack_info ctinfo,
76 u_int8_t pf,
77 unsigned int *timeouts) 76 unsigned int *timeouts)
78{ 77{
79 /* If we've seen traffic both ways, this is some kind of UDP 78 /* If we've seen traffic both ways, this is some kind of UDP
@@ -99,6 +98,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
99} 98}
100 99
101#ifdef CONFIG_NF_CT_PROTO_UDPLITE 100#ifdef CONFIG_NF_CT_PROTO_UDPLITE
101static void udplite_error_log(const struct sk_buff *skb, struct net *net,
102 u8 pf, const char *msg)
103{
104 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
105}
106
102static int udplite_error(struct net *net, struct nf_conn *tmpl, 107static int udplite_error(struct net *net, struct nf_conn *tmpl,
103 struct sk_buff *skb, 108 struct sk_buff *skb,
104 unsigned int dataoff, 109 unsigned int dataoff,
@@ -112,9 +117,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
112 /* Header is too small? */ 117 /* Header is too small? */
113 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 118 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
114 if (!hdr) { 119 if (!hdr) {
115 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 120 udplite_error_log(skb, net, pf, "short packet");
116 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
117 "nf_ct_udplite: short packet ");
118 return -NF_ACCEPT; 121 return -NF_ACCEPT;
119 } 122 }
120 123
@@ -122,17 +125,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
122 if (cscov == 0) { 125 if (cscov == 0) {
123 cscov = udplen; 126 cscov = udplen;
124 } else if (cscov < sizeof(*hdr) || cscov > udplen) { 127 } else if (cscov < sizeof(*hdr) || cscov > udplen) {
125 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 128 udplite_error_log(skb, net, pf, "invalid checksum coverage");
126 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
127 "nf_ct_udplite: invalid checksum coverage ");
128 return -NF_ACCEPT; 129 return -NF_ACCEPT;
129 } 130 }
130 131
131 /* UDPLITE mandates checksums */ 132 /* UDPLITE mandates checksums */
132 if (!hdr->check) { 133 if (!hdr->check) {
133 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 134 udplite_error_log(skb, net, pf, "checksum missing");
134 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
135 "nf_ct_udplite: checksum missing ");
136 return -NF_ACCEPT; 135 return -NF_ACCEPT;
137 } 136 }
138 137
@@ -140,9 +139,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
140 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 139 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
141 nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, 140 nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
142 pf)) { 141 pf)) {
143 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 142 udplite_error_log(skb, net, pf, "bad checksum");
144 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
145 "nf_ct_udplite: bad UDPLite checksum ");
146 return -NF_ACCEPT; 143 return -NF_ACCEPT;
147 } 144 }
148 145
@@ -150,6 +147,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
150} 147}
151#endif 148#endif
152 149
150static void udp_error_log(const struct sk_buff *skb, struct net *net,
151 u8 pf, const char *msg)
152{
153 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
154}
155
153static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, 156static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
154 unsigned int dataoff, 157 unsigned int dataoff,
155 u_int8_t pf, 158 u_int8_t pf,
@@ -162,17 +165,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
162 /* Header is too small? */ 165 /* Header is too small? */
163 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 166 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
164 if (hdr == NULL) { 167 if (hdr == NULL) {
165 if (LOG_INVALID(net, IPPROTO_UDP)) 168 udp_error_log(skb, net, pf, "short packet");
166 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
167 "nf_ct_udp: short packet ");
168 return -NF_ACCEPT; 169 return -NF_ACCEPT;
169 } 170 }
170 171
171 /* Truncated/malformed packets */ 172 /* Truncated/malformed packets */
172 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { 173 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
173 if (LOG_INVALID(net, IPPROTO_UDP)) 174 udp_error_log(skb, net, pf, "truncated/malformed packet");
174 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
175 "nf_ct_udp: truncated/malformed packet ");
176 return -NF_ACCEPT; 175 return -NF_ACCEPT;
177 } 176 }
178 177
@@ -186,9 +185,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
186 * FIXME: Source route IP option packets --RR */ 185 * FIXME: Source route IP option packets --RR */
187 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 186 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
188 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { 187 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
189 if (LOG_INVALID(net, IPPROTO_UDP)) 188 udp_error_log(skb, net, pf, "bad checksum");
190 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
191 "nf_ct_udp: bad UDP checksum ");
192 return -NF_ACCEPT; 189 return -NF_ACCEPT;
193 } 190 }
194 191
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 49f87ec093a3..44284cd2528d 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _NF_INTERNALS_H 2#ifndef _NF_INTERNALS_H
2#define _NF_INTERNALS_H 3#define _NF_INTERNALS_H
3 4
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index f393a7086025..6c38421e31f9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -429,7 +429,7 @@ nf_nat_setup_info(struct nf_conn *ct,
429 429
430 srchash = hash_by_src(net, 430 srchash = hash_by_src(net,
431 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 431 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
432 lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)]; 432 lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
433 spin_lock_bh(lock); 433 spin_lock_bh(lock);
434 hlist_add_head_rcu(&ct->nat_bysource, 434 hlist_add_head_rcu(&ct->nat_bysource,
435 &nf_nat_bysource[srchash]); 435 &nf_nat_bysource[srchash]);
@@ -532,9 +532,9 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
532 unsigned int h; 532 unsigned int h;
533 533
534 h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 534 h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
535 spin_lock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]); 535 spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
536 hlist_del_rcu(&ct->nat_bysource); 536 hlist_del_rcu(&ct->nat_bysource);
537 spin_unlock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]); 537 spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
538} 538}
539 539
540static int nf_nat_proto_clean(struct nf_conn *ct, void *data) 540static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
@@ -542,17 +542,14 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
542 if (nf_nat_proto_remove(ct, data)) 542 if (nf_nat_proto_remove(ct, data))
543 return 1; 543 return 1;
544 544
545 if ((ct->status & IPS_SRC_NAT_DONE) == 0) 545 /* This module is being removed and conntrack has nat null binding.
546 return 0;
547
548 /* This netns is being destroyed, and conntrack has nat null binding.
549 * Remove it from bysource hash, as the table will be freed soon. 546 * Remove it from bysource hash, as the table will be freed soon.
550 * 547 *
551 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack() 548 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
552 * will delete entry from already-freed table. 549 * will delete entry from already-freed table.
553 */ 550 */
554 clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 551 if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
555 __nf_nat_cleanup_conntrack(ct); 552 __nf_nat_cleanup_conntrack(ct);
556 553
557 /* don't delete conntrack. Although that would make things a lot 554 /* don't delete conntrack. Although that would make things a lot
558 * simpler, we'd end up flushing all conntracks on nat rmmod. 555 * simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -807,8 +804,8 @@ static int __init nf_nat_init(void)
807 804
808 /* Leave them the same for the moment. */ 805 /* Leave them the same for the moment. */
809 nf_nat_htable_size = nf_conntrack_htable_size; 806 nf_nat_htable_size = nf_conntrack_htable_size;
810 if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks)) 807 if (nf_nat_htable_size < CONNTRACK_LOCKS)
811 nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks); 808 nf_nat_htable_size = CONNTRACK_LOCKS;
812 809
813 nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); 810 nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
814 if (!nf_nat_bysource) 811 if (!nf_nat_bysource)
@@ -821,7 +818,7 @@ static int __init nf_nat_init(void)
821 return ret; 818 return ret;
822 } 819 }
823 820
824 for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++) 821 for (i = 0; i < CONNTRACK_LOCKS; i++)
825 spin_lock_init(&nf_nat_locks[i]); 822 spin_lock_init(&nf_nat_locks[i]);
826 823
827 nf_ct_helper_expectfn_register(&follow_master_nat); 824 nf_ct_helper_expectfn_register(&follow_master_nat);
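
The test_and_clear_bit() change above closes a small race: with a separate test followed by clear_bit(), the module-unload iteration and conntrack destruction could both observe IPS_SRC_NAT_DONE set and both unhash the entry. Reduced to its core (sketch; __nf_nat_cleanup_conntrack() is the static helper from this file):

        static void nat_cleanup_once(struct nf_conn *ct)
        {
                /* atomic read-modify-write: only the first caller sees the bit */
                if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
                        __nf_nat_cleanup_conntrack(ct);
        }
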
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e84a578dbe35..d76afafdc699 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void)
134} 134}
135 135
136/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ 136/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
137static int warn_set(const char *val, struct kernel_param *kp) 137static int warn_set(const char *val, const struct kernel_param *kp)
138{ 138{
139 printk(KERN_INFO KBUILD_MODNAME 139 printk(KERN_INFO KBUILD_MODNAME
140 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); 140 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 0648cb096bd8..dcb5f6375d9d 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -106,7 +106,7 @@ static int __init nf_nat_irc_init(void)
106} 106}
107 107
108/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ 108/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
109static int warn_set(const char *val, struct kernel_param *kp) 109static int warn_set(const char *val, const struct kernel_param *kp)
110{ 110{
111 printk(KERN_INFO KBUILD_MODNAME 111 printk(KERN_INFO KBUILD_MODNAME
112 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); 112 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index d2a9e6b5d01f..46cb3786e0ec 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/init.h> 3#include <linux/init.h>
3#include <linux/module.h> 4#include <linux/module.h>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 929927171426..d8327b43e4dc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1048,7 +1048,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
1048 if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) 1048 if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
1049 goto nla_put_failure; 1049 goto nla_put_failure;
1050 1050
1051 if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) 1051 if (basechain->stats && nft_dump_stats(skb, basechain->stats))
1052 goto nla_put_failure; 1052 goto nla_put_failure;
1053 } 1053 }
1054 1054
@@ -1487,8 +1487,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
1487 1487
1488 chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], 1488 chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
1489 genmask); 1489 genmask);
1490 if (IS_ERR(chain2)) 1490 if (!IS_ERR(chain2))
1491 return PTR_ERR(chain2); 1491 return -EEXIST;
1492 } 1492 }
1493 1493
1494 if (nla[NFTA_CHAIN_COUNTERS]) { 1494 if (nla[NFTA_CHAIN_COUNTERS]) {
@@ -2549,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
2549 case NFT_SET_POL_PERFORMANCE: 2549 case NFT_SET_POL_PERFORMANCE:
2550 if (est.lookup < best.lookup) 2550 if (est.lookup < best.lookup)
2551 break; 2551 break;
2552 if (est.lookup == best.lookup) { 2552 if (est.lookup == best.lookup &&
2553 if (!desc->size) { 2553 est.space < best.space)
2554 if (est.space < best.space) 2554 break;
2555 break;
2556 } else if (est.size < best.size) {
2557 break;
2558 }
2559 }
2560 continue; 2555 continue;
2561 case NFT_SET_POL_MEMORY: 2556 case NFT_SET_POL_MEMORY:
2562 if (!desc->size) { 2557 if (!desc->size) {
@@ -2741,8 +2736,10 @@ cont:
2741 list_for_each_entry(i, &ctx->table->sets, list) { 2736 list_for_each_entry(i, &ctx->table->sets, list) {
2742 if (!nft_is_active_next(ctx->net, i)) 2737 if (!nft_is_active_next(ctx->net, i))
2743 continue; 2738 continue;
2744 if (!strcmp(set->name, i->name)) 2739 if (!strcmp(set->name, i->name)) {
2740 kfree(set->name);
2745 return -ENFILE; 2741 return -ENFILE;
2742 }
2746 } 2743 }
2747 return 0; 2744 return 0;
2748} 2745}
@@ -3591,45 +3588,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
3591 return 0; 3588 return 0;
3592} 3589}
3593 3590
3594static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
3595 struct sk_buff *skb, const struct nlmsghdr *nlh,
3596 const struct nlattr * const nla[],
3597 struct netlink_ext_ack *extack)
3598{
3599 u8 genmask = nft_genmask_cur(net);
3600 const struct nft_set *set;
3601 struct nft_ctx ctx;
3602 int err;
3603
3604 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
3605 if (err < 0)
3606 return err;
3607
3608 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
3609 genmask);
3610 if (IS_ERR(set))
3611 return PTR_ERR(set);
3612
3613 if (nlh->nlmsg_flags & NLM_F_DUMP) {
3614 struct netlink_dump_control c = {
3615 .dump = nf_tables_dump_set,
3616 .done = nf_tables_dump_set_done,
3617 };
3618 struct nft_set_dump_ctx *dump_ctx;
3619
3620 dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
3621 if (!dump_ctx)
3622 return -ENOMEM;
3623
3624 dump_ctx->set = set;
3625 dump_ctx->ctx = ctx;
3626
3627 c.data = dump_ctx;
3628 return netlink_dump_start(nlsk, skb, nlh, &c);
3629 }
3630 return -EOPNOTSUPP;
3631}
3632
3633static int nf_tables_fill_setelem_info(struct sk_buff *skb, 3591static int nf_tables_fill_setelem_info(struct sk_buff *skb,
3634 const struct nft_ctx *ctx, u32 seq, 3592 const struct nft_ctx *ctx, u32 seq,
3635 u32 portid, int event, u16 flags, 3593 u32 portid, int event, u16 flags,
@@ -3675,6 +3633,135 @@ nla_put_failure:
3675 return -1; 3633 return -1;
3676} 3634}
3677 3635
3636static int nft_setelem_parse_flags(const struct nft_set *set,
3637 const struct nlattr *attr, u32 *flags)
3638{
3639 if (attr == NULL)
3640 return 0;
3641
3642 *flags = ntohl(nla_get_be32(attr));
3643 if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
3644 return -EINVAL;
3645 if (!(set->flags & NFT_SET_INTERVAL) &&
3646 *flags & NFT_SET_ELEM_INTERVAL_END)
3647 return -EINVAL;
3648
3649 return 0;
3650}
3651
3652static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3653 const struct nlattr *attr)
3654{
3655 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
3656 const struct nft_set_ext *ext;
3657 struct nft_data_desc desc;
3658 struct nft_set_elem elem;
3659 struct sk_buff *skb;
3660 uint32_t flags = 0;
3661 void *priv;
3662 int err;
3663
3664 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
3665 nft_set_elem_policy, NULL);
3666 if (err < 0)
3667 return err;
3668
3669 if (!nla[NFTA_SET_ELEM_KEY])
3670 return -EINVAL;
3671
3672 err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
3673 if (err < 0)
3674 return err;
3675
3676 err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
3677 nla[NFTA_SET_ELEM_KEY]);
3678 if (err < 0)
3679 return err;
3680
3681 err = -EINVAL;
3682 if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
3683 return err;
3684
3685 priv = set->ops->get(ctx->net, set, &elem, flags);
3686 if (IS_ERR(priv))
3687 return PTR_ERR(priv);
3688
3689 elem.priv = priv;
3690 ext = nft_set_elem_ext(set, &elem);
3691
3692 err = -ENOMEM;
3693 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3694 if (skb == NULL)
3695 goto err1;
3696
3697 err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
3698 NFT_MSG_NEWSETELEM, 0, set, &elem);
3699 if (err < 0)
3700 goto err2;
3701
3702 err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT);
3703 /* This avoids a loop in nfnetlink. */
3704 if (err < 0)
3705 goto err1;
3706
3707 return 0;
3708err2:
3709 kfree_skb(skb);
3710err1:
3711 /* this avoids a loop in nfnetlink. */
3712 return err == -EAGAIN ? -ENOBUFS : err;
3713}
3714
3715static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
3716 struct sk_buff *skb, const struct nlmsghdr *nlh,
3717 const struct nlattr * const nla[],
3718 struct netlink_ext_ack *extack)
3719{
3720 u8 genmask = nft_genmask_cur(net);
3721 struct nft_set *set;
3722 struct nlattr *attr;
3723 struct nft_ctx ctx;
3724 int rem, err = 0;
3725
3726 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
3727 if (err < 0)
3728 return err;
3729
3730 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
3731 genmask);
3732 if (IS_ERR(set))
3733 return PTR_ERR(set);
3734
3735 if (nlh->nlmsg_flags & NLM_F_DUMP) {
3736 struct netlink_dump_control c = {
3737 .dump = nf_tables_dump_set,
3738 .done = nf_tables_dump_set_done,
3739 };
3740 struct nft_set_dump_ctx *dump_ctx;
3741
3742 dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
3743 if (!dump_ctx)
3744 return -ENOMEM;
3745
3746 dump_ctx->set = set;
3747 dump_ctx->ctx = ctx;
3748
3749 c.data = dump_ctx;
3750 return netlink_dump_start(nlsk, skb, nlh, &c);
3751 }
3752
3753 if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
3754 return -EINVAL;
3755
3756 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
3757 err = nft_get_set_elem(&ctx, set, attr);
3758 if (err < 0)
3759 break;
3760 }
3761
3762 return err;
3763}
3764
3678static void nf_tables_setelem_notify(const struct nft_ctx *ctx, 3765static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
3679 const struct nft_set *set, 3766 const struct nft_set *set,
3680 const struct nft_set_elem *elem, 3767 const struct nft_set_elem *elem,
@@ -3775,22 +3862,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
3775 kfree(elem); 3862 kfree(elem);
3776} 3863}
3777 3864
3778static int nft_setelem_parse_flags(const struct nft_set *set,
3779 const struct nlattr *attr, u32 *flags)
3780{
3781 if (attr == NULL)
3782 return 0;
3783
3784 *flags = ntohl(nla_get_be32(attr));
3785 if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
3786 return -EINVAL;
3787 if (!(set->flags & NFT_SET_INTERVAL) &&
3788 *flags & NFT_SET_ELEM_INTERVAL_END)
3789 return -EINVAL;
3790
3791 return 0;
3792}
3793
3794static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, 3865static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3795 const struct nlattr *attr, u32 nlmsg_flags) 3866 const struct nlattr *attr, u32 nlmsg_flags)
3796{ 3867{
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index cad6498f10b0..e5afab86381c 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -151,7 +151,7 @@ instance_put(struct nfulnl_instance *inst)
151 call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); 151 call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
152} 152}
153 153
154static void nfulnl_timer(unsigned long data); 154static void nfulnl_timer(struct timer_list *t);
155 155
156static struct nfulnl_instance * 156static struct nfulnl_instance *
157instance_create(struct net *net, u_int16_t group_num, 157instance_create(struct net *net, u_int16_t group_num,
@@ -184,7 +184,7 @@ instance_create(struct net *net, u_int16_t group_num,
184 /* needs to be two, since we _put() after creation */ 184 /* needs to be two, since we _put() after creation */
185 refcount_set(&inst->use, 2); 185 refcount_set(&inst->use, 2);
186 186
187 setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); 187 timer_setup(&inst->timer, nfulnl_timer, 0);
188 188
189 inst->net = get_net(net); 189 inst->net = get_net(net);
190 inst->peer_user_ns = user_ns; 190 inst->peer_user_ns = user_ns;
@@ -377,9 +377,9 @@ __nfulnl_flush(struct nfulnl_instance *inst)
377} 377}
378 378
379static void 379static void
380nfulnl_timer(unsigned long data) 380nfulnl_timer(struct timer_list *t)
381{ 381{
382 struct nfulnl_instance *inst = (struct nfulnl_instance *)data; 382 struct nfulnl_instance *inst = from_timer(inst, t, timer);
383 383
384 spin_lock_bh(&inst->lock); 384 spin_lock_bh(&inst->lock);
385 if (inst->skb) 385 if (inst->skb)
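
The same timer_setup()/from_timer() conversion repeats across this series (see xt_IDLETIMER and xt_LED below): the callback now receives the timer_list itself and recovers its container via container_of() instead of casting an unsigned long cookie. The pattern in miniature (struct demo is hypothetical):

        #include <linux/timer.h>
        #include <linux/jiffies.h>

        struct demo {
                struct timer_list timer;
                int fired;
        };

        static void demo_timer_fn(struct timer_list *t)
        {
                struct demo *d = from_timer(d, t, timer); /* container_of() */

                d->fired = 1;
        }

        static void demo_arm(struct demo *d)
        {
                timer_setup(&d->timer, demo_timer_fn, 0); /* was setup_timer() */
                mod_timer(&d->timer, jiffies + HZ);
        }
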
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c9796629858f..a16356cacec3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -401,7 +401,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
401 401
402 outdev = entry->state.out; 402 outdev = entry->state.out;
403 403
404 switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { 404 switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) {
405 case NFQNL_COPY_META: 405 case NFQNL_COPY_META:
406 case NFQNL_COPY_NONE: 406 case NFQNL_COPY_NONE:
407 break; 407 break;
@@ -412,7 +412,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
412 skb_checksum_help(entskb)) 412 skb_checksum_help(entskb))
413 return NULL; 413 return NULL;
414 414
415 data_len = ACCESS_ONCE(queue->copy_range); 415 data_len = READ_ONCE(queue->copy_range);
416 if (data_len > entskb->len) 416 if (data_len > entskb->len)
417 data_len = entskb->len; 417 data_len = entskb->len;
418 418
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0975d7dd6f..2647b895f4b0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
312 [NFTA_CT_SREG] = { .type = NLA_U32 }, 312 [NFTA_CT_SREG] = { .type = NLA_U32 },
313}; 313};
314 314
315static int nft_ct_netns_get(struct net *net, uint8_t family)
316{
317 int err;
318
319 if (family == NFPROTO_INET) {
320 err = nf_ct_netns_get(net, NFPROTO_IPV4);
321 if (err < 0)
322 goto err1;
323 err = nf_ct_netns_get(net, NFPROTO_IPV6);
324 if (err < 0)
325 goto err2;
326 } else {
327 err = nf_ct_netns_get(net, family);
328 if (err < 0)
329 goto err1;
330 }
331 return 0;
332
333err2:
334 nf_ct_netns_put(net, NFPROTO_IPV4);
335err1:
336 return err;
337}
338
339static void nft_ct_netns_put(struct net *net, uint8_t family)
340{
341 if (family == NFPROTO_INET) {
342 nf_ct_netns_put(net, NFPROTO_IPV4);
343 nf_ct_netns_put(net, NFPROTO_IPV6);
344 } else
345 nf_ct_netns_put(net, family);
346}
347
348#ifdef CONFIG_NF_CONNTRACK_ZONES 315#ifdef CONFIG_NF_CONNTRACK_ZONES
349static void nft_ct_tmpl_put_pcpu(void) 316static void nft_ct_tmpl_put_pcpu(void)
350{ 317{
@@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
489 if (err < 0) 456 if (err < 0)
490 return err; 457 return err;
491 458
492 err = nft_ct_netns_get(ctx->net, ctx->afi->family); 459 err = nf_ct_netns_get(ctx->net, ctx->afi->family);
493 if (err < 0) 460 if (err < 0)
494 return err; 461 return err;
495 462
@@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
583 if (err < 0) 550 if (err < 0)
584 goto err1; 551 goto err1;
585 552
586 err = nft_ct_netns_get(ctx->net, ctx->afi->family); 553 err = nf_ct_netns_get(ctx->net, ctx->afi->family);
587 if (err < 0) 554 if (err < 0)
588 goto err1; 555 goto err1;
589 556
@@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
606 struct nft_ct *priv = nft_expr_priv(expr); 573 struct nft_ct *priv = nft_expr_priv(expr);
607 574
608 __nft_ct_set_destroy(ctx, priv); 575 __nft_ct_set_destroy(ctx, priv);
609 nft_ct_netns_put(ctx->net, ctx->afi->family); 576 nf_ct_netns_put(ctx->net, ctx->afi->family);
610} 577}
611 578
612static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) 579static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
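
With the helpers moved into nf_conntrack_proto.c, any user can take conntrack dependencies on the inet family: the core expands NFPROTO_INET into an IPv4+IPv6 pair and rolls IPv4 back if IPv6 fails, so get and put stay symmetric. Caller shape, as a sketch (demo_init()/demo_destroy() are hypothetical):

        static int demo_init(const struct nft_ctx *ctx)
        {
                /* NFPROTO_INET is expanded (and rolled back) by the core */
                return nf_ct_netns_get(ctx->net, ctx->afi->family);
        }

        static void demo_destroy(const struct nft_ctx *ctx)
        {
                nf_ct_netns_put(ctx->net, ctx->afi->family);
        }
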
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 734989c40579..45fb2752fb63 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
106 return NULL; 106 return NULL;
107} 107}
108 108
109static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
110 const struct nft_set_elem *elem, unsigned int flags)
111{
112 const struct nft_bitmap *priv = nft_set_priv(set);
113 u8 genmask = nft_genmask_cur(net);
114 struct nft_bitmap_elem *be;
115
116 list_for_each_entry_rcu(be, &priv->list, head) {
117 if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) ||
118 !nft_set_elem_active(&be->ext, genmask))
119 continue;
120
121 return be;
122 }
123 return ERR_PTR(-ENOENT);
124}
125
109static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, 126static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
110 const struct nft_set_elem *elem, 127 const struct nft_set_elem *elem,
111 struct nft_set_ext **ext) 128 struct nft_set_ext **ext)
@@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
294 .activate = nft_bitmap_activate, 311 .activate = nft_bitmap_activate,
295 .lookup = nft_bitmap_lookup, 312 .lookup = nft_bitmap_lookup,
296 .walk = nft_bitmap_walk, 313 .walk = nft_bitmap_walk,
314 .get = nft_bitmap_get,
297}; 315};
298 316
299static struct nft_set_type nft_bitmap_type __read_mostly = { 317static struct nft_set_type nft_bitmap_type __read_mostly = {
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0fa01d772c5e..f8166c1d5430 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
95 return !!he; 95 return !!he;
96} 96}
97 97
98static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
99 const struct nft_set_elem *elem, unsigned int flags)
100{
101 struct nft_rhash *priv = nft_set_priv(set);
102 struct nft_rhash_elem *he;
103 struct nft_rhash_cmp_arg arg = {
104 .genmask = nft_genmask_cur(net),
105 .set = set,
106 .key = elem->key.val.data,
107 };
108
109 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
110 if (he != NULL)
111 return he;
112
113 return ERR_PTR(-ENOENT);
114}
115
98static bool nft_rhash_update(struct nft_set *set, const u32 *key, 116static bool nft_rhash_update(struct nft_set *set, const u32 *key,
99 void *(*new)(struct nft_set *, 117 void *(*new)(struct nft_set *,
100 const struct nft_expr *, 118 const struct nft_expr *,
@@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
409 return false; 427 return false;
410} 428}
411 429
430static void *nft_hash_get(const struct net *net, const struct nft_set *set,
431 const struct nft_set_elem *elem, unsigned int flags)
432{
433 struct nft_hash *priv = nft_set_priv(set);
434 u8 genmask = nft_genmask_cur(net);
435 struct nft_hash_elem *he;
436 u32 hash;
437
438 hash = jhash(elem->key.val.data, set->klen, priv->seed);
439 hash = reciprocal_scale(hash, priv->buckets);
440 hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
441 if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
442 nft_set_elem_active(&he->ext, genmask))
443 return he;
444 }
445 return ERR_PTR(-ENOENT);
446}
447
412/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes. */ 448/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes. */
413static inline u32 nft_hash_key(const u32 *key, u32 klen) 449static inline u32 nft_hash_key(const u32 *key, u32 klen)
414{ 450{
@@ -494,7 +530,7 @@ static void *nft_hash_deactivate(const struct net *net,
494 hash = reciprocal_scale(hash, priv->buckets); 530 hash = reciprocal_scale(hash, priv->buckets);
495 hlist_for_each_entry(he, &priv->table[hash], node) { 531 hlist_for_each_entry(he, &priv->table[hash], node) {
496 if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, 532 if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
497 set->klen) || 533 set->klen) &&
498 nft_set_elem_active(&he->ext, genmask)) { 534 nft_set_elem_active(&he->ext, genmask)) {
499 nft_set_elem_change_active(net, set, &he->ext); 535 nft_set_elem_change_active(net, set, &he->ext);
500 return he; 536 return he;
@@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = {
600 .lookup = nft_rhash_lookup, 636 .lookup = nft_rhash_lookup,
601 .update = nft_rhash_update, 637 .update = nft_rhash_update,
602 .walk = nft_rhash_walk, 638 .walk = nft_rhash_walk,
639 .get = nft_rhash_get,
603 .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, 640 .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
604}; 641};
605 642
@@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
617 .remove = nft_hash_remove, 654 .remove = nft_hash_remove,
618 .lookup = nft_hash_lookup, 655 .lookup = nft_hash_lookup,
619 .walk = nft_hash_walk, 656 .walk = nft_hash_walk,
657 .get = nft_hash_get,
620 .features = NFT_SET_MAP | NFT_SET_OBJECT, 658 .features = NFT_SET_MAP | NFT_SET_OBJECT,
621}; 659};
622 660
@@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
634 .remove = nft_hash_remove, 672 .remove = nft_hash_remove,
635 .lookup = nft_hash_lookup_fast, 673 .lookup = nft_hash_lookup_fast,
636 .walk = nft_hash_walk, 674 .walk = nft_hash_walk,
675 .get = nft_hash_get,
637 .features = NFT_SET_MAP | NFT_SET_OBJECT, 676 .features = NFT_SET_MAP | NFT_SET_OBJECT,
638}; 677};
639 678
@@ -643,7 +682,6 @@ nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
643{ 682{
644 if (desc->size) { 683 if (desc->size) {
645 switch (desc->klen) { 684 switch (desc->klen) {
646 case 2:
647 case 4: 685 case 4:
648 return &nft_hash_fast_ops; 686 return &nft_hash_fast_ops;
649 default: 687 default:
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d83a4ec5900d..e6f08bc5f359 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -113,6 +113,78 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
113 return ret; 113 return ret;
114} 114}
115 115
116static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
117 const u32 *key, struct nft_rbtree_elem **elem,
118 unsigned int seq, unsigned int flags, u8 genmask)
119{
120 struct nft_rbtree_elem *rbe, *interval = NULL;
121 struct nft_rbtree *priv = nft_set_priv(set);
122 const struct rb_node *parent;
123 const void *this;
124 int d;
125
126 parent = rcu_dereference_raw(priv->root.rb_node);
127 while (parent != NULL) {
128 if (read_seqcount_retry(&priv->count, seq))
129 return false;
130
131 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
132
133 this = nft_set_ext_key(&rbe->ext);
134 d = memcmp(this, key, set->klen);
135 if (d < 0) {
136 parent = rcu_dereference_raw(parent->rb_left);
137 interval = rbe;
138 } else if (d > 0) {
139 parent = rcu_dereference_raw(parent->rb_right);
140 } else {
141 if (!nft_set_elem_active(&rbe->ext, genmask))
142 parent = rcu_dereference_raw(parent->rb_left);
143
144 if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
145 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
146 (flags & NFT_SET_ELEM_INTERVAL_END)) {
147 *elem = rbe;
148 return true;
149 }
150 return false;
151 }
152 }
153
154 if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
155 nft_set_elem_active(&interval->ext, genmask) &&
156 !nft_rbtree_interval_end(interval)) {
157 *elem = interval;
158 return true;
159 }
160
161 return false;
162}
163
164static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
165 const struct nft_set_elem *elem, unsigned int flags)
166{
167 struct nft_rbtree *priv = nft_set_priv(set);
168 unsigned int seq = read_seqcount_begin(&priv->count);
169 struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
170 const u32 *key = (const u32 *)&elem->key.val;
171 u8 genmask = nft_genmask_cur(net);
172 bool ret;
173
174 ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
175 if (ret || !read_seqcount_retry(&priv->count, seq))
176 return rbe;
177
178 read_lock_bh(&priv->lock);
179 seq = read_seqcount_begin(&priv->count);
180 ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
181 if (!ret)
182 rbe = ERR_PTR(-ENOENT);
183 read_unlock_bh(&priv->lock);
184
185 return rbe;
186}
187
116static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, 188static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
117 struct nft_rbtree_elem *new, 189 struct nft_rbtree_elem *new,
118 struct nft_set_ext **ext) 190 struct nft_set_ext **ext)
@@ -336,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
336 .activate = nft_rbtree_activate, 408 .activate = nft_rbtree_activate,
337 .lookup = nft_rbtree_lookup, 409 .lookup = nft_rbtree_lookup,
338 .walk = nft_rbtree_walk, 410 .walk = nft_rbtree_walk,
411 .get = nft_rbtree_get,
339 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, 412 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
340}; 413};
341 414
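
All four set backends (bitmap, rhash, hash, rbtree) now expose the same ->get() contract consumed by nft_get_set_elem(): return the element matching the key in the current generation, or ERR_PTR(-ENOENT). A minimal backend under that contract (sketch; the demo_* types and the linear scan are illustrative):

        struct demo_elem {
                struct list_head        head;
                struct nft_set_ext      ext;
        };

        struct demo_priv {
                struct list_head        list;
        };

        static void *demo_get(const struct net *net, const struct nft_set *set,
                              const struct nft_set_elem *elem, unsigned int flags)
        {
                struct demo_priv *priv = nft_set_priv(set);
                u8 genmask = nft_genmask_cur(net);
                struct demo_elem *e;

                list_for_each_entry_rcu(e, &priv->list, head) {
                        if (!memcmp(nft_set_ext_key(&e->ext),
                                    elem->key.val.data, set->klen) &&
                            nft_set_elem_active(&e->ext, genmask))
                                return e;
                }
                return ERR_PTR(-ENOENT);
        }
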
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c83a3b5e1c6c..a77dd514297c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
892 if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) 892 if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
893 return ERR_PTR(-EFAULT); 893 return ERR_PTR(-EFAULT);
894 894
895 strlcpy(info->name, compat_tmp.name, sizeof(info->name)); 895 memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
896 info->num_counters = compat_tmp.num_counters; 896 info->num_counters = compat_tmp.num_counters;
897 user += sizeof(compat_tmp); 897 user += sizeof(compat_tmp);
898 } else 898 } else
@@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
905 if (copy_from_user(info, user, sizeof(*info)) != 0) 905 if (copy_from_user(info, user, sizeof(*info)) != 0)
906 return ERR_PTR(-EFAULT); 906 return ERR_PTR(-EFAULT);
907 907
908 info->name[sizeof(info->name) - 1] = '\0';
909 user += sizeof(*info); 908 user += sizeof(*info);
910 } 909 }
910 info->name[sizeof(info->name) - 1] = '\0';
911 911
912 size = sizeof(struct xt_counters); 912 size = sizeof(struct xt_counters);
913 size *= info->num_counters; 913 size *= info->num_counters;
@@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table,
1153 int *error) 1153 int *error)
1154{ 1154{
1155 struct xt_table_info *private; 1155 struct xt_table_info *private;
1156 unsigned int cpu;
1156 int ret; 1157 int ret;
1157 1158
1158 ret = xt_jumpstack_alloc(newinfo); 1159 ret = xt_jumpstack_alloc(newinfo);
@@ -1182,14 +1183,28 @@ xt_replace_table(struct xt_table *table,
1182 smp_wmb(); 1183 smp_wmb();
1183 table->private = newinfo; 1184 table->private = newinfo;
1184 1185
1186 /* make sure all cpus see new ->private value */
1187 smp_wmb();
1188
1185 /* 1189 /*
1186 * Even though table entries have now been swapped, other CPUs 1190 * Even though table entries have now been swapped, other CPUs
1187 * may still be using the old entries. This is okay, because 1191 * may still be using the old entries...
1188 * resynchronization happens because of the locking done
1189 * during the get_counters() routine.
1190 */ 1192 */
1191 local_bh_enable(); 1193 local_bh_enable();
1192 1194
1195 /* ... so wait for even xt_recseq on all cpus */
1196 for_each_possible_cpu(cpu) {
1197 seqcount_t *s = &per_cpu(xt_recseq, cpu);
1198 u32 seq = raw_read_seqcount(s);
1199
1200 if (seq & 1) {
1201 do {
1202 cond_resched();
1203 cpu_relax();
1204 } while (seq == raw_read_seqcount(s));
1205 }
1206 }
1207
1193#ifdef CONFIG_AUDIT 1208#ifdef CONFIG_AUDIT
1194 if (audit_enabled) { 1209 if (audit_enabled) {
1195 audit_log(current->audit_context, GFP_KERNEL, 1210 audit_log(current->audit_context, GFP_KERNEL,
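
After publishing the new table, the writer polls each CPU's xt_recseq: the packet path holds it odd while traversing, so once every CPU's count is even or has advanced, no CPU can still reference the old entries; this replaces the old reliance on get_counters() locking. The loop in isolation (sketch of the logic added above):

        static void demo_wait_for_readers(void)
        {
                unsigned int cpu;

                for_each_possible_cpu(cpu) {
                        seqcount_t *s = &per_cpu(xt_recseq, cpu);
                        u32 seq = raw_read_seqcount(s);

                        if (seq & 1) {          /* cpu is mid-traversal */
                                do {
                                        cond_resched();
                                        cpu_relax();
                                } while (seq == raw_read_seqcount(s));
                        }
                }
        }
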
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index daf45da448fa..ee3421ad108d 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -107,9 +107,9 @@ static void idletimer_tg_work(struct work_struct *work)
107 sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name); 107 sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
108} 108}
109 109
110static void idletimer_tg_expired(unsigned long data) 110static void idletimer_tg_expired(struct timer_list *t)
111{ 111{
112 struct idletimer_tg *timer = (struct idletimer_tg *) data; 112 struct idletimer_tg *timer = from_timer(timer, t, timer);
113 113
114 pr_debug("timer %s expired\n", timer->attr.attr.name); 114 pr_debug("timer %s expired\n", timer->attr.attr.name);
115 115
@@ -143,8 +143,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
143 143
144 list_add(&info->timer->entry, &idletimer_tg_list); 144 list_add(&info->timer->entry, &idletimer_tg_list);
145 145
146 setup_timer(&info->timer->timer, idletimer_tg_expired, 146 timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
147 (unsigned long) info->timer);
148 info->timer->refcnt = 1; 147 info->timer->refcnt = 1;
149 148
150 mod_timer(&info->timer->timer, 149 mod_timer(&info->timer->timer,
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 3ba31c194cce..0971634e5444 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -85,9 +85,10 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par)
85 return XT_CONTINUE; 85 return XT_CONTINUE;
86} 86}
87 87
88static void led_timeout_callback(unsigned long data) 88static void led_timeout_callback(struct timer_list *t)
89{ 89{
90 struct xt_led_info_internal *ledinternal = (struct xt_led_info_internal *)data; 90 struct xt_led_info_internal *ledinternal = from_timer(ledinternal, t,
91 timer);
91 92
92 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); 93 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
93} 94}
@@ -143,8 +144,7 @@ static int led_tg_check(const struct xt_tgchk_param *par)
143 144
144 /* See if we need to set up a timer */ 145 /* See if we need to set up a timer */
145 if (ledinfo->delay > 0) 146 if (ledinfo->delay > 0)
146 setup_timer(&ledinternal->timer, led_timeout_callback, 147 timer_setup(&ledinternal->timer, led_timeout_callback, 0);
147 (unsigned long)ledinternal);
148 148
149 list_add_tail(&ledinternal->list, &xt_led_triggers); 149 list_add_tail(&ledinternal->list, &xt_led_triggers);
150 150
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 38986a95216c..041da0d9c06f 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -8,6 +8,7 @@
8 */ 8 */
9 9
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/syscalls.h>
11#include <linux/skbuff.h> 12#include <linux/skbuff.h>
12#include <linux/filter.h> 13#include <linux/filter.h>
13#include <linux/bpf.h> 14#include <linux/bpf.h>
@@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
49 return 0; 50 return 0;
50} 51}
51 52
53static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
54{
55 mm_segment_t oldfs = get_fs();
56 int retval, fd;
57
58 set_fs(KERNEL_DS);
59 fd = bpf_obj_get_user(path, 0);
60 set_fs(oldfs);
61 if (fd < 0)
62 return fd;
63
64 retval = __bpf_mt_check_fd(fd, ret);
65 sys_close(fd);
66 return retval;
67}
68
52static int bpf_mt_check(const struct xt_mtchk_param *par) 69static int bpf_mt_check(const struct xt_mtchk_param *par)
53{ 70{
54 struct xt_bpf_info *info = par->matchinfo; 71 struct xt_bpf_info *info = par->matchinfo;
@@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
66 return __bpf_mt_check_bytecode(info->bpf_program, 83 return __bpf_mt_check_bytecode(info->bpf_program,
67 info->bpf_program_num_elem, 84 info->bpf_program_num_elem,
68 &info->filter); 85 &info->filter);
69 else if (info->mode == XT_BPF_MODE_FD_PINNED || 86 else if (info->mode == XT_BPF_MODE_FD_ELF)
70 info->mode == XT_BPF_MODE_FD_ELF)
71 return __bpf_mt_check_fd(info->fd, &info->filter); 87 return __bpf_mt_check_fd(info->fd, &info->filter);
88 else if (info->mode == XT_BPF_MODE_PATH_PINNED)
89 return __bpf_mt_check_path(info->path, &info->filter);
72 else 90 else
73 return -EINVAL; 91 return -EINVAL;
74} 92}
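
The new XT_BPF_MODE_PATH_PINNED mode lets a ruleset reference a program pinned in bpffs by path; the kernel resolves it via bpf_obj_get_user() under a temporary KERNEL_DS (that helper expects a user pointer), then closes the fd once the program reference is held. The userspace counterpart of the lookup is the BPF_OBJ_GET syscall command; a minimal sketch, where the pin path is whatever the loader chose (e.g. via bpftool prog pin):

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Fetch an fd for an object pinned in bpffs; caller closes it. */
static int bpf_obj_get(const char *pathname)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.pathname = (uint64_t)(unsigned long)pathname;

    return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}
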
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index ffa8eec980e9..a6214f235333 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -46,7 +46,6 @@
46struct xt_connlimit_conn { 46struct xt_connlimit_conn {
47 struct hlist_node node; 47 struct hlist_node node;
48 struct nf_conntrack_tuple tuple; 48 struct nf_conntrack_tuple tuple;
49 union nf_inet_addr addr;
50}; 49};
51 50
52struct xt_connlimit_rb { 51struct xt_connlimit_rb {
@@ -72,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr)
72} 71}
73 72
74static inline unsigned int 73static inline unsigned int
75connlimit_iphash6(const union nf_inet_addr *addr, 74connlimit_iphash6(const union nf_inet_addr *addr)
76 const union nf_inet_addr *mask)
77{ 75{
78 union nf_inet_addr res; 76 return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
79 unsigned int i;
80
81 for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
82 res.ip6[i] = addr->ip6[i] & mask->ip6[i];
83
84 return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
85 connlimit_rnd) % CONNLIMIT_SLOTS; 77 connlimit_rnd) % CONNLIMIT_SLOTS;
86} 78}
87 79
@@ -95,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn)
95} 87}
96 88
97static int 89static int
98same_source_net(const union nf_inet_addr *addr, 90same_source(const union nf_inet_addr *addr,
99 const union nf_inet_addr *mask, 91 const union nf_inet_addr *u3, u_int8_t family)
100 const union nf_inet_addr *u3, u_int8_t family)
101{ 92{
102 if (family == NFPROTO_IPV4) { 93 if (family == NFPROTO_IPV4)
103 return ntohl(addr->ip & mask->ip) - 94 return ntohl(addr->ip) - ntohl(u3->ip);
104 ntohl(u3->ip & mask->ip);
105 } else {
106 union nf_inet_addr lh, rh;
107 unsigned int i;
108
109 for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
110 lh.ip6[i] = addr->ip6[i] & mask->ip6[i];
111 rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
112 }
113 95
114 return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)); 96 return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
115 }
116} 97}
117 98
118static bool add_hlist(struct hlist_head *head, 99static bool add_hlist(struct hlist_head *head,
@@ -125,7 +106,6 @@ static bool add_hlist(struct hlist_head *head,
125 if (conn == NULL) 106 if (conn == NULL)
126 return false; 107 return false;
127 conn->tuple = *tuple; 108 conn->tuple = *tuple;
128 conn->addr = *addr;
129 hlist_add_head(&conn->node, head); 109 hlist_add_head(&conn->node, head);
130 return true; 110 return true;
131} 111}
@@ -196,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root,
196static unsigned int 176static unsigned int
197count_tree(struct net *net, struct rb_root *root, 177count_tree(struct net *net, struct rb_root *root,
198 const struct nf_conntrack_tuple *tuple, 178 const struct nf_conntrack_tuple *tuple,
199 const union nf_inet_addr *addr, const union nf_inet_addr *mask, 179 const union nf_inet_addr *addr,
200 u8 family, const struct nf_conntrack_zone *zone) 180 u8 family, const struct nf_conntrack_zone *zone)
201{ 181{
202 struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; 182 struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
@@ -217,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root,
217 rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); 197 rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
218 198
219 parent = *rbnode; 199 parent = *rbnode;
220 diff = same_source_net(addr, mask, &rbconn->addr, family); 200 diff = same_source(addr, &rbconn->addr, family);
221 if (diff < 0) { 201 if (diff < 0) {
222 rbnode = &((*rbnode)->rb_left); 202 rbnode = &((*rbnode)->rb_left);
223 } else if (diff > 0) { 203 } else if (diff > 0) {
@@ -270,7 +250,6 @@ count_tree(struct net *net, struct rb_root *root,
270 } 250 }
271 251
272 conn->tuple = *tuple; 252 conn->tuple = *tuple;
273 conn->addr = *addr;
274 rbconn->addr = *addr; 253 rbconn->addr = *addr;
275 254
276 INIT_HLIST_HEAD(&rbconn->hhead); 255 INIT_HLIST_HEAD(&rbconn->hhead);
@@ -285,7 +264,6 @@ static int count_them(struct net *net,
285 struct xt_connlimit_data *data, 264 struct xt_connlimit_data *data,
286 const struct nf_conntrack_tuple *tuple, 265 const struct nf_conntrack_tuple *tuple,
287 const union nf_inet_addr *addr, 266 const union nf_inet_addr *addr,
288 const union nf_inet_addr *mask,
289 u_int8_t family, 267 u_int8_t family,
290 const struct nf_conntrack_zone *zone) 268 const struct nf_conntrack_zone *zone)
291{ 269{
@@ -294,14 +272,14 @@ static int count_them(struct net *net,
294 u32 hash; 272 u32 hash;
295 273
296 if (family == NFPROTO_IPV6) 274 if (family == NFPROTO_IPV6)
297 hash = connlimit_iphash6(addr, mask); 275 hash = connlimit_iphash6(addr);
298 else 276 else
299 hash = connlimit_iphash(addr->ip & mask->ip); 277 hash = connlimit_iphash(addr->ip);
300 root = &data->climit_root[hash]; 278 root = &data->climit_root[hash];
301 279
302 spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); 280 spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
303 281
304 count = count_tree(net, root, tuple, addr, mask, family, zone); 282 count = count_tree(net, root, tuple, addr, family, zone);
305 283
306 spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); 284 spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
307 285
@@ -332,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
332 310
333 if (xt_family(par) == NFPROTO_IPV6) { 311 if (xt_family(par) == NFPROTO_IPV6) {
334 const struct ipv6hdr *iph = ipv6_hdr(skb); 312 const struct ipv6hdr *iph = ipv6_hdr(skb);
313 unsigned int i;
314
335 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? 315 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
336 &iph->daddr : &iph->saddr, sizeof(addr.ip6)); 316 &iph->daddr : &iph->saddr, sizeof(addr.ip6));
317
318 for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
319 addr.ip6[i] &= info->mask.ip6[i];
337 } else { 320 } else {
338 const struct iphdr *iph = ip_hdr(skb); 321 const struct iphdr *iph = ip_hdr(skb);
339 addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? 322 addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
340 iph->daddr : iph->saddr; 323 iph->daddr : iph->saddr;
324
325 addr.ip &= info->mask.ip;
341 } 326 }
342 327
343 connections = count_them(net, info->data, tuple_ptr, &addr, 328 connections = count_them(net, info->data, tuple_ptr, &addr,
344 &info->mask, xt_family(par), zone); 329 xt_family(par), zone);
345 if (connections == 0) 330 if (connections == 0)
346 /* kmalloc failed, drop it entirely */ 331 /* kmalloc failed, drop it entirely */
347 goto hotdrop; 332 goto hotdrop;
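
The connlimit rework applies the configured mask once, in connlimit_mt(), so every address that reaches the hash and the rb-tree is already masked; connlimit_iphash6() and the renamed same_source() then operate on plain addresses, and xt_connlimit_conn shrinks by dropping its per-connection address copy. The masking itself is a per-word AND; a standalone sketch with the union reduced to a plain array:

#include <stdint.h>
#include <stddef.h>

/* Mask an IPv6 address in place, as connlimit_mt() now does
 * before the address ever reaches the hash or the rb-tree. */
static void mask_ip6(uint32_t addr[4], const uint32_t mask[4])
{
    for (size_t i = 0; i < 4; i++)
        addr[i] &= mask[i];
}
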
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 8fd324116e6f..68ccbe50bb1e 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Today's hack: quantum tunneling in structs 3 * Today's hack: quantum tunneling in structs
3 * 4 *
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index e75ef39669c5..575d2153e3b8 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -76,7 +76,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
76 transparent = nf_sk_is_transparent(sk); 76 transparent = nf_sk_is_transparent(sk);
77 77
78 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && 78 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
79 transparent) 79 transparent && sk_fullsock(sk))
80 pskb->mark = sk->sk_mark; 80 pskb->mark = sk->sk_mark;
81 81
82 if (sk != skb->sk) 82 if (sk != skb->sk)
@@ -133,7 +133,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
133 transparent = nf_sk_is_transparent(sk); 133 transparent = nf_sk_is_transparent(sk);
134 134
135 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && 135 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
136 transparent) 136 transparent && sk_fullsock(sk))
137 pskb->mark = sk->sk_mark; 137 pskb->mark = sk->sk_mark;
138 138
139 if (sk != skb->sk) 139 if (sk != skb->sk)
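
The xt_socket fix guards the sk->sk_mark write with sk_fullsock(), since a transparent-proxy lookup can return a timewait or request socket, and those "mini" sockets do not carry sk_mark. A compilable model of the check, with state constants mirroring the kernel's tcp_states.h (the real helper lives in include/net/sock.h and is essentially this test):

#include <stdbool.h>

/* State values as in the kernel's uapi tcp_states.h. */
enum { TCP_TIME_WAIT = 6, TCP_NEW_SYN_RECV = 12 };
#define TCPF_TIME_WAIT    (1 << TCP_TIME_WAIT)
#define TCPF_NEW_SYN_RECV (1 << TCP_NEW_SYN_RECV)

struct sock { unsigned char sk_state; };   /* reduced model */

static inline bool sk_fullsock(const struct sock *sk)
{
    return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
}

int main(void)
{
    struct sock tw = { .sk_state = TCP_TIME_WAIT };

    return sk_fullsock(&tw);   /* 0: not a full socket */
}
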
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index d341ede0dca5..5a46381a64e7 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the NetLabel subsystem. 3# Makefile for the NetLabel subsystem.
3# 4#
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index d0f38bc9af6d..ac709f0f197b 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -87,7 +87,7 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
87 struct list_head *i = s; 87 struct list_head *i = s;
88 struct netlbl_af4list *n = __af4list_entry(s); 88 struct netlbl_af4list *n = __af4list_entry(s);
89 while (i != h && !n->valid) { 89 while (i != h && !n->valid) {
90 i = rcu_dereference(i->next); 90 i = rcu_dereference(list_next_rcu(i));
91 n = __af4list_entry(i); 91 n = __af4list_entry(i);
92 } 92 }
93 return n; 93 return n;
@@ -154,7 +154,7 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
154 struct list_head *i = s; 154 struct list_head *i = s;
155 struct netlbl_af6list *n = __af6list_entry(s); 155 struct netlbl_af6list *n = __af6list_entry(s);
156 while (i != h && !n->valid) { 156 while (i != h && !n->valid) {
157 i = rcu_dereference(i->next); 157 i = rcu_dereference(list_next_rcu(i));
158 n = __af6list_entry(i); 158 n = __af6list_entry(i);
159 } 159 }
160 return n; 160 return n;
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index d177dd066504..4d748975117d 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -393,7 +393,7 @@ EXPORT_SYMBOL(netlbl_calipso_ops_register);
393 393
394static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void) 394static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
395{ 395{
396 return ACCESS_ONCE(calipso_ops); 396 return READ_ONCE(calipso_ops);
397} 397}
398 398
399/** 399/**
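
This hunk is part of the tree-wide ACCESS_ONCE() removal: ACCESS_ONCE() misbehaved once its argument could be a non-scalar, while READ_ONCE()/WRITE_ONCE() handle that case and add type checking. For scalars both boil down to a volatile access that forbids the compiler from caching, refetching, or tearing the load; a minimal scalar-only rendition:

/* Scalar-only sketch; the kernel's READ_ONCE also handles
 * aggregates and adds compile-time checks. */
#define READ_ONCE(x)        (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, val)  (*(volatile __typeof__(x) *)&(x) = (val))

The pattern fits calipso_ops here: it is registered once and then read locklessly, so the load must be a single, untorn access.
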
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 327807731b44..b9e0ee4e22f5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -128,7 +128,6 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
128}; 128};
129 129
130static int netlink_dump(struct sock *sk); 130static int netlink_dump(struct sock *sk);
131static void netlink_skb_destructor(struct sk_buff *skb);
132 131
133/* nl_table locking explained: 132/* nl_table locking explained:
134 * Lookup and traversal are protected with an RCU read-side lock. Insertion 133 * Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -2136,7 +2135,7 @@ static int netlink_dump(struct sock *sk)
2136 struct sk_buff *skb = NULL; 2135 struct sk_buff *skb = NULL;
2137 struct nlmsghdr *nlh; 2136 struct nlmsghdr *nlh;
2138 struct module *module; 2137 struct module *module;
2139 int len, err = -ENOBUFS; 2138 int err = -ENOBUFS;
2140 int alloc_min_size; 2139 int alloc_min_size;
2141 int alloc_size; 2140 int alloc_size;
2142 2141
@@ -2183,9 +2182,11 @@ static int netlink_dump(struct sock *sk)
2183 skb_reserve(skb, skb_tailroom(skb) - alloc_size); 2182 skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2184 netlink_skb_set_owner_r(skb, sk); 2183 netlink_skb_set_owner_r(skb, sk);
2185 2184
2186 len = cb->dump(skb, cb); 2185 if (nlk->dump_done_errno > 0)
2186 nlk->dump_done_errno = cb->dump(skb, cb);
2187 2187
2188 if (len > 0) { 2188 if (nlk->dump_done_errno > 0 ||
2189 skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
2189 mutex_unlock(nlk->cb_mutex); 2190 mutex_unlock(nlk->cb_mutex);
2190 2191
2191 if (sk_filter(sk, skb)) 2192 if (sk_filter(sk, skb))
@@ -2195,13 +2196,15 @@ static int netlink_dump(struct sock *sk)
2195 return 0; 2196 return 0;
2196 } 2197 }
2197 2198
2198 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); 2199 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
2199 if (!nlh) 2200 sizeof(nlk->dump_done_errno), NLM_F_MULTI);
2201 if (WARN_ON(!nlh))
2200 goto errout_skb; 2202 goto errout_skb;
2201 2203
2202 nl_dump_check_consistent(cb, nlh); 2204 nl_dump_check_consistent(cb, nlh);
2203 2205
2204 memcpy(nlmsg_data(nlh), &len, sizeof(len)); 2206 memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
2207 sizeof(nlk->dump_done_errno));
2205 2208
2206 if (sk_filter(sk, skb)) 2209 if (sk_filter(sk, skb))
2207 kfree_skb(skb); 2210 kfree_skb(skb);
@@ -2266,14 +2269,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2266 cb->min_dump_alloc = control->min_dump_alloc; 2269 cb->min_dump_alloc = control->min_dump_alloc;
2267 cb->skb = skb; 2270 cb->skb = skb;
2268 2271
2272 if (cb->start) {
2273 ret = cb->start(cb);
2274 if (ret)
2275 goto error_unlock;
2276 }
2277
2269 nlk->cb_running = true; 2278 nlk->cb_running = true;
2279 nlk->dump_done_errno = INT_MAX;
2270 2280
2271 mutex_unlock(nlk->cb_mutex); 2281 mutex_unlock(nlk->cb_mutex);
2272 2282
2273 if (cb->start)
2274 cb->start(cb);
2275
2276 ret = netlink_dump(sk); 2283 ret = netlink_dump(sk);
2284
2277 sock_put(sk); 2285 sock_put(sk);
2278 2286
2279 if (ret) 2287 if (ret)
@@ -2303,27 +2311,26 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2303 size_t tlvlen = 0; 2311 size_t tlvlen = 0;
2304 struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); 2312 struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
2305 unsigned int flags = 0; 2313 unsigned int flags = 0;
2314 bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK;
2306 2315
2307 /* Error messages get the original request appended, unless the user 2316 /* Error messages get the original request appended, unless the user
2308 * requests to cap the error message, and get extra error data if 2317 * requests to cap the error message, and get extra error data if
2309 * requested. 2318 * requested.
2310 */ 2319 */
2320 if (nlk_has_extack && extack && extack->_msg)
2321 tlvlen += nla_total_size(strlen(extack->_msg) + 1);
2322
2311 if (err) { 2323 if (err) {
2312 if (!(nlk->flags & NETLINK_F_CAP_ACK)) 2324 if (!(nlk->flags & NETLINK_F_CAP_ACK))
2313 payload += nlmsg_len(nlh); 2325 payload += nlmsg_len(nlh);
2314 else 2326 else
2315 flags |= NLM_F_CAPPED; 2327 flags |= NLM_F_CAPPED;
2316 if (nlk->flags & NETLINK_F_EXT_ACK && extack) { 2328 if (nlk_has_extack && extack && extack->bad_attr)
2317 if (extack->_msg) 2329 tlvlen += nla_total_size(sizeof(u32));
2318 tlvlen += nla_total_size(strlen(extack->_msg) + 1);
2319 if (extack->bad_attr)
2320 tlvlen += nla_total_size(sizeof(u32));
2321 }
2322 } else { 2330 } else {
2323 flags |= NLM_F_CAPPED; 2331 flags |= NLM_F_CAPPED;
2324 2332
2325 if (nlk->flags & NETLINK_F_EXT_ACK && 2333 if (nlk_has_extack && extack && extack->cookie_len)
2326 extack && extack->cookie_len)
2327 tlvlen += nla_total_size(extack->cookie_len); 2334 tlvlen += nla_total_size(extack->cookie_len);
2328 } 2335 }
2329 2336
@@ -2332,16 +2339,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2332 2339
2333 skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); 2340 skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
2334 if (!skb) { 2341 if (!skb) {
2335 struct sock *sk; 2342 NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
2336 2343 NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
2337 sk = netlink_lookup(sock_net(in_skb->sk),
2338 in_skb->sk->sk_protocol,
2339 NETLINK_CB(in_skb).portid);
2340 if (sk) {
2341 sk->sk_err = ENOBUFS;
2342 sk->sk_error_report(sk);
2343 sock_put(sk);
2344 }
2345 return; 2344 return;
2346 } 2345 }
2347 2346
@@ -2351,11 +2350,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2351 errmsg->error = err; 2350 errmsg->error = err;
2352 memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); 2351 memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
2353 2352
2354 if (nlk->flags & NETLINK_F_EXT_ACK && extack) { 2353 if (nlk_has_extack && extack) {
2354 if (extack->_msg) {
2355 WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
2356 extack->_msg));
2357 }
2355 if (err) { 2358 if (err) {
2356 if (extack->_msg)
2357 WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
2358 extack->_msg));
2359 if (extack->bad_attr && 2359 if (extack->bad_attr &&
2360 !WARN_ON((u8 *)extack->bad_attr < in_skb->data || 2360 !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
2361 (u8 *)extack->bad_attr >= in_skb->data + 2361 (u8 *)extack->bad_attr >= in_skb->data +
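
With dump_done_errno in struct netlink_sock (added in af_netlink.h below), a dump keeps being re-driven while the callback returns positive values, and the terminating NLMSG_DONE now carries the callback's final return code as an int payload: 0 for a clean end, a negative errno if the dump failed late. Userspace should read that payload rather than treating DONE as unconditional success; a minimal receive-side sketch:

#include <string.h>
#include <linux/netlink.h>

/* Returns 0 on clean end-of-dump, or the negative errno the
 * kernel stashed in the NLMSG_DONE payload. */
static int handle_done(const struct nlmsghdr *nlh)
{
    int err = 0;

    if (nlh->nlmsg_type == NLMSG_DONE &&
        nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(err)))
        memcpy(&err, NLMSG_DATA(nlh), sizeof(err));

    return err;
}
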
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 3490f2430532..962de7b3c023 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _AF_NETLINK_H 2#ifndef _AF_NETLINK_H
2#define _AF_NETLINK_H 3#define _AF_NETLINK_H
3 4
@@ -33,6 +34,7 @@ struct netlink_sock {
33 wait_queue_head_t wait; 34 wait_queue_head_t wait;
34 bool bound; 35 bool bound;
35 bool cb_running; 36 bool cb_running;
37 int dump_done_errno;
36 struct netlink_callback cb; 38 struct netlink_callback cb;
37 struct mutex *cb_mutex; 39 struct mutex *cb_mutex;
38 struct mutex cb_def_mutex; 40 struct mutex cb_def_mutex;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 10f8b4cff40a..d444daf1ac04 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * NETLINK Generic Netlink Family 3 * NETLINK Generic Netlink Family
3 * 4 *
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ebf16f7f9089..7ed9d4422a73 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -241,9 +241,9 @@ void nr_destroy_socket(struct sock *);
241/* 241/*
242 * Handler for deferred kills. 242 * Handler for deferred kills.
243 */ 243 */
244static void nr_destroy_timer(unsigned long data) 244static void nr_destroy_timer(struct timer_list *t)
245{ 245{
246 struct sock *sk=(struct sock *)data; 246 struct sock *sk = from_timer(sk, t, sk_timer);
247 bh_lock_sock(sk); 247 bh_lock_sock(sk);
248 sock_hold(sk); 248 sock_hold(sk);
249 nr_destroy_socket(sk); 249 nr_destroy_socket(sk);
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 80dbd0beb516..fbfdae452ff9 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -125,7 +125,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
125 125
126 case NR_DISCREQ: 126 case NR_DISCREQ:
127 nr_write_internal(sk, NR_DISCACK); 127 nr_write_internal(sk, NR_DISCACK);
128 128 /* fall through */
129 case NR_DISCACK: 129 case NR_DISCACK:
130 nr_disconnect(sk, 0); 130 nr_disconnect(sk, 0);
131 break; 131 break;
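
The bare /* fall through */ comments added here and below in nr_route.c are annotations, not just documentation: with GCC 7's -Wimplicit-fallthrough (enabled by -Wextra), a comment of that form on the path into the next case label suppresses the warning, so intentional fallthrough stays silent while a genuinely missing break keeps warning. A compilable demonstration:

#include <stdio.h>

/* Build with: gcc -Wextra -Wimplicit-fallthrough demo.c */
static void demo(int state)
{
    switch (state) {
    case 1:
        printf("ack\n");
        /* fall through */
    case 2:
        printf("disconnect\n");
        break;
    default:
        break;
    }
}

int main(void) { demo(1); return 0; }
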
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 94d4e922af53..215ad22a9647 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -15,10 +15,10 @@
15#include <net/netrom.h> 15#include <net/netrom.h>
16#include <linux/init.h> 16#include <linux/init.h>
17 17
18static void nr_loopback_timer(unsigned long); 18static void nr_loopback_timer(struct timer_list *);
19 19
20static struct sk_buff_head loopback_queue; 20static struct sk_buff_head loopback_queue;
21static DEFINE_TIMER(loopback_timer, nr_loopback_timer, 0, 0); 21static DEFINE_TIMER(loopback_timer, nr_loopback_timer);
22 22
23void __init nr_loopback_init(void) 23void __init nr_loopback_init(void)
24{ 24{
@@ -48,7 +48,7 @@ int nr_loopback_queue(struct sk_buff *skb)
48 return 1; 48 return 1;
49} 49}
50 50
51static void nr_loopback_timer(unsigned long param) 51static void nr_loopback_timer(struct timer_list *unused)
52{ 52{
53 struct sk_buff *skb; 53 struct sk_buff *skb;
54 ax25_address *nr_dest; 54 ax25_address *nr_dest;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 0c59354e280e..75e6ba970fde 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
80 80
81static void nr_remove_neigh(struct nr_neigh *); 81static void nr_remove_neigh(struct nr_neigh *);
82 82
83/* re-sort the routes in quality order. */
84static void re_sort_routes(struct nr_node *nr_node, int x, int y)
85{
86 if (nr_node->routes[y].quality > nr_node->routes[x].quality) {
87 if (nr_node->which == x)
88 nr_node->which = y;
89 else if (nr_node->which == y)
90 nr_node->which = x;
91
92 swap(nr_node->routes[x], nr_node->routes[y]);
93 }
94}
95
83/* 96/*
84 * Add a new route to a node, and in the process add the node and the 97 * Add a new route to a node, and in the process add the node and the
85 * neighbour if it is new. 98 * neighbour if it is new.
@@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
90{ 103{
91 struct nr_node *nr_node; 104 struct nr_node *nr_node;
92 struct nr_neigh *nr_neigh; 105 struct nr_neigh *nr_neigh;
93 struct nr_route nr_route;
94 int i, found; 106 int i, found;
95 struct net_device *odev; 107 struct net_device *odev;
96 108
@@ -251,49 +263,11 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
251 /* Now re-sort the routes in quality order */ 263 /* Now re-sort the routes in quality order */
252 switch (nr_node->count) { 264 switch (nr_node->count) {
253 case 3: 265 case 3:
254 if (nr_node->routes[1].quality > nr_node->routes[0].quality) { 266 re_sort_routes(nr_node, 0, 1);
255 switch (nr_node->which) { 267 re_sort_routes(nr_node, 1, 2);
256 case 0: 268 /* fall through */
257 nr_node->which = 1;
258 break;
259 case 1:
260 nr_node->which = 0;
261 break;
262 }
263 nr_route = nr_node->routes[0];
264 nr_node->routes[0] = nr_node->routes[1];
265 nr_node->routes[1] = nr_route;
266 }
267 if (nr_node->routes[2].quality > nr_node->routes[1].quality) {
268 switch (nr_node->which) {
269 case 1: nr_node->which = 2;
270 break;
271
272 case 2: nr_node->which = 1;
273 break;
274
275 default:
276 break;
277 }
278 nr_route = nr_node->routes[1];
279 nr_node->routes[1] = nr_node->routes[2];
280 nr_node->routes[2] = nr_route;
281 }
282 case 2: 269 case 2:
283 if (nr_node->routes[1].quality > nr_node->routes[0].quality) { 270 re_sort_routes(nr_node, 0, 1);
284 switch (nr_node->which) {
285 case 0: nr_node->which = 1;
286 break;
287
288 case 1: nr_node->which = 0;
289 break;
290
291 default: break;
292 }
293 nr_route = nr_node->routes[0];
294 nr_node->routes[0] = nr_node->routes[1];
295 nr_node->routes[1] = nr_route;
296 }
297 case 1: 271 case 1:
298 break; 272 break;
299 } 273 }
@@ -384,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
384 switch (i) { 358 switch (i) {
385 case 0: 359 case 0:
386 nr_node->routes[0] = nr_node->routes[1]; 360 nr_node->routes[0] = nr_node->routes[1];
361 /* fall through */
387 case 1: 362 case 1:
388 nr_node->routes[1] = nr_node->routes[2]; 363 nr_node->routes[1] = nr_node->routes[2];
389 case 2: 364 case 2:
@@ -553,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev)
553 switch (i) { 528 switch (i) {
554 case 0: 529 case 0:
555 t->routes[0] = t->routes[1]; 530 t->routes[0] = t->routes[1];
531 /* fall through */
556 case 1: 532 case 1:
557 t->routes[1] = t->routes[2]; 533 t->routes[1] = t->routes[2];
558 case 2: 534 case 2:
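
re_sort_routes() folds the two hand-rolled compare-and-swap blocks into one helper: together with the case fallthrough from 3 to 2 it performs one pass of a bounded bubble sort over at most three routes, and it keeps nr_node->which pointing at the same physical route across the swap. The kernel's swap() is a three-assignment macro; a toy standalone version:

#include <stdio.h>

#define swap(a, b) \
    do { __typeof__(a) _t = (a); (a) = (b); (b) = _t; } while (0)

struct route { int quality; };

struct node {                 /* illustrative reduction of nr_node */
    int which;                /* index of the route in use */
    struct route routes[3];
};

static void re_sort_routes(struct node *n, int x, int y)
{
    if (n->routes[y].quality > n->routes[x].quality) {
        if (n->which == x)
            n->which = y;
        else if (n->which == y)
            n->which = x;
        swap(n->routes[x], n->routes[y]);
    }
}

int main(void)
{
    struct node n = { .which = 0,
                      .routes = { { 10 }, { 50 }, { 30 } } };

    re_sort_routes(&n, 0, 1);   /* the count == 3 path: 0/1, then 1/2 */
    re_sort_routes(&n, 1, 2);
    printf("best=%d which=%d\n", n.routes[0].quality, n.which);
    return 0;
}

Running it, the best route (quality 50) ends up at index 0 while which follows the originally selected route (quality 10) to index 2.
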
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 94d05806a9a2..cbd51ed5a2d7 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -29,24 +29,23 @@
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
30#include <net/netrom.h> 30#include <net/netrom.h>
31 31
32static void nr_heartbeat_expiry(unsigned long); 32static void nr_heartbeat_expiry(struct timer_list *);
33static void nr_t1timer_expiry(unsigned long); 33static void nr_t1timer_expiry(struct timer_list *);
34static void nr_t2timer_expiry(unsigned long); 34static void nr_t2timer_expiry(struct timer_list *);
35static void nr_t4timer_expiry(unsigned long); 35static void nr_t4timer_expiry(struct timer_list *);
36static void nr_idletimer_expiry(unsigned long); 36static void nr_idletimer_expiry(struct timer_list *);
37 37
38void nr_init_timers(struct sock *sk) 38void nr_init_timers(struct sock *sk)
39{ 39{
40 struct nr_sock *nr = nr_sk(sk); 40 struct nr_sock *nr = nr_sk(sk);
41 41
42 setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk); 42 timer_setup(&nr->t1timer, nr_t1timer_expiry, 0);
43 setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk); 43 timer_setup(&nr->t2timer, nr_t2timer_expiry, 0);
44 setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk); 44 timer_setup(&nr->t4timer, nr_t4timer_expiry, 0);
45 setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk); 45 timer_setup(&nr->idletimer, nr_idletimer_expiry, 0);
46 46
47 /* initialized by sock_init_data */ 47 /* initialized by sock_init_data */
48 sk->sk_timer.data = (unsigned long)sk; 48 sk->sk_timer.function = nr_heartbeat_expiry;
49 sk->sk_timer.function = &nr_heartbeat_expiry;
50} 49}
51 50
52void nr_start_t1timer(struct sock *sk) 51void nr_start_t1timer(struct sock *sk)
@@ -113,9 +112,9 @@ int nr_t1timer_running(struct sock *sk)
113 return timer_pending(&nr_sk(sk)->t1timer); 112 return timer_pending(&nr_sk(sk)->t1timer);
114} 113}
115 114
116static void nr_heartbeat_expiry(unsigned long param) 115static void nr_heartbeat_expiry(struct timer_list *t)
117{ 116{
118 struct sock *sk = (struct sock *)param; 117 struct sock *sk = from_timer(sk, t, sk_timer);
119 struct nr_sock *nr = nr_sk(sk); 118 struct nr_sock *nr = nr_sk(sk);
120 119
121 bh_lock_sock(sk); 120 bh_lock_sock(sk);
@@ -152,10 +151,10 @@ static void nr_heartbeat_expiry(unsigned long param)
152 bh_unlock_sock(sk); 151 bh_unlock_sock(sk);
153} 152}
154 153
155static void nr_t2timer_expiry(unsigned long param) 154static void nr_t2timer_expiry(struct timer_list *t)
156{ 155{
157 struct sock *sk = (struct sock *)param; 156 struct nr_sock *nr = from_timer(nr, t, t2timer);
158 struct nr_sock *nr = nr_sk(sk); 157 struct sock *sk = &nr->sock;
159 158
160 bh_lock_sock(sk); 159 bh_lock_sock(sk);
161 if (nr->condition & NR_COND_ACK_PENDING) { 160 if (nr->condition & NR_COND_ACK_PENDING) {
@@ -165,19 +164,20 @@ static void nr_t2timer_expiry(unsigned long param)
165 bh_unlock_sock(sk); 164 bh_unlock_sock(sk);
166} 165}
167 166
168static void nr_t4timer_expiry(unsigned long param) 167static void nr_t4timer_expiry(struct timer_list *t)
169{ 168{
170 struct sock *sk = (struct sock *)param; 169 struct nr_sock *nr = from_timer(nr, t, t4timer);
170 struct sock *sk = &nr->sock;
171 171
172 bh_lock_sock(sk); 172 bh_lock_sock(sk);
173 nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; 173 nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
174 bh_unlock_sock(sk); 174 bh_unlock_sock(sk);
175} 175}
176 176
177static void nr_idletimer_expiry(unsigned long param) 177static void nr_idletimer_expiry(struct timer_list *t)
178{ 178{
179 struct sock *sk = (struct sock *)param; 179 struct nr_sock *nr = from_timer(nr, t, idletimer);
180 struct nr_sock *nr = nr_sk(sk); 180 struct sock *sk = &nr->sock;
181 181
182 bh_lock_sock(sk); 182 bh_lock_sock(sk);
183 183
@@ -202,10 +202,10 @@ static void nr_idletimer_expiry(unsigned long param)
202 bh_unlock_sock(sk); 202 bh_unlock_sock(sk);
203} 203}
204 204
205static void nr_t1timer_expiry(unsigned long param) 205static void nr_t1timer_expiry(struct timer_list *t)
206{ 206{
207 struct sock *sk = (struct sock *)param; 207 struct nr_sock *nr = from_timer(nr, t, t1timer);
208 struct nr_sock *nr = nr_sk(sk); 208 struct sock *sk = &nr->sock;
209 209
210 bh_lock_sock(sk); 210 bh_lock_sock(sk);
211 switch (nr->state) { 211 switch (nr->state) {
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index 2555ff8e7219..2ffc69b473fc 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux NFC subsystem. 3# Makefile for the Linux NFC subsystem.
3# 4#
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5cf33df888c3..947a470f929d 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1015,9 +1015,9 @@ exit:
1015 device_unlock(&dev->dev); 1015 device_unlock(&dev->dev);
1016} 1016}
1017 1017
1018static void nfc_check_pres_timeout(unsigned long data) 1018static void nfc_check_pres_timeout(struct timer_list *t)
1019{ 1019{
1020 struct nfc_dev *dev = (struct nfc_dev *)data; 1020 struct nfc_dev *dev = from_timer(dev, t, check_pres_timer);
1021 1021
1022 schedule_work(&dev->check_pres_work); 1022 schedule_work(&dev->check_pres_work);
1023} 1023}
@@ -1094,10 +1094,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
1094 dev->targets_generation = 1; 1094 dev->targets_generation = 1;
1095 1095
1096 if (ops->check_presence) { 1096 if (ops->check_presence) {
1097 init_timer(&dev->check_pres_timer); 1097 timer_setup(&dev->check_pres_timer, nfc_check_pres_timeout, 0);
1098 dev->check_pres_timer.data = (unsigned long)dev;
1099 dev->check_pres_timer.function = nfc_check_pres_timeout;
1100
1101 INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); 1098 INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
1102 } 1099 }
1103 1100
@@ -1106,7 +1103,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
1106err_free_dev: 1103err_free_dev:
1107 kfree(dev); 1104 kfree(dev);
1108 1105
1109 return ERR_PTR(rc); 1106 return NULL;
1110} 1107}
1111EXPORT_SYMBOL(nfc_allocate_device); 1108EXPORT_SYMBOL(nfc_allocate_device);
1112 1109
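
The nfc_allocate_device() change is a contract fix rather than a cleanup: its callers test the result against NULL, so returning ERR_PTR(rc) would sail through a NULL check and be dereferenced later. The two error conventions must never be mixed; as a reminder of how ERR_PTR encodes errnos in the top page of the address space, a userspace rendition of the err.h helpers:

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
    void *p = ERR_PTR(-12 /* -ENOMEM */);

    /* A plain NULL check misses ERR_PTR values entirely: */
    printf("p != NULL: %d, IS_ERR(p): %d, err: %ld\n",
           p != NULL, IS_ERR(p), PTR_ERR(p));
    return 0;
}
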
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index de6dd37d04c7..ec0a8998e52d 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -650,6 +650,7 @@ static void digital_deactivate_target(struct nfc_dev *nfc_dev,
650 return; 650 return;
651 } 651 }
652 652
653 digital_abort_cmd(ddev);
653 ddev->curr_protocol = 0; 654 ddev->curr_protocol = 0;
654} 655}
655 656
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef0acc5..ac8030c4bcf8 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -428,9 +428,9 @@ exit_noskb:
428 nfc_hci_driver_failure(hdev, r); 428 nfc_hci_driver_failure(hdev, r);
429} 429}
430 430
431static void nfc_hci_cmd_timeout(unsigned long data) 431static void nfc_hci_cmd_timeout(struct timer_list *t)
432{ 432{
433 struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data; 433 struct nfc_hci_dev *hdev = from_timer(hdev, t, cmd_timer);
434 434
435 schedule_work(&hdev->msg_tx_work); 435 schedule_work(&hdev->msg_tx_work);
436} 436}
@@ -1004,9 +1004,7 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
1004 1004
1005 INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work); 1005 INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
1006 1006
1007 init_timer(&hdev->cmd_timer); 1007 timer_setup(&hdev->cmd_timer, nfc_hci_cmd_timeout, 0);
1008 hdev->cmd_timer.data = (unsigned long)hdev;
1009 hdev->cmd_timer.function = nfc_hci_cmd_timeout;
1010 1008
1011 skb_queue_head_init(&hdev->rx_hcp_frags); 1009 skb_queue_head_init(&hdev->rx_hcp_frags);
1012 1010
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a009ce6..fe988936ad92 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -580,27 +580,27 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
580 } 580 }
581} 581}
582 582
583static void llc_shdlc_connect_timeout(unsigned long data) 583static void llc_shdlc_connect_timeout(struct timer_list *t)
584{ 584{
585 struct llc_shdlc *shdlc = (struct llc_shdlc *)data; 585 struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer);
586 586
587 pr_debug("\n"); 587 pr_debug("\n");
588 588
589 schedule_work(&shdlc->sm_work); 589 schedule_work(&shdlc->sm_work);
590} 590}
591 591
592static void llc_shdlc_t1_timeout(unsigned long data) 592static void llc_shdlc_t1_timeout(struct timer_list *t)
593{ 593{
594 struct llc_shdlc *shdlc = (struct llc_shdlc *)data; 594 struct llc_shdlc *shdlc = from_timer(shdlc, t, t1_timer);
595 595
596 pr_debug("SoftIRQ: need to send ack\n"); 596 pr_debug("SoftIRQ: need to send ack\n");
597 597
598 schedule_work(&shdlc->sm_work); 598 schedule_work(&shdlc->sm_work);
599} 599}
600 600
601static void llc_shdlc_t2_timeout(unsigned long data) 601static void llc_shdlc_t2_timeout(struct timer_list *t)
602{ 602{
603 struct llc_shdlc *shdlc = (struct llc_shdlc *)data; 603 struct llc_shdlc *shdlc = from_timer(shdlc, t, t2_timer);
604 604
605 pr_debug("SoftIRQ: need to retransmit\n"); 605 pr_debug("SoftIRQ: need to retransmit\n");
606 606
@@ -763,17 +763,9 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
763 mutex_init(&shdlc->state_mutex); 763 mutex_init(&shdlc->state_mutex);
764 shdlc->state = SHDLC_DISCONNECTED; 764 shdlc->state = SHDLC_DISCONNECTED;
765 765
766 init_timer(&shdlc->connect_timer); 766 timer_setup(&shdlc->connect_timer, llc_shdlc_connect_timeout, 0);
767 shdlc->connect_timer.data = (unsigned long)shdlc; 767 timer_setup(&shdlc->t1_timer, llc_shdlc_t1_timeout, 0);
768 shdlc->connect_timer.function = llc_shdlc_connect_timeout; 768 timer_setup(&shdlc->t2_timer, llc_shdlc_t2_timeout, 0);
769
770 init_timer(&shdlc->t1_timer);
771 shdlc->t1_timer.data = (unsigned long)shdlc;
772 shdlc->t1_timer.function = llc_shdlc_t1_timeout;
773
774 init_timer(&shdlc->t2_timer);
775 shdlc->t2_timer.data = (unsigned long)shdlc;
776 shdlc->t2_timer.function = llc_shdlc_t2_timeout;
777 769
778 shdlc->w = SHDLC_MAX_WINDOW; 770 shdlc->w = SHDLC_MAX_WINDOW;
779 shdlc->srej_support = SHDLC_SREJ_SUPPORT; 771 shdlc->srej_support = SHDLC_SREJ_SUPPORT;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5cf3cce..ef4026a23e80 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -242,9 +242,9 @@ static void nfc_llcp_timeout_work(struct work_struct *work)
242 nfc_dep_link_down(local->dev); 242 nfc_dep_link_down(local->dev);
243} 243}
244 244
245static void nfc_llcp_symm_timer(unsigned long data) 245static void nfc_llcp_symm_timer(struct timer_list *t)
246{ 246{
247 struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; 247 struct nfc_llcp_local *local = from_timer(local, t, link_timer);
248 248
249 pr_err("SYMM timeout\n"); 249 pr_err("SYMM timeout\n");
250 250
@@ -285,9 +285,9 @@ static void nfc_llcp_sdreq_timeout_work(struct work_struct *work)
285 nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); 285 nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
286} 286}
287 287
288static void nfc_llcp_sdreq_timer(unsigned long data) 288static void nfc_llcp_sdreq_timer(struct timer_list *t)
289{ 289{
290 struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; 290 struct nfc_llcp_local *local = from_timer(local, t, sdreq_timer);
291 291
292 schedule_work(&local->sdreq_timeout_work); 292 schedule_work(&local->sdreq_timeout_work);
293} 293}
@@ -1573,9 +1573,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
1573 INIT_LIST_HEAD(&local->list); 1573 INIT_LIST_HEAD(&local->list);
1574 kref_init(&local->ref); 1574 kref_init(&local->ref);
1575 mutex_init(&local->sdp_lock); 1575 mutex_init(&local->sdp_lock);
1576 init_timer(&local->link_timer); 1576 timer_setup(&local->link_timer, nfc_llcp_symm_timer, 0);
1577 local->link_timer.data = (unsigned long) local;
1578 local->link_timer.function = nfc_llcp_symm_timer;
1579 1577
1580 skb_queue_head_init(&local->tx_queue); 1578 skb_queue_head_init(&local->tx_queue);
1581 INIT_WORK(&local->tx_work, nfc_llcp_tx_work); 1579 INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1599,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
1601 1599
1602 mutex_init(&local->sdreq_lock); 1600 mutex_init(&local->sdreq_lock);
1603 INIT_HLIST_HEAD(&local->pending_sdreqs); 1601 INIT_HLIST_HEAD(&local->pending_sdreqs);
1604 init_timer(&local->sdreq_timer); 1602 timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
1605 local->sdreq_timer.data = (unsigned long) local;
1606 local->sdreq_timer.function = nfc_llcp_sdreq_timer;
1607 INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); 1603 INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
1608 1604
1609 list_add(&local->list, &llcp_devices); 1605 list_add(&local->list, &llcp_devices);
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index 0ca31d9bf741..c3362c499281 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux NFC NCI layer. 3# Makefile for the Linux NFC NCI layer.
3# 4#
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index c25e9b4179c3..074960154993 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -591,18 +591,18 @@ static int nci_close_device(struct nci_dev *ndev)
591} 591}
592 592
593/* NCI command timer function */ 593/* NCI command timer function */
594static void nci_cmd_timer(unsigned long arg) 594static void nci_cmd_timer(struct timer_list *t)
595{ 595{
596 struct nci_dev *ndev = (void *) arg; 596 struct nci_dev *ndev = from_timer(ndev, t, cmd_timer);
597 597
598 atomic_set(&ndev->cmd_cnt, 1); 598 atomic_set(&ndev->cmd_cnt, 1);
599 queue_work(ndev->cmd_wq, &ndev->cmd_work); 599 queue_work(ndev->cmd_wq, &ndev->cmd_work);
600} 600}
601 601
602/* NCI data exchange timer function */ 602/* NCI data exchange timer function */
603static void nci_data_timer(unsigned long arg) 603static void nci_data_timer(struct timer_list *t)
604{ 604{
605 struct nci_dev *ndev = (void *) arg; 605 struct nci_dev *ndev = from_timer(ndev, t, data_timer);
606 606
607 set_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); 607 set_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags);
608 queue_work(ndev->rx_wq, &ndev->rx_work); 608 queue_work(ndev->rx_wq, &ndev->rx_work);
@@ -1232,10 +1232,8 @@ int nci_register_device(struct nci_dev *ndev)
1232 skb_queue_head_init(&ndev->rx_q); 1232 skb_queue_head_init(&ndev->rx_q);
1233 skb_queue_head_init(&ndev->tx_q); 1233 skb_queue_head_init(&ndev->tx_q);
1234 1234
1235 setup_timer(&ndev->cmd_timer, nci_cmd_timer, 1235 timer_setup(&ndev->cmd_timer, nci_cmd_timer, 0);
1236 (unsigned long) ndev); 1236 timer_setup(&ndev->data_timer, nci_data_timer, 0);
1237 setup_timer(&ndev->data_timer, nci_data_timer,
1238 (unsigned long) ndev);
1239 1237
1240 mutex_init(&ndev->req_lock); 1238 mutex_init(&ndev->req_lock);
1241 INIT_LIST_HEAD(&ndev->conn_info_list); 1239 INIT_LIST_HEAD(&ndev->conn_info_list);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index b251fb936a27..c0b83dc9d993 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -75,7 +75,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
75 if (!hdr) 75 if (!hdr)
76 return -EMSGSIZE; 76 return -EMSGSIZE;
77 77
78 genl_dump_check_consistent(cb, hdr, &nfc_genl_family); 78 genl_dump_check_consistent(cb, hdr);
79 79
80 if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) || 80 if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) ||
81 nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) || 81 nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) ||
@@ -603,7 +603,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
603 return -EMSGSIZE; 603 return -EMSGSIZE;
604 604
605 if (cb) 605 if (cb)
606 genl_dump_check_consistent(cb, hdr, &nfc_genl_family); 606 genl_dump_check_consistent(cb, hdr);
607 607
608 if (nfc_genl_setup_device_added(dev, msg)) 608 if (nfc_genl_setup_device_added(dev, msg))
609 goto nla_put_failure; 609 goto nla_put_failure;
@@ -928,6 +928,30 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
928 return rc; 928 return rc;
929} 929}
930 930
931static int nfc_genl_deactivate_target(struct sk_buff *skb,
932 struct genl_info *info)
933{
934 struct nfc_dev *dev;
935 u32 device_idx, target_idx;
936 int rc;
937
938 if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
939 return -EINVAL;
940
941 device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
942
943 dev = nfc_get_device(device_idx);
944 if (!dev)
945 return -ENODEV;
946
947 target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
948
949 rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP);
950
951 nfc_put_device(dev);
952 return rc;
953}
954
931static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) 955static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
932{ 956{
933 struct nfc_dev *dev; 957 struct nfc_dev *dev;
@@ -1332,7 +1356,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
1332 goto nla_put_failure; 1356 goto nla_put_failure;
1333 1357
1334 if (cb) 1358 if (cb)
1335 genl_dump_check_consistent(cb, hdr, &nfc_genl_family); 1359 genl_dump_check_consistent(cb, hdr);
1336 1360
1337 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || 1361 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
1338 nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) || 1362 nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
@@ -1751,6 +1775,11 @@ static const struct genl_ops nfc_genl_ops[] = {
1751 .doit = nfc_genl_vendor_cmd, 1775 .doit = nfc_genl_vendor_cmd,
1752 .policy = nfc_genl_policy, 1776 .policy = nfc_genl_policy,
1753 }, 1777 },
1778 {
1779 .cmd = NFC_CMD_DEACTIVATE_TARGET,
1780 .doit = nfc_genl_deactivate_target,
1781 .policy = nfc_genl_policy,
1782 },
1754}; 1783};
1755 1784
1756static struct genl_family nfc_genl_family __ro_after_init = { 1785static struct genl_family nfc_genl_family __ro_after_init = {
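
Worth noting about the new nfc_genl_deactivate_target(): it validates NFC_ATTR_DEVICE_INDEX but calls nla_get_u32() on NFC_ATTR_TARGET_INDEX without checking that the attribute was supplied, so a malformed request hands nla_get_u32() a NULL attribute. A defensive entry check would gate both reads; a sketch of that shape (not what this merge contains):

    if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
        !info->attrs[NFC_ATTR_TARGET_INDEX])
            return -EINVAL;
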
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 58fb827439a8..d7da99a0b0b8 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -14,6 +14,66 @@
14#include <net/nsh.h> 14#include <net/nsh.h>
15#include <net/tun_proto.h> 15#include <net/tun_proto.h>
16 16
17int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh)
18{
19 struct nshhdr *nh;
20 size_t length = nsh_hdr_len(pushed_nh);
21 u8 next_proto;
22
23 if (skb->mac_len) {
24 next_proto = TUN_P_ETHERNET;
25 } else {
26 next_proto = tun_p_from_eth_p(skb->protocol);
27 if (!next_proto)
28 return -EAFNOSUPPORT;
29 }
30
31 /* Add the NSH header */
32 if (skb_cow_head(skb, length) < 0)
33 return -ENOMEM;
34
35 skb_push(skb, length);
36 nh = (struct nshhdr *)(skb->data);
37 memcpy(nh, pushed_nh, length);
38 nh->np = next_proto;
39 skb_postpush_rcsum(skb, nh, length);
40
41 skb->protocol = htons(ETH_P_NSH);
42 skb_reset_mac_header(skb);
43 skb_reset_network_header(skb);
44 skb_reset_mac_len(skb);
45
46 return 0;
47}
48EXPORT_SYMBOL_GPL(nsh_push);
49
50int nsh_pop(struct sk_buff *skb)
51{
52 struct nshhdr *nh;
53 size_t length;
54 __be16 inner_proto;
55
56 if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
57 return -ENOMEM;
58 nh = (struct nshhdr *)(skb->data);
59 length = nsh_hdr_len(nh);
60 inner_proto = tun_p_to_eth_p(nh->np);
61 if (!pskb_may_pull(skb, length))
62 return -ENOMEM;
63
64 if (!inner_proto)
65 return -EAFNOSUPPORT;
66
67 skb_pull_rcsum(skb, length);
68 skb_reset_mac_header(skb);
69 skb_reset_network_header(skb);
70 skb_reset_mac_len(skb);
71 skb->protocol = inner_proto;
72
73 return 0;
74}
75EXPORT_SYMBOL_GPL(nsh_pop);
76
17static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, 77static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
18 netdev_features_t features) 78 netdev_features_t features)
19{ 79{
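
nsh_push() and nsh_pop() size everything through nsh_hdr_len(), which decodes the 6-bit Length field of the NSH base header; per RFC 8300 that field counts 4-byte words covering the base header, the service path header, and any metadata, which is why nsh_pop() can pskb_may_pull() the base header first and the full length second. A standalone decoder sketch (field layout per the RFC, names illustrative):

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

/* First 4 bytes of an NSH header (RFC 8300):
 * Ver(2) O(1) U(1) TTL(6) Len(6) | MD Type(4)+resv(4) | Next Proto(8) */
struct nsh_base {
    uint16_t ver_flags_ttl_len;   /* big-endian on the wire */
    uint8_t  mdtype;
    uint8_t  np;
};

/* Total header length in bytes: the Len field counts 4-byte
 * words over base + service path header + metadata. */
static size_t nsh_hdr_len_sketch(const struct nsh_base *nh)
{
    return (size_t)(ntohs(nh->ver_flags_ttl_len) & 0x3f) << 2;
}
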
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ce947292ae77..2650205cdaf9 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -14,6 +14,7 @@ config OPENVSWITCH
14 select MPLS 14 select MPLS
15 select NET_MPLS_GSO 15 select NET_MPLS_GSO
16 select DST_CACHE 16 select DST_CACHE
17 select NET_NSH
17 ---help--- 18 ---help---
18 Open vSwitch is a multilayer Ethernet switch targeted at virtualized 19 Open vSwitch is a multilayer Ethernet switch targeted at virtualized
19 environments. In addition to supporting a variety of features 20 environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 60f809085b92..41109c326f3a 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for Open vSwitch. 3# Makefile for Open vSwitch.
3# 4#
@@ -11,6 +12,7 @@ openvswitch-y := \
11 flow.o \ 12 flow.o \
12 flow_netlink.o \ 13 flow_netlink.o \
13 flow_table.o \ 14 flow_table.o \
15 meter.o \
14 vport.o \ 16 vport.o \
15 vport-internal_dev.o \ 17 vport-internal_dev.o \
16 vport-netdev.o 18 vport-netdev.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a54a556fcdb5..30a5df27116e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -43,6 +43,7 @@
43#include "flow.h" 43#include "flow.h"
44#include "conntrack.h" 44#include "conntrack.h"
45#include "vport.h" 45#include "vport.h"
46#include "flow_netlink.h"
46 47
47struct deferred_action { 48struct deferred_action {
48 struct sk_buff *skb; 49 struct sk_buff *skb;
@@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
380 return 0; 381 return 0;
381} 382}
382 383
384static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
385 const struct nshhdr *nh)
386{
387 int err;
388
389 err = nsh_push(skb, nh);
390 if (err)
391 return err;
392
393 /* safe right before invalidate_flow_key */
394 key->mac_proto = MAC_PROTO_NONE;
395 invalidate_flow_key(key);
396 return 0;
397}
398
399static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
400{
401 int err;
402
403 err = nsh_pop(skb);
404 if (err)
405 return err;
406
407 /* safe right before invalidate_flow_key */
408 if (skb->protocol == htons(ETH_P_TEB))
409 key->mac_proto = MAC_PROTO_ETHERNET;
410 else
411 key->mac_proto = MAC_PROTO_NONE;
412 invalidate_flow_key(key);
413 return 0;
414}
415
383static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, 416static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
384 __be32 addr, __be32 new_addr) 417 __be32 addr, __be32 new_addr)
385{ 418{
@@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
602 return 0; 635 return 0;
603} 636}
604 637
638static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
639 const struct nlattr *a)
640{
641 struct nshhdr *nh;
642 size_t length;
643 int err;
644 u8 flags;
645 u8 ttl;
646 int i;
647
648 struct ovs_key_nsh key;
649 struct ovs_key_nsh mask;
650
651 err = nsh_key_from_nlattr(a, &key, &mask);
652 if (err)
653 return err;
654
655 /* Make sure the NSH base header is there */
656 if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
657 return -ENOMEM;
658
659 nh = nsh_hdr(skb);
660 length = nsh_hdr_len(nh);
661
662 /* Make sure the whole NSH header is there */
663 err = skb_ensure_writable(skb, skb_network_offset(skb) +
664 length);
665 if (unlikely(err))
666 return err;
667
668 nh = nsh_hdr(skb);
669 skb_postpull_rcsum(skb, nh, length);
670 flags = nsh_get_flags(nh);
671 flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
672 flow_key->nsh.base.flags = flags;
673 ttl = nsh_get_ttl(nh);
674 ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
675 flow_key->nsh.base.ttl = ttl;
676 nsh_set_flags_and_ttl(nh, flags, ttl);
677 nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
678 mask.base.path_hdr);
679 flow_key->nsh.base.path_hdr = nh->path_hdr;
680 switch (nh->mdtype) {
681 case NSH_M_TYPE1:
682 for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
683 nh->md1.context[i] =
684 OVS_MASKED(nh->md1.context[i], key.context[i],
685 mask.context[i]);
686 }
687 memcpy(flow_key->nsh.context, nh->md1.context,
688 sizeof(nh->md1.context));
689 break;
690 case NSH_M_TYPE2:
691 memset(flow_key->nsh.context, 0,
692 sizeof(flow_key->nsh.context));
693 break;
694 default:
695 return -EINVAL;
696 }
697 skb_postpush_rcsum(skb, nh, length);
698 return 0;
699}
700
605/* Must follow skb_ensure_writable() since that can move the skb data. */ 701/* Must follow skb_ensure_writable() since that can move the skb data. */
606static void set_tp_port(struct sk_buff *skb, __be16 *port, 702static void set_tp_port(struct sk_buff *skb, __be16 *port,
607 __be16 new_port, __sum16 *check) 703 __be16 new_port, __sum16 *check)
@@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb,
1024 get_mask(a, struct ovs_key_ethernet *)); 1120 get_mask(a, struct ovs_key_ethernet *));
1025 break; 1121 break;
1026 1122
1123 case OVS_KEY_ATTR_NSH:
1124 err = set_nsh(skb, flow_key, a);
1125 break;
1126
1027 case OVS_KEY_ATTR_IPV4: 1127 case OVS_KEY_ATTR_IPV4:
1028 err = set_ipv4(skb, flow_key, nla_data(a), 1128 err = set_ipv4(skb, flow_key, nla_data(a),
1029 get_mask(a, struct ovs_key_ipv4 *)); 1129 get_mask(a, struct ovs_key_ipv4 *));
@@ -1203,6 +1303,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1203 return err == -EINPROGRESS ? 0 : err; 1303 return err == -EINPROGRESS ? 0 : err;
1204 break; 1304 break;
1205 1305
1306 case OVS_ACTION_ATTR_CT_CLEAR:
1307 err = ovs_ct_clear(skb, key);
1308 break;
1309
1206 case OVS_ACTION_ATTR_PUSH_ETH: 1310 case OVS_ACTION_ATTR_PUSH_ETH:
1207 err = push_eth(skb, key, nla_data(a)); 1311 err = push_eth(skb, key, nla_data(a));
1208 break; 1312 break;
@@ -1210,6 +1314,28 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1210 case OVS_ACTION_ATTR_POP_ETH: 1314 case OVS_ACTION_ATTR_POP_ETH:
1211 err = pop_eth(skb, key); 1315 err = pop_eth(skb, key);
1212 break; 1316 break;
1317
1318 case OVS_ACTION_ATTR_PUSH_NSH: {
1319 u8 buffer[NSH_HDR_MAX_LEN];
1320 struct nshhdr *nh = (struct nshhdr *)buffer;
1321
1322 err = nsh_hdr_from_nlattr(nla_data(a), nh,
1323 NSH_HDR_MAX_LEN);
1324 if (unlikely(err))
1325 break;
1326 err = push_nsh(skb, key, nh);
1327 break;
1328 }
1329
1330 case OVS_ACTION_ATTR_POP_NSH:
1331 err = pop_nsh(skb, key);
1332 break;
1333
1334 case OVS_ACTION_ATTR_METER:
1335 if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
1336 consume_skb(skb);
1337 return 0;
1338 }
1213 } 1339 }
1214 1340
1215 if (unlikely(err)) { 1341 if (unlikely(err)) {
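
set_nsh() above updates each field with OVS_MASKED(), the same masked-set helper the other OVS set actions use: userspace pre-masks the key at validation time, so the update takes the masked bits from the key and preserves the rest of the old value. Assuming the usual one-line definition, a standalone illustration:

#include <stdio.h>
#include <stdint.h>

/* Masked update: masked bits from KEY, the rest from OLD
 * (KEY is assumed pre-masked, as OVS userspace guarantees). */
#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))

int main(void)
{
    uint8_t ttl = 0x3f, key = 0x10, mask = 0xf0;

    printf("0x%02x\n", OVS_MASKED(ttl, key, mask));  /* prints 0x1f */
    return 0;
}
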
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index d558e882ca0c..b27c5c6d9cab 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -752,6 +752,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
752 } 752 }
753 } 753 }
754 /* Non-ICMP, fall thru to initialize if needed. */ 754 /* Non-ICMP, fall thru to initialize if needed. */
755 /* fall through */
755 case IP_CT_NEW: 756 case IP_CT_NEW:
756 /* Seen it before? This can happen for loopback, retrans, 757 /* Seen it before? This can happen for loopback, retrans,
757 * or local packets. 758 * or local packets.
@@ -1129,6 +1130,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
1129 return err; 1130 return err;
1130} 1131}
1131 1132
1133int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
1134{
1135 if (skb_nfct(skb)) {
1136 nf_conntrack_put(skb_nfct(skb));
1137 nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
1138 ovs_ct_fill_key(skb, key);
1139 }
1140
1141 return 0;
1142}
1143
1132static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, 1144static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
1133 const struct sw_flow_key *key, bool log) 1145 const struct sw_flow_key *key, bool log)
1134{ 1146{
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index bc7efd1867ab..399dfdd2c4f9 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -30,6 +30,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
30 30
31int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, 31int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
32 const struct ovs_conntrack_info *); 32 const struct ovs_conntrack_info *);
33int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
33 34
34void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); 35void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
35int ovs_ct_put_key(const struct sw_flow_key *swkey, 36int ovs_ct_put_key(const struct sw_flow_key *swkey,
@@ -73,6 +74,12 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
73 return -ENOTSUPP; 74 return -ENOTSUPP;
74} 75}
75 76
77static inline int ovs_ct_clear(struct sk_buff *skb,
78 struct sw_flow_key *key)
79{
80 return -ENOTSUPP;
81}
82
76static inline void ovs_ct_fill_key(const struct sk_buff *skb, 83static inline void ovs_ct_fill_key(const struct sk_buff *skb,
77 struct sw_flow_key *key) 84 struct sw_flow_key *key)
78{ 85{
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index c3aec6227c91..99cfafc2a139 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
55#include "flow.h" 55#include "flow.h"
56#include "flow_table.h" 56#include "flow_table.h"
57#include "flow_netlink.h" 57#include "flow_netlink.h"
58#include "meter.h"
58#include "vport-internal_dev.h" 59#include "vport-internal_dev.h"
59#include "vport-netdev.h" 60#include "vport-netdev.h"
60 61
@@ -142,35 +143,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
142 const struct dp_upcall_info *, 143 const struct dp_upcall_info *,
143 uint32_t cutlen); 144 uint32_t cutlen);
144 145
145/* Must be called with rcu_read_lock. */
146static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
147{
148 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
149
150 if (dev) {
151 struct vport *vport = ovs_internal_dev_get_vport(dev);
152 if (vport)
153 return vport->dp;
154 }
155
156 return NULL;
157}
158
159/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
160 * returned dp pointer valid.
161 */
162static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
163{
164 struct datapath *dp;
165
166 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
167 rcu_read_lock();
168 dp = get_dp_rcu(net, dp_ifindex);
169 rcu_read_unlock();
170
171 return dp;
172}
173
174/* Must be called with rcu_read_lock or ovs_mutex. */ 146/* Must be called with rcu_read_lock or ovs_mutex. */
175const char *ovs_dp_name(const struct datapath *dp) 147const char *ovs_dp_name(const struct datapath *dp)
176{ 148{
@@ -203,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
203 ovs_flow_tbl_destroy(&dp->table); 175 ovs_flow_tbl_destroy(&dp->table);
204 free_percpu(dp->stats_percpu); 176 free_percpu(dp->stats_percpu);
205 kfree(dp->ports); 177 kfree(dp->ports);
178 ovs_meters_exit(dp);
206 kfree(dp); 179 kfree(dp);
207} 180}
208 181
@@ -335,6 +308,8 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
335 const struct dp_upcall_info *upcall_info, 308 const struct dp_upcall_info *upcall_info,
336 uint32_t cutlen) 309 uint32_t cutlen)
337{ 310{
311 unsigned short gso_type = skb_shinfo(skb)->gso_type;
312 struct sw_flow_key later_key;
338 struct sk_buff *segs, *nskb; 313 struct sk_buff *segs, *nskb;
339 int err; 314 int err;
340 315
@@ -345,9 +320,21 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
345 if (segs == NULL) 320 if (segs == NULL)
346 return -EINVAL; 321 return -EINVAL;
347 322
323 if (gso_type & SKB_GSO_UDP) {
324 /* The initial flow key extracted by ovs_flow_key_extract()
325 * in this case is for a first fragment, so we need to
326 * properly mark later fragments.
327 */
328 later_key = *key;
329 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
330 }
331
348 /* Queue all of the segments. */ 332 /* Queue all of the segments. */
349 skb = segs; 333 skb = segs;
350 do { 334 do {
335 if (gso_type & SKB_GSO_UDP && skb != segs)
336 key = &later_key;
337
351 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); 338 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
352 if (err) 339 if (err)
353 break; 340 break;
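
Only the first segment produced from a UDP (UFO) GSO packet carries the UDP
header, so every later segment must be reported upward with a key whose
fragment type is OVS_FRAG_TYPE_LATER. A condensed model of the segment loop
above (illustrative only; the real code keeps the do/while form):

        struct sw_flow_key later_key = *key;

        later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        for (skb = segs; skb; skb = skb->next) {
                /* The first fragment keeps the original key with L4 ports. */
                const struct sw_flow_key *k = (skb == segs) ? key : &later_key;

                err = queue_userspace_packet(dp, skb, k, upcall_info, cutlen);
                if (err)
                        break;
        }
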
@@ -1601,6 +1588,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1601 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) 1588 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1602 INIT_HLIST_HEAD(&dp->ports[i]); 1589 INIT_HLIST_HEAD(&dp->ports[i]);
1603 1590
1591 err = ovs_meters_init(dp);
1592 if (err)
1593 goto err_destroy_ports_array;
1594
1604 /* Set up our datapath device. */ 1595 /* Set up our datapath device. */
1605 parms.name = nla_data(a[OVS_DP_ATTR_NAME]); 1596 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1606 parms.type = OVS_VPORT_TYPE_INTERNAL; 1597 parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1629,7 +1620,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1629 ovs_dp_reset_user_features(skb, info); 1620 ovs_dp_reset_user_features(skb, info);
1630 } 1621 }
1631 1622
1632 goto err_destroy_ports_array; 1623 goto err_destroy_meters;
1633 } 1624 }
1634 1625
1635 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1626 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1644,8 +1635,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1644 ovs_notify(&dp_datapath_genl_family, reply, info); 1635 ovs_notify(&dp_datapath_genl_family, reply, info);
1645 return 0; 1636 return 0;
1646 1637
1647err_destroy_ports_array: 1638err_destroy_meters:
1648 ovs_unlock(); 1639 ovs_unlock();
1640 ovs_meters_exit(dp);
1641err_destroy_ports_array:
1649 kfree(dp->ports); 1642 kfree(dp->ports);
1650err_destroy_percpu: 1643err_destroy_percpu:
1651 free_percpu(dp->stats_percpu); 1644 free_percpu(dp->stats_percpu);
@@ -1848,7 +1841,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
1848 1841
1849/* Called with ovs_mutex or RCU read lock. */ 1842/* Called with ovs_mutex or RCU read lock. */
1850static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1843static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1851 u32 portid, u32 seq, u32 flags, u8 cmd) 1844 struct net *net, u32 portid, u32 seq,
1845 u32 flags, u8 cmd)
1852{ 1846{
1853 struct ovs_header *ovs_header; 1847 struct ovs_header *ovs_header;
1854 struct ovs_vport_stats vport_stats; 1848 struct ovs_vport_stats vport_stats;
@@ -1864,9 +1858,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1864 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || 1858 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1865 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || 1859 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1866 nla_put_string(skb, OVS_VPORT_ATTR_NAME, 1860 nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1867 ovs_vport_name(vport))) 1861 ovs_vport_name(vport)) ||
1862 nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
1868 goto nla_put_failure; 1863 goto nla_put_failure;
1869 1864
1865 if (!net_eq(net, dev_net(vport->dev))) {
1866 int id = peernet2id_alloc(net, dev_net(vport->dev));
1867
1868 if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
1869 goto nla_put_failure;
1870 }
1871
1870 ovs_vport_get_stats(vport, &vport_stats); 1872 ovs_vport_get_stats(vport, &vport_stats);
1871 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, 1873 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1872 sizeof(struct ovs_vport_stats), &vport_stats, 1874 sizeof(struct ovs_vport_stats), &vport_stats,
@@ -1896,8 +1898,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1896} 1898}
1897 1899
1898/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ 1900/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1899struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1901struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
1900 u32 seq, u8 cmd) 1902 u32 portid, u32 seq, u8 cmd)
1901{ 1903{
1902 struct sk_buff *skb; 1904 struct sk_buff *skb;
1903 int retval; 1905 int retval;
@@ -1906,7 +1908,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1906 if (!skb) 1908 if (!skb)
1907 return ERR_PTR(-ENOMEM); 1909 return ERR_PTR(-ENOMEM);
1908 1910
1909 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); 1911 retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
1910 BUG_ON(retval < 0); 1912 BUG_ON(retval < 0);
1911 1913
1912 return skb; 1914 return skb;
@@ -1920,6 +1922,8 @@ static struct vport *lookup_vport(struct net *net,
1920 struct datapath *dp; 1922 struct datapath *dp;
1921 struct vport *vport; 1923 struct vport *vport;
1922 1924
1925 if (a[OVS_VPORT_ATTR_IFINDEX])
1926 return ERR_PTR(-EOPNOTSUPP);
1923 if (a[OVS_VPORT_ATTR_NAME]) { 1927 if (a[OVS_VPORT_ATTR_NAME]) {
1924 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); 1928 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1925 if (!vport) 1929 if (!vport)
@@ -1944,6 +1948,7 @@ static struct vport *lookup_vport(struct net *net,
1944 return vport; 1948 return vport;
1945 } else 1949 } else
1946 return ERR_PTR(-EINVAL); 1950 return ERR_PTR(-EINVAL);
1951
1947} 1952}
1948 1953
1949/* Called with ovs_mutex */ 1954/* Called with ovs_mutex */
@@ -1983,6 +1988,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1983 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || 1988 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1984 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1989 !a[OVS_VPORT_ATTR_UPCALL_PID])
1985 return -EINVAL; 1990 return -EINVAL;
1991 if (a[OVS_VPORT_ATTR_IFINDEX])
1992 return -EOPNOTSUPP;
1986 1993
1987 port_no = a[OVS_VPORT_ATTR_PORT_NO] 1994 port_no = a[OVS_VPORT_ATTR_PORT_NO]
1988 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; 1995 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
@@ -2032,8 +2039,9 @@ restart:
2032 goto exit_unlock_free; 2039 goto exit_unlock_free;
2033 } 2040 }
2034 2041
2035 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2042 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2036 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 2043 info->snd_portid, info->snd_seq, 0,
2044 OVS_VPORT_CMD_NEW);
2037 2045
2038 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) 2046 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2039 update_headroom(dp); 2047 update_headroom(dp);
@@ -2090,8 +2098,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2090 goto exit_unlock_free; 2098 goto exit_unlock_free;
2091 } 2099 }
2092 2100
2093 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2101 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2094 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 2102 info->snd_portid, info->snd_seq, 0,
2103 OVS_VPORT_CMD_NEW);
2095 BUG_ON(err < 0); 2104 BUG_ON(err < 0);
2096 2105
2097 ovs_unlock(); 2106 ovs_unlock();
@@ -2128,8 +2137,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2128 goto exit_unlock_free; 2137 goto exit_unlock_free;
2129 } 2138 }
2130 2139
2131 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2140 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2132 info->snd_seq, 0, OVS_VPORT_CMD_DEL); 2141 info->snd_portid, info->snd_seq, 0,
2142 OVS_VPORT_CMD_DEL);
2133 BUG_ON(err < 0); 2143 BUG_ON(err < 0);
2134 2144
2135 /* the vport deletion may trigger dp headroom update */ 2145 /* the vport deletion may trigger dp headroom update */
@@ -2169,8 +2179,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2169 err = PTR_ERR(vport); 2179 err = PTR_ERR(vport);
2170 if (IS_ERR(vport)) 2180 if (IS_ERR(vport))
2171 goto exit_unlock_free; 2181 goto exit_unlock_free;
2172 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2182 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2173 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 2183 info->snd_portid, info->snd_seq, 0,
2184 OVS_VPORT_CMD_NEW);
2174 BUG_ON(err < 0); 2185 BUG_ON(err < 0);
2175 rcu_read_unlock(); 2186 rcu_read_unlock();
2176 2187
@@ -2202,6 +2213,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2202 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { 2213 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2203 if (j >= skip && 2214 if (j >= skip &&
2204 ovs_vport_cmd_fill_info(vport, skb, 2215 ovs_vport_cmd_fill_info(vport, skb,
2216 sock_net(skb->sk),
2205 NETLINK_CB(cb->skb).portid, 2217 NETLINK_CB(cb->skb).portid,
2206 cb->nlh->nlmsg_seq, 2218 cb->nlh->nlmsg_seq,
2207 NLM_F_MULTI, 2219 NLM_F_MULTI,
@@ -2228,6 +2240,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2228 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, 2240 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2229 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 2241 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2230 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, 2242 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2243 [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
2244 [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2231}; 2245};
2232 2246
2233static const struct genl_ops dp_vport_genl_ops[] = { 2247static const struct genl_ops dp_vport_genl_ops[] = {
@@ -2273,6 +2287,7 @@ static struct genl_family * const dp_genl_families[] = {
2273 &dp_vport_genl_family, 2287 &dp_vport_genl_family,
2274 &dp_flow_genl_family, 2288 &dp_flow_genl_family,
2275 &dp_packet_genl_family, 2289 &dp_packet_genl_family,
2290 &dp_meter_genl_family,
2276}; 2291};
2277 2292
2278static void dp_unregister_genl(int n_families) 2293static void dp_unregister_genl(int n_families)
@@ -2453,3 +2468,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2453MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); 2468MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2454MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); 2469MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2455MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); 2470MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2471MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 480600649d0b..523d65526766 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -30,6 +30,8 @@
30#include "conntrack.h" 30#include "conntrack.h"
31#include "flow.h" 31#include "flow.h"
32#include "flow_table.h" 32#include "flow_table.h"
33#include "meter.h"
34#include "vport-internal_dev.h"
33 35
34#define DP_MAX_PORTS USHRT_MAX 36#define DP_MAX_PORTS USHRT_MAX
35#define DP_VPORT_HASH_BUCKETS 1024 37#define DP_VPORT_HASH_BUCKETS 1024
@@ -91,6 +93,9 @@ struct datapath {
91 u32 user_features; 93 u32 user_features;
92 94
93 u32 max_headroom; 95 u32 max_headroom;
96
97 /* Switch meters. */
98 struct hlist_head *meters;
94}; 99};
95 100
96/** 101/**
@@ -190,6 +195,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
190 return ovs_lookup_vport(dp, port_no); 195 return ovs_lookup_vport(dp, port_no);
191} 196}
192 197
198/* Must be called with rcu_read_lock. */
199static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
200{
201 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
202
203 if (dev) {
204 struct vport *vport = ovs_internal_dev_get_vport(dev);
205
206 if (vport)
207 return vport->dp;
208 }
209
210 return NULL;
211}
212
213/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
214 * returned dp pointer valid.
215 */
216static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
217{
218 struct datapath *dp;
219
220 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
221 rcu_read_lock();
222 dp = get_dp_rcu(net, dp_ifindex);
223 rcu_read_unlock();
224
225 return dp;
226}
227
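
Moving get_dp_rcu() and get_dp() into datapath.h lets the new meter code
resolve a dp_ifindex to a datapath without duplicating the lookup. A sketch
of the usual caller pattern under ovs_mutex (assumed shape, mirroring the
meter command handlers further down):

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto exit_unlock;
        }
        /* ... use dp ... */
        ovs_unlock();
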
193extern struct notifier_block ovs_dp_device_notifier; 228extern struct notifier_block ovs_dp_device_notifier;
194extern struct genl_family dp_vport_genl_family; 229extern struct genl_family dp_vport_genl_family;
195 230
@@ -200,8 +235,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
200 uint32_t cutlen); 235 uint32_t cutlen);
201 236
202const char *ovs_dp_name(const struct datapath *dp); 237const char *ovs_dp_name(const struct datapath *dp);
203struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, 238struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
204 u8 cmd); 239 u32 portid, u32 seq, u8 cmd);
205 240
206int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, 241int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
207 const struct sw_flow_actions *, struct sw_flow_key *); 242 const struct sw_flow_actions *, struct sw_flow_key *);
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 653d073bae45..f3ee2f2825c0 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -30,8 +30,8 @@ static void dp_detach_port_notify(struct vport *vport)
30 struct datapath *dp; 30 struct datapath *dp;
31 31
32 dp = vport->dp; 32 dp = vport->dp;
33 notify = ovs_vport_cmd_build_info(vport, 0, 0, 33 notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp),
34 OVS_VPORT_CMD_DEL); 34 0, 0, OVS_VPORT_CMD_DEL);
35 ovs_dp_detach_port(vport); 35 ovs_dp_detach_port(vport);
36 if (IS_ERR(notify)) { 36 if (IS_ERR(notify)) {
37 genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0, 37 genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8c94cef25a72..dbe2379329c5 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -46,6 +46,7 @@
46#include <net/ipv6.h> 46#include <net/ipv6.h>
47#include <net/mpls.h> 47#include <net/mpls.h>
48#include <net/ndisc.h> 48#include <net/ndisc.h>
49#include <net/nsh.h>
49 50
50#include "conntrack.h" 51#include "conntrack.h"
51#include "datapath.h" 52#include "datapath.h"
@@ -490,6 +491,52 @@ invalid:
490 return 0; 491 return 0;
491} 492}
492 493
494static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
495{
496 struct nshhdr *nh;
497 unsigned int nh_ofs = skb_network_offset(skb);
498 u8 version, length;
499 int err;
500
501 err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
502 if (unlikely(err))
503 return err;
504
505 nh = nsh_hdr(skb);
506 version = nsh_get_ver(nh);
507 length = nsh_hdr_len(nh);
508
509 if (version != 0)
510 return -EINVAL;
511
512 err = check_header(skb, nh_ofs + length);
513 if (unlikely(err))
514 return err;
515
516 nh = nsh_hdr(skb);
517 key->nsh.base.flags = nsh_get_flags(nh);
518 key->nsh.base.ttl = nsh_get_ttl(nh);
519 key->nsh.base.mdtype = nh->mdtype;
520 key->nsh.base.np = nh->np;
521 key->nsh.base.path_hdr = nh->path_hdr;
522 switch (key->nsh.base.mdtype) {
523 case NSH_M_TYPE1:
524 if (length != NSH_M_TYPE1_LEN)
525 return -EINVAL;
526 memcpy(key->nsh.context, nh->md1.context,
527 sizeof(nh->md1));
528 break;
529 case NSH_M_TYPE2:
530 memset(key->nsh.context, 0,
531 sizeof(nh->md1));
532 break;
533 default:
534 return -EINVAL;
535 }
536
537 return 0;
538}
539
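
parse_nsh() reads the NSH base header, whose first 16 bits pack
Ver(2) O(1) U(1) TTL(6) Length(6), with Length counted in 4-byte words
(RFC 8300). A self-contained userspace model of that decode (values are
illustrative; the kernel uses the accessors from <net/nsh.h>):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint16_t ver_flags_ttl_len = 0x0186; /* Ver=0, TTL=6, Len=6 */
                unsigned int ver = ver_flags_ttl_len >> 14;
                unsigned int ttl = (ver_flags_ttl_len >> 6) & 0x3f;
                unsigned int hdr_len = (ver_flags_ttl_len & 0x3f) * 4;

                /* 24 bytes: 8-byte base header + four 4-byte MD1 contexts,
                 * i.e. NSH_M_TYPE1_LEN, the only length parse_nsh() accepts
                 * for MD type 1.
                 */
                printf("ver=%u ttl=%u len=%u bytes\n", ver, ttl, hdr_len);
                return 0;
        }
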
493/** 540/**
494 * key_extract - extracts a flow key from an Ethernet frame. 541 * key_extract - extracts a flow key from an Ethernet frame.
495 * @skb: sk_buff that contains the frame, with skb->data pointing to the 542 * @skb: sk_buff that contains the frame, with skb->data pointing to the
@@ -584,7 +631,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
584 key->ip.frag = OVS_FRAG_TYPE_LATER; 631 key->ip.frag = OVS_FRAG_TYPE_LATER;
585 return 0; 632 return 0;
586 } 633 }
587 if (nh->frag_off & htons(IP_MF)) 634 if (nh->frag_off & htons(IP_MF) ||
635 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
588 key->ip.frag = OVS_FRAG_TYPE_FIRST; 636 key->ip.frag = OVS_FRAG_TYPE_FIRST;
589 else 637 else
590 key->ip.frag = OVS_FRAG_TYPE_NONE; 638 key->ip.frag = OVS_FRAG_TYPE_NONE;
@@ -700,6 +748,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
700 748
701 if (key->ip.frag == OVS_FRAG_TYPE_LATER) 749 if (key->ip.frag == OVS_FRAG_TYPE_LATER)
702 return 0; 750 return 0;
751 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
752 key->ip.frag = OVS_FRAG_TYPE_FIRST;
753
703 /* Transport layer. */ 754 /* Transport layer. */
704 if (key->ip.proto == NEXTHDR_TCP) { 755 if (key->ip.proto == NEXTHDR_TCP) {
705 if (tcphdr_ok(skb)) { 756 if (tcphdr_ok(skb)) {
@@ -735,6 +786,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
735 memset(&key->tp, 0, sizeof(key->tp)); 786 memset(&key->tp, 0, sizeof(key->tp));
736 } 787 }
737 } 788 }
789 } else if (key->eth.type == htons(ETH_P_NSH)) {
790 error = parse_nsh(skb, key);
791 if (error)
792 return error;
738 } 793 }
739 return 0; 794 return 0;
740} 795}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1875bba4f865..c670dd24b8b7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -35,6 +35,7 @@
35#include <net/inet_ecn.h> 35#include <net/inet_ecn.h>
36#include <net/ip_tunnels.h> 36#include <net/ip_tunnels.h>
37#include <net/dst_metadata.h> 37#include <net/dst_metadata.h>
38#include <net/nsh.h>
38 39
39struct sk_buff; 40struct sk_buff;
40 41
@@ -66,6 +67,11 @@ struct vlan_head {
66 (offsetof(struct sw_flow_key, recirc_id) + \ 67 (offsetof(struct sw_flow_key, recirc_id) + \
67 FIELD_SIZEOF(struct sw_flow_key, recirc_id)) 68 FIELD_SIZEOF(struct sw_flow_key, recirc_id))
68 69
70struct ovs_key_nsh {
71 struct ovs_nsh_key_base base;
72 __be32 context[NSH_MD1_CONTEXT_SIZE];
73};
74
69struct sw_flow_key { 75struct sw_flow_key {
70 u8 tun_opts[IP_TUNNEL_OPTS_MAX]; 76 u8 tun_opts[IP_TUNNEL_OPTS_MAX];
71 u8 tun_opts_len; 77 u8 tun_opts_len;
@@ -143,6 +149,7 @@ struct sw_flow_key {
143 } nd; 149 } nd;
144 }; 150 };
145 } ipv6; 151 } ipv6;
152 struct ovs_key_nsh nsh; /* network service header */
146 }; 153 };
147 struct { 154 struct {
148 /* Connection tracking fields not packed above. */ 155 /* Connection tracking fields not packed above. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index e8eb427ce6d1..dc424798ba6f 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,8 @@
48#include <net/ndisc.h> 48#include <net/ndisc.h>
49#include <net/mpls.h> 49#include <net/mpls.h>
50#include <net/vxlan.h> 50#include <net/vxlan.h>
51#include <net/tun_proto.h>
52#include <net/erspan.h>
51 53
52#include "flow_netlink.h" 54#include "flow_netlink.h"
53 55
@@ -75,16 +77,20 @@ static bool actions_may_change_flow(const struct nlattr *actions)
75 break; 77 break;
76 78
77 case OVS_ACTION_ATTR_CT: 79 case OVS_ACTION_ATTR_CT:
80 case OVS_ACTION_ATTR_CT_CLEAR:
78 case OVS_ACTION_ATTR_HASH: 81 case OVS_ACTION_ATTR_HASH:
79 case OVS_ACTION_ATTR_POP_ETH: 82 case OVS_ACTION_ATTR_POP_ETH:
80 case OVS_ACTION_ATTR_POP_MPLS: 83 case OVS_ACTION_ATTR_POP_MPLS:
84 case OVS_ACTION_ATTR_POP_NSH:
81 case OVS_ACTION_ATTR_POP_VLAN: 85 case OVS_ACTION_ATTR_POP_VLAN:
82 case OVS_ACTION_ATTR_PUSH_ETH: 86 case OVS_ACTION_ATTR_PUSH_ETH:
83 case OVS_ACTION_ATTR_PUSH_MPLS: 87 case OVS_ACTION_ATTR_PUSH_MPLS:
88 case OVS_ACTION_ATTR_PUSH_NSH:
84 case OVS_ACTION_ATTR_PUSH_VLAN: 89 case OVS_ACTION_ATTR_PUSH_VLAN:
85 case OVS_ACTION_ATTR_SAMPLE: 90 case OVS_ACTION_ATTR_SAMPLE:
86 case OVS_ACTION_ATTR_SET: 91 case OVS_ACTION_ATTR_SET:
87 case OVS_ACTION_ATTR_SET_MASKED: 92 case OVS_ACTION_ATTR_SET_MASKED:
93 case OVS_ACTION_ATTR_METER:
88 default: 94 default:
89 return true; 95 return true;
90 } 96 }
@@ -173,7 +179,8 @@ static bool match_validate(const struct sw_flow_match *match,
173 | (1 << OVS_KEY_ATTR_ICMPV6) 179 | (1 << OVS_KEY_ATTR_ICMPV6)
174 | (1 << OVS_KEY_ATTR_ARP) 180 | (1 << OVS_KEY_ATTR_ARP)
175 | (1 << OVS_KEY_ATTR_ND) 181 | (1 << OVS_KEY_ATTR_ND)
176 | (1 << OVS_KEY_ATTR_MPLS)); 182 | (1 << OVS_KEY_ATTR_MPLS)
183 | (1 << OVS_KEY_ATTR_NSH));
177 184
178 /* Always allowed mask fields. */ 185 /* Always allowed mask fields. */
179 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 186 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -282,6 +289,14 @@ static bool match_validate(const struct sw_flow_match *match,
282 } 289 }
283 } 290 }
284 291
292 if (match->key->eth.type == htons(ETH_P_NSH)) {
293 key_expected |= 1 << OVS_KEY_ATTR_NSH;
294 if (match->mask &&
295 match->mask->key.eth.type == htons(0xffff)) {
296 mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
297 }
298 }
299
285 if ((key_attrs & key_expected) != key_expected) { 300 if ((key_attrs & key_expected) != key_expected) {
286 /* Key attributes check failed. */ 301 /* Key attributes check failed. */
287 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", 302 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
@@ -319,7 +334,21 @@ size_t ovs_tun_key_attr_size(void)
319 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 334 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
320 */ 335 */
321 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 336 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
322 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 337 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */
338 + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
339}
340
341static size_t ovs_nsh_key_attr_size(void)
342{
343 /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
344 * updating this function.
345 */
346 return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
347 /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
348 * mutually exclusive, so the bigger one can cover
 349 * the smaller one.
350 */
351 + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
323} 352}
324 353
325size_t ovs_key_attr_size(void) 354size_t ovs_key_attr_size(void)
@@ -327,7 +356,7 @@ size_t ovs_key_attr_size(void)
327 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 356 /* Whenever adding new OVS_KEY_ FIELDS, we should consider
328 * updating this function. 357 * updating this function.
329 */ 358 */
330 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); 359 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
331 360
332 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 361 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
333 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 362 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -341,6 +370,8 @@ size_t ovs_key_attr_size(void)
341 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ 370 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
342 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ 371 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
343 + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ 372 + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
373 + nla_total_size(0) /* OVS_KEY_ATTR_NSH */
374 + ovs_nsh_key_attr_size()
344 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 375 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
345 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 376 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
346 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 377 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -371,6 +402,14 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
371 .next = ovs_vxlan_ext_key_lens }, 402 .next = ovs_vxlan_ext_key_lens },
372 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 403 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
373 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 404 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
405 [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
406};
407
408static const struct ovs_len_tbl
409ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
410 [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
411 [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) },
412 [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE },
374}; 413};
375 414
376/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 415/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -405,6 +444,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
405 .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, 444 .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
406 [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { 445 [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
407 .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, 446 .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
447 [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED,
448 .next = ovs_nsh_key_attr_lens, },
408}; 449};
409 450
410static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) 451static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -593,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
593 return 0; 634 return 0;
594} 635}
595 636
637static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
638 struct sw_flow_match *match, bool is_mask,
639 bool log)
640{
641 unsigned long opt_key_offset;
642 struct erspan_metadata opts;
643
644 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
645
646 memset(&opts, 0, sizeof(opts));
647 opts.index = nla_get_be32(attr);
648
 649 /* Index is only 20 bits */
650 if (ntohl(opts.index) & ~INDEX_MASK) {
651 OVS_NLERR(log, "ERSPAN index number %x too large.",
652 ntohl(opts.index));
653 return -EINVAL;
654 }
655
656 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
657 opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
658 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
659 is_mask);
660
661 return 0;
662}
663
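
The ERSPAN option carries a session index that must fit in 20 bits. A
self-contained model of the range check above (INDEX_MASK is assumed to be
0xfffff, as defined in <net/erspan.h>):

        #include <stdint.h>
        #include <stdio.h>

        #define INDEX_MASK 0xfffff      /* 20-bit session index */

        int main(void)
        {
                uint32_t ok = 0x000fffff;  /* largest valid index */
                uint32_t bad = 0x00100000; /* bit 20 set: rejected */

                printf("%#x -> %s\n", ok,
                       (ok & ~INDEX_MASK) ? "too large" : "ok");
                printf("%#x -> %s\n", bad,
                       (bad & ~INDEX_MASK) ? "too large" : "ok");
                return 0;
        }
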
596static int ip_tun_from_nlattr(const struct nlattr *attr, 664static int ip_tun_from_nlattr(const struct nlattr *attr,
597 struct sw_flow_match *match, bool is_mask, 665 struct sw_flow_match *match, bool is_mask,
598 bool log) 666 bool log)
@@ -700,6 +768,19 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
700 break; 768 break;
701 case OVS_TUNNEL_KEY_ATTR_PAD: 769 case OVS_TUNNEL_KEY_ATTR_PAD:
702 break; 770 break;
771 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
772 if (opts_type) {
773 OVS_NLERR(log, "Multiple metadata blocks provided");
774 return -EINVAL;
775 }
776
777 err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
778 if (err)
779 return err;
780
781 tun_flags |= TUNNEL_ERSPAN_OPT;
782 opts_type = type;
783 break;
703 default: 784 default:
704 OVS_NLERR(log, "Unknown IP tunnel attribute %d", 785 OVS_NLERR(log, "Unknown IP tunnel attribute %d",
705 type); 786 type);
@@ -824,6 +905,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
824 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 905 else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
825 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 906 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
826 return -EMSGSIZE; 907 return -EMSGSIZE;
908 else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
909 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
910 ((struct erspan_metadata *)tun_opts)->index))
911 return -EMSGSIZE;
827 } 912 }
828 913
829 return 0; 914 return 0;
@@ -1179,6 +1264,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1179 return 0; 1264 return 0;
1180} 1265}
1181 1266
1267int nsh_hdr_from_nlattr(const struct nlattr *attr,
1268 struct nshhdr *nh, size_t size)
1269{
1270 struct nlattr *a;
1271 int rem;
1272 u8 flags = 0;
1273 u8 ttl = 0;
1274 int mdlen = 0;
1275
 1276 /* validate_nsh has checked this, so we needn't check it again here
 1277 */
1278 if (size < NSH_BASE_HDR_LEN)
1279 return -ENOBUFS;
1280
1281 nla_for_each_nested(a, attr, rem) {
1282 int type = nla_type(a);
1283
1284 switch (type) {
1285 case OVS_NSH_KEY_ATTR_BASE: {
1286 const struct ovs_nsh_key_base *base = nla_data(a);
1287
1288 flags = base->flags;
1289 ttl = base->ttl;
1290 nh->np = base->np;
1291 nh->mdtype = base->mdtype;
1292 nh->path_hdr = base->path_hdr;
1293 break;
1294 }
1295 case OVS_NSH_KEY_ATTR_MD1:
1296 mdlen = nla_len(a);
1297 if (mdlen > size - NSH_BASE_HDR_LEN)
1298 return -ENOBUFS;
1299 memcpy(&nh->md1, nla_data(a), mdlen);
1300 break;
1301
1302 case OVS_NSH_KEY_ATTR_MD2:
1303 mdlen = nla_len(a);
1304 if (mdlen > size - NSH_BASE_HDR_LEN)
1305 return -ENOBUFS;
1306 memcpy(&nh->md2, nla_data(a), mdlen);
1307 break;
1308
1309 default:
1310 return -EINVAL;
1311 }
1312 }
1313
1314 /* nsh header length = NSH_BASE_HDR_LEN + mdlen */
1315 nh->ver_flags_ttl_len = 0;
1316 nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1317
1318 return 0;
1319}
1320
1321int nsh_key_from_nlattr(const struct nlattr *attr,
1322 struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1323{
1324 struct nlattr *a;
1325 int rem;
1326
 1327 /* validate_nsh has checked this, so we needn't check it again here
 1328 */
1329 nla_for_each_nested(a, attr, rem) {
1330 int type = nla_type(a);
1331
1332 switch (type) {
1333 case OVS_NSH_KEY_ATTR_BASE: {
1334 const struct ovs_nsh_key_base *base = nla_data(a);
1335 const struct ovs_nsh_key_base *base_mask = base + 1;
1336
1337 nsh->base = *base;
1338 nsh_mask->base = *base_mask;
1339 break;
1340 }
1341 case OVS_NSH_KEY_ATTR_MD1: {
1342 const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1343 const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1344
1345 memcpy(nsh->context, md1->context, sizeof(*md1));
1346 memcpy(nsh_mask->context, md1_mask->context,
1347 sizeof(*md1_mask));
1348 break;
1349 }
1350 case OVS_NSH_KEY_ATTR_MD2:
1351 /* Not supported yet */
1352 return -ENOTSUPP;
1353 default:
1354 return -EINVAL;
1355 }
1356 }
1357
1358 return 0;
1359}
1360
1361static int nsh_key_put_from_nlattr(const struct nlattr *attr,
1362 struct sw_flow_match *match, bool is_mask,
1363 bool is_push_nsh, bool log)
1364{
1365 struct nlattr *a;
1366 int rem;
1367 bool has_base = false;
1368 bool has_md1 = false;
1369 bool has_md2 = false;
1370 u8 mdtype = 0;
1371 int mdlen = 0;
1372
1373 if (WARN_ON(is_push_nsh && is_mask))
1374 return -EINVAL;
1375
1376 nla_for_each_nested(a, attr, rem) {
1377 int type = nla_type(a);
1378 int i;
1379
1380 if (type > OVS_NSH_KEY_ATTR_MAX) {
1381 OVS_NLERR(log, "nsh attr %d is out of range max %d",
1382 type, OVS_NSH_KEY_ATTR_MAX);
1383 return -EINVAL;
1384 }
1385
1386 if (!check_attr_len(nla_len(a),
1387 ovs_nsh_key_attr_lens[type].len)) {
1388 OVS_NLERR(
1389 log,
1390 "nsh attr %d has unexpected len %d expected %d",
1391 type,
1392 nla_len(a),
1393 ovs_nsh_key_attr_lens[type].len
1394 );
1395 return -EINVAL;
1396 }
1397
1398 switch (type) {
1399 case OVS_NSH_KEY_ATTR_BASE: {
1400 const struct ovs_nsh_key_base *base = nla_data(a);
1401
1402 has_base = true;
1403 mdtype = base->mdtype;
1404 SW_FLOW_KEY_PUT(match, nsh.base.flags,
1405 base->flags, is_mask);
1406 SW_FLOW_KEY_PUT(match, nsh.base.ttl,
1407 base->ttl, is_mask);
1408 SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
1409 base->mdtype, is_mask);
1410 SW_FLOW_KEY_PUT(match, nsh.base.np,
1411 base->np, is_mask);
1412 SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
1413 base->path_hdr, is_mask);
1414 break;
1415 }
1416 case OVS_NSH_KEY_ATTR_MD1: {
1417 const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1418
1419 has_md1 = true;
1420 for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
1421 SW_FLOW_KEY_PUT(match, nsh.context[i],
1422 md1->context[i], is_mask);
1423 break;
1424 }
1425 case OVS_NSH_KEY_ATTR_MD2:
 1426 if (!is_push_nsh) /* MD type 2 is not supported yet */
1427 return -ENOTSUPP;
1428
1429 has_md2 = true;
1430 mdlen = nla_len(a);
1431 if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
1432 OVS_NLERR(
1433 log,
1434 "Invalid MD length %d for MD type %d",
1435 mdlen,
1436 mdtype
1437 );
1438 return -EINVAL;
1439 }
1440 break;
1441 default:
1442 OVS_NLERR(log, "Unknown nsh attribute %d",
1443 type);
1444 return -EINVAL;
1445 }
1446 }
1447
1448 if (rem > 0) {
1449 OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
1450 return -EINVAL;
1451 }
1452
1453 if (has_md1 && has_md2) {
1454 OVS_NLERR(
1455 1,
1456 "invalid nsh attribute: md1 and md2 are exclusive."
1457 );
1458 return -EINVAL;
1459 }
1460
1461 if (!is_mask) {
1462 if ((has_md1 && mdtype != NSH_M_TYPE1) ||
1463 (has_md2 && mdtype != NSH_M_TYPE2)) {
1464 OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
1465 mdtype);
1466 return -EINVAL;
1467 }
1468
1469 if (is_push_nsh &&
1470 (!has_base || (!has_md1 && !has_md2))) {
1471 OVS_NLERR(
1472 1,
1473 "push_nsh: missing base or metadata attributes"
1474 );
1475 return -EINVAL;
1476 }
1477 }
1478
1479 return 0;
1480}
1481
1182static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, 1482static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1183 u64 attrs, const struct nlattr **a, 1483 u64 attrs, const struct nlattr **a,
1184 bool is_mask, bool log) 1484 bool is_mask, bool log)
@@ -1306,6 +1606,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1306 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1606 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1307 } 1607 }
1308 1608
1609 if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
1610 if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
1611 is_mask, false, log) < 0)
1612 return -EINVAL;
1613 attrs &= ~(1 << OVS_KEY_ATTR_NSH);
1614 }
1615
1309 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 1616 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
1310 const struct ovs_key_mpls *mpls_key; 1617 const struct ovs_key_mpls *mpls_key;
1311 1618
@@ -1622,6 +1929,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1622 return 0; 1929 return 0;
1623} 1930}
1624 1931
1932static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1933 struct sk_buff *skb)
1934{
1935 struct nlattr *start;
1936
1937 start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
1938 if (!start)
1939 return -EMSGSIZE;
1940
1941 if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1942 goto nla_put_failure;
1943
1944 if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1945 if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1946 sizeof(nsh->context), nsh->context))
1947 goto nla_put_failure;
1948 }
1949
 1950 /* MD type 2 is not supported yet */
1951
1952 nla_nest_end(skb, start);
1953
1954 return 0;
1955
1956nla_put_failure:
1957 return -EMSGSIZE;
1958}
1959
1625static int __ovs_nla_put_key(const struct sw_flow_key *swkey, 1960static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1626 const struct sw_flow_key *output, bool is_mask, 1961 const struct sw_flow_key *output, bool is_mask,
1627 struct sk_buff *skb) 1962 struct sk_buff *skb)
@@ -1750,6 +2085,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1750 ipv6_key->ipv6_tclass = output->ip.tos; 2085 ipv6_key->ipv6_tclass = output->ip.tos;
1751 ipv6_key->ipv6_hlimit = output->ip.ttl; 2086 ipv6_key->ipv6_hlimit = output->ip.ttl;
1752 ipv6_key->ipv6_frag = output->ip.frag; 2087 ipv6_key->ipv6_frag = output->ip.frag;
2088 } else if (swkey->eth.type == htons(ETH_P_NSH)) {
2089 if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2090 goto nla_put_failure;
1753 } else if (swkey->eth.type == htons(ETH_P_ARP) || 2091 } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1754 swkey->eth.type == htons(ETH_P_RARP)) { 2092 swkey->eth.type == htons(ETH_P_RARP)) {
1755 struct ovs_key_arp *arp_key; 2093 struct ovs_key_arp *arp_key;
@@ -2195,6 +2533,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
2195 break; 2533 break;
2196 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 2534 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2197 break; 2535 break;
2536 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2537 break;
2198 } 2538 }
2199 }; 2539 };
2200 2540
@@ -2242,6 +2582,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
2242 return err; 2582 return err;
2243} 2583}
2244 2584
2585static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2586 bool is_push_nsh, bool log)
2587{
2588 struct sw_flow_match match;
2589 struct sw_flow_key key;
2590 int ret = 0;
2591
2592 ovs_match_init(&match, &key, true, NULL);
2593 ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2594 is_push_nsh, log);
2595 return !ret;
2596}
2597
2245/* Return false if there are any non-masked bits set. 2598/* Return false if there are any non-masked bits set.
2246 * Mask follows data immediately, before any netlink padding. 2599 * Mask follows data immediately, before any netlink padding.
2247 */ 2600 */
@@ -2384,6 +2737,13 @@ static int validate_set(const struct nlattr *a,
2384 2737
2385 break; 2738 break;
2386 2739
2740 case OVS_KEY_ATTR_NSH:
2741 if (eth_type != htons(ETH_P_NSH))
2742 return -EINVAL;
2743 if (!validate_nsh(nla_data(a), masked, false, log))
2744 return -EINVAL;
2745 break;
2746
2387 default: 2747 default:
2388 return -EINVAL; 2748 return -EINVAL;
2389 } 2749 }
@@ -2479,9 +2839,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2479 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2839 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2480 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), 2840 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2481 [OVS_ACTION_ATTR_CT] = (u32)-1, 2841 [OVS_ACTION_ATTR_CT] = (u32)-1,
2842 [OVS_ACTION_ATTR_CT_CLEAR] = 0,
2482 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), 2843 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2483 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), 2844 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2484 [OVS_ACTION_ATTR_POP_ETH] = 0, 2845 [OVS_ACTION_ATTR_POP_ETH] = 0,
2846 [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
2847 [OVS_ACTION_ATTR_POP_NSH] = 0,
2848 [OVS_ACTION_ATTR_METER] = sizeof(u32),
2485 }; 2849 };
2486 const struct ovs_action_push_vlan *vlan; 2850 const struct ovs_action_push_vlan *vlan;
2487 int type = nla_type(a); 2851 int type = nla_type(a);
@@ -2620,6 +2984,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2620 skip_copy = true; 2984 skip_copy = true;
2621 break; 2985 break;
2622 2986
2987 case OVS_ACTION_ATTR_CT_CLEAR:
2988 break;
2989
2623 case OVS_ACTION_ATTR_PUSH_ETH: 2990 case OVS_ACTION_ATTR_PUSH_ETH:
2624 /* Disallow pushing an Ethernet header if one 2991 /* Disallow pushing an Ethernet header if one
2625 * is already present */ 2992 * is already present */
@@ -2636,6 +3003,38 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2636 mac_proto = MAC_PROTO_ETHERNET; 3003 mac_proto = MAC_PROTO_ETHERNET;
2637 break; 3004 break;
2638 3005
3006 case OVS_ACTION_ATTR_PUSH_NSH:
3007 if (mac_proto != MAC_PROTO_ETHERNET) {
3008 u8 next_proto;
3009
3010 next_proto = tun_p_from_eth_p(eth_type);
3011 if (!next_proto)
3012 return -EINVAL;
3013 }
3014 mac_proto = MAC_PROTO_NONE;
3015 if (!validate_nsh(nla_data(a), false, true, true))
3016 return -EINVAL;
3017 break;
3018
3019 case OVS_ACTION_ATTR_POP_NSH: {
3020 __be16 inner_proto;
3021
3022 if (eth_type != htons(ETH_P_NSH))
3023 return -EINVAL;
3024 inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3025 if (!inner_proto)
3026 return -EINVAL;
3027 if (key->nsh.base.np == TUN_P_ETHERNET)
3028 mac_proto = MAC_PROTO_ETHERNET;
3029 else
3030 mac_proto = MAC_PROTO_NONE;
3031 break;
3032 }
3033
3034 case OVS_ACTION_ATTR_METER:
3035 /* Non-existent meters are simply ignored. */
3036 break;
3037
2639 default: 3038 default:
2640 OVS_NLERR(log, "Unknown Action type %d", type); 3039 OVS_NLERR(log, "Unknown Action type %d", type);
2641 return -EINVAL; 3040 return -EINVAL;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 929c665ac3aa..6657606b2b47 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr,
79void ovs_nla_free_flow_actions(struct sw_flow_actions *); 79void ovs_nla_free_flow_actions(struct sw_flow_actions *);
80void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); 80void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
81 81
82int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh,
83 struct ovs_key_nsh *nsh_mask);
84int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
85 size_t size);
86
82#endif /* flow_netlink.h */ 87#endif /* flow_netlink.h */
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
new file mode 100644
index 000000000000..3fbfc78991ac
--- /dev/null
+++ b/net/openvswitch/meter.c
@@ -0,0 +1,597 @@
1/*
2 * Copyright (c) 2017 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 */
8
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/if.h>
12#include <linux/skbuff.h>
13#include <linux/ip.h>
14#include <linux/kernel.h>
15#include <linux/openvswitch.h>
16#include <linux/netlink.h>
17#include <linux/rculist.h>
18
19#include <net/netlink.h>
20#include <net/genetlink.h>
21
22#include "datapath.h"
23#include "meter.h"
24
25#define METER_HASH_BUCKETS 1024
26
27static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
28 [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
29 [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
30 [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
31 [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
32 [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
33 [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
34 [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
35 [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
36};
37
38static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
39 [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
40 [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
41 [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
42 [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
43};
44
45static void ovs_meter_free(struct dp_meter *meter)
46{
47 if (!meter)
48 return;
49
50 kfree_rcu(meter, rcu);
51}
52
53static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
54 u32 meter_id)
55{
56 return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
57}
58
59/* Call with ovs_mutex or RCU read lock. */
60static struct dp_meter *lookup_meter(const struct datapath *dp,
61 u32 meter_id)
62{
63 struct dp_meter *meter;
64 struct hlist_head *head;
65
66 head = meter_hash_bucket(dp, meter_id);
67 hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
68 if (meter->id == meter_id)
69 return meter;
70 }
71 return NULL;
72}
73
74static void attach_meter(struct datapath *dp, struct dp_meter *meter)
75{
76 struct hlist_head *head = meter_hash_bucket(dp, meter->id);
77
78 hlist_add_head_rcu(&meter->dp_hash_node, head);
79}
80
81static void detach_meter(struct dp_meter *meter)
82{
83 ASSERT_OVSL();
84 if (meter)
85 hlist_del_rcu(&meter->dp_hash_node);
86}
87
88static struct sk_buff *
89ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
90 struct ovs_header **ovs_reply_header)
91{
92 struct sk_buff *skb;
93 struct ovs_header *ovs_header = info->userhdr;
94
95 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
96 if (!skb)
97 return ERR_PTR(-ENOMEM);
98
99 *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
100 info->snd_seq,
101 &dp_meter_genl_family, 0, cmd);
102 if (!*ovs_reply_header) {
103 nlmsg_free(skb);
104 return ERR_PTR(-EMSGSIZE);
105 }
106 (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
107
108 return skb;
109}
110
111static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
112 struct dp_meter *meter)
113{
114 struct nlattr *nla;
115 struct dp_meter_band *band;
116 u16 i;
117
118 if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
119 goto error;
120
121 if (!meter)
122 return 0;
123
124 if (nla_put(reply, OVS_METER_ATTR_STATS,
125 sizeof(struct ovs_flow_stats), &meter->stats) ||
126 nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
127 OVS_METER_ATTR_PAD))
128 goto error;
129
130 nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
131 if (!nla)
132 goto error;
133
134 band = meter->bands;
135
136 for (i = 0; i < meter->n_bands; ++i, ++band) {
137 struct nlattr *band_nla;
138
139 band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
140 if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
141 sizeof(struct ovs_flow_stats),
142 &band->stats))
143 goto error;
144 nla_nest_end(reply, band_nla);
145 }
146 nla_nest_end(reply, nla);
147
148 return 0;
149error:
150 return -EMSGSIZE;
151}
152
153static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
154{
155 struct sk_buff *reply;
156 struct ovs_header *ovs_reply_header;
157 struct nlattr *nla, *band_nla;
158 int err;
159
160 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
161 &ovs_reply_header);
162 if (IS_ERR(reply))
163 return PTR_ERR(reply);
164
165 if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
166 nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
167 goto nla_put_failure;
168
169 nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
170 if (!nla)
171 goto nla_put_failure;
172
173 band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
174 if (!band_nla)
175 goto nla_put_failure;
176 /* Currently only DROP band type is supported. */
177 if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
178 goto nla_put_failure;
179 nla_nest_end(reply, band_nla);
180 nla_nest_end(reply, nla);
181
182 genlmsg_end(reply, ovs_reply_header);
183 return genlmsg_reply(reply, info);
184
185nla_put_failure:
186 nlmsg_free(reply);
187 err = -EMSGSIZE;
188 return err;
189}
190
191static struct dp_meter *dp_meter_create(struct nlattr **a)
192{
193 struct nlattr *nla;
194 int rem;
195 u16 n_bands = 0;
196 struct dp_meter *meter;
197 struct dp_meter_band *band;
198 int err;
199
200 /* Validate attributes, count the bands. */
201 if (!a[OVS_METER_ATTR_BANDS])
202 return ERR_PTR(-EINVAL);
203
204 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
205 if (++n_bands > DP_MAX_BANDS)
206 return ERR_PTR(-EINVAL);
207
208 /* Allocate and set up the meter before locking anything. */
209 meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
210 sizeof(*meter), GFP_KERNEL);
211 if (!meter)
212 return ERR_PTR(-ENOMEM);
213
214 meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
215 meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
216 meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
217 spin_lock_init(&meter->lock);
218 if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
219 meter->stats = *(struct ovs_flow_stats *)
220 nla_data(a[OVS_METER_ATTR_STATS]);
221 }
222 meter->n_bands = n_bands;
223
224 /* Set up meter bands. */
225 band = meter->bands;
226 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
227 struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
228 u32 band_max_delta_t;
229
230 err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
231 nla_data(nla), nla_len(nla), band_policy,
232 NULL);
233 if (err)
234 goto exit_free_meter;
235
236 if (!attr[OVS_BAND_ATTR_TYPE] ||
237 !attr[OVS_BAND_ATTR_RATE] ||
238 !attr[OVS_BAND_ATTR_BURST]) {
239 err = -EINVAL;
240 goto exit_free_meter;
241 }
242
243 band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
244 band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
245 band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
 246 /* Figure out the max delta_t that is enough to fill any bucket.
 247 * Keep max_delta_t in the bucket units:
248 * pkts => 1/1000 packets, kilobits => bits.
249 */
250 band_max_delta_t = (band->burst_size + band->rate) * 1000;
251 /* Start with a full bucket. */
252 band->bucket = band_max_delta_t;
253 if (band_max_delta_t > meter->max_delta_t)
254 meter->max_delta_t = band_max_delta_t;
255 band++;
256 }
257
258 return meter;
259
260exit_free_meter:
261 kfree(meter);
262 return ERR_PTR(err);
263}
264
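
dp_meter_create() sizes a single allocation to hold the meter plus its
bands[] flexible array. Later kernels would express the same sizing with the
struct_size() helper; a sketch of that equivalent (the patch spells the
arithmetic out by hand, which is correct for this era):

        meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
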
265static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
266{
267 struct nlattr **a = info->attrs;
268 struct dp_meter *meter, *old_meter;
269 struct sk_buff *reply;
270 struct ovs_header *ovs_reply_header;
271 struct ovs_header *ovs_header = info->userhdr;
272 struct datapath *dp;
273 int err;
274 u32 meter_id;
275 bool failed;
276
277 meter = dp_meter_create(a);
278 if (IS_ERR_OR_NULL(meter))
279 return PTR_ERR(meter);
280
281 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
282 &ovs_reply_header);
283 if (IS_ERR(reply)) {
284 err = PTR_ERR(reply);
285 goto exit_free_meter;
286 }
287
288 ovs_lock();
289 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
290 if (!dp) {
291 err = -ENODEV;
292 goto exit_unlock;
293 }
294
295 if (!a[OVS_METER_ATTR_ID]) {
 296 err = -EINVAL;
297 goto exit_unlock;
298 }
299
300 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
301
302 /* Cannot fail after this. */
303 old_meter = lookup_meter(dp, meter_id);
304 detach_meter(old_meter);
305 attach_meter(dp, meter);
306 ovs_unlock();
307
308 /* Build response with the meter_id and stats from
309 * the old meter, if any.
310 */
311 failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
312 WARN_ON(failed);
313 if (old_meter) {
314 spin_lock_bh(&old_meter->lock);
315 if (old_meter->keep_stats) {
316 err = ovs_meter_cmd_reply_stats(reply, meter_id,
317 old_meter);
318 WARN_ON(err);
319 }
320 spin_unlock_bh(&old_meter->lock);
321 ovs_meter_free(old_meter);
322 }
323
324 genlmsg_end(reply, ovs_reply_header);
325 return genlmsg_reply(reply, info);
326
327exit_unlock:
328 ovs_unlock();
329 nlmsg_free(reply);
330exit_free_meter:
331 kfree(meter);
332 return err;
333}
334
335static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
336{
337 struct nlattr **a = info->attrs;
338 u32 meter_id;
339 struct ovs_header *ovs_header = info->userhdr;
340 struct ovs_header *ovs_reply_header;
341 struct datapath *dp;
342 int err;
343 struct sk_buff *reply;
344 struct dp_meter *meter;
345
346 if (!a[OVS_METER_ATTR_ID])
347 return -EINVAL;
348
349 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
350
351 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
352 &ovs_reply_header);
353 if (IS_ERR(reply))
354 return PTR_ERR(reply);
355
356 ovs_lock();
357
358 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
359 if (!dp) {
360 err = -ENODEV;
361 goto exit_unlock;
362 }
363
364 /* Locate meter, copy stats. */
365 meter = lookup_meter(dp, meter_id);
366 if (!meter) {
367 err = -ENOENT;
368 goto exit_unlock;
369 }
370
371 spin_lock_bh(&meter->lock);
372 err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
373 spin_unlock_bh(&meter->lock);
374 if (err)
375 goto exit_unlock;
376
377 ovs_unlock();
378
379 genlmsg_end(reply, ovs_reply_header);
380 return genlmsg_reply(reply, info);
381
382exit_unlock:
383 ovs_unlock();
384 nlmsg_free(reply);
385 return err;
386}
387
388static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
389{
390 struct nlattr **a = info->attrs;
391 u32 meter_id;
392 struct ovs_header *ovs_header = info->userhdr;
393 struct ovs_header *ovs_reply_header;
394 struct datapath *dp;
395 int err;
396 struct sk_buff *reply;
397 struct dp_meter *old_meter;
398
399 if (!a[OVS_METER_ATTR_ID])
400 return -EINVAL;
401 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
402
403 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
404 &ovs_reply_header);
405 if (IS_ERR(reply))
406 return PTR_ERR(reply);
407
408 ovs_lock();
409
410 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
411 if (!dp) {
412 err = -ENODEV;
413 goto exit_unlock;
414 }
415
416 old_meter = lookup_meter(dp, meter_id);
417 if (old_meter) {
418 spin_lock_bh(&old_meter->lock);
419 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
420 WARN_ON(err);
421 spin_unlock_bh(&old_meter->lock);
422 detach_meter(old_meter);
423 }
424 ovs_unlock();
425 ovs_meter_free(old_meter);
426 genlmsg_end(reply, ovs_reply_header);
427 return genlmsg_reply(reply, info);
428
429exit_unlock:
430 ovs_unlock();
431 nlmsg_free(reply);
432 return err;
433}
434
435/* Meter action execution.
436 *
 437 * Return true if the 'meter_id' drop band is triggered. The 'skb' should
 438 * be dropped by the caller.
439 */
440bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
441 struct sw_flow_key *key, u32 meter_id)
442{
443 struct dp_meter *meter;
444 struct dp_meter_band *band;
445 long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
446 long long int long_delta_ms;
447 u32 delta_ms;
448 u32 cost;
449 int i, band_exceeded_max = -1;
450 u32 band_exceeded_rate = 0;
451
452 meter = lookup_meter(dp, meter_id);
453 /* Do not drop the packet when there is no meter. */
454 if (!meter)
455 return false;
456
457 /* Lock the meter while using it. */
458 spin_lock(&meter->lock);
459
460 long_delta_ms = (now_ms - meter->used); /* ms */
461
462 /* Make sure delta_ms will not be too large, so that bucket will not
463 * wrap around below.
464 */
465 delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
466 ? meter->max_delta_t : (u32)long_delta_ms;
467
468 /* Update meter statistics.
469 */
470 meter->used = now_ms;
471 meter->stats.n_packets += 1;
472 meter->stats.n_bytes += skb->len;
473
474 /* Bucket rate is either in kilobits per second, or in packets per
475 * second. We maintain the bucket in the units of either bits or
 476 * 1/1000th of a packet, respectively.
 477 * Then, when the rate is multiplied by milliseconds, we get the
478 * bucket units:
479 * msec * kbps = bits, and
480 * msec * packets/sec = 1/1000 packets.
481 *
482 * 'cost' is the number of bucket units in this packet.
483 */
484 cost = (meter->kbps) ? skb->len * 8 : 1000;
485
486 /* Update all bands and find the one hit with the highest rate. */
487 for (i = 0; i < meter->n_bands; ++i) {
488 long long int max_bucket_size;
489
490 band = &meter->bands[i];
491 max_bucket_size = (band->burst_size + band->rate) * 1000;
492
493 band->bucket += delta_ms * band->rate;
494 if (band->bucket > max_bucket_size)
495 band->bucket = max_bucket_size;
496
497 if (band->bucket >= cost) {
498 band->bucket -= cost;
499 } else if (band->rate > band_exceeded_rate) {
500 band_exceeded_rate = band->rate;
501 band_exceeded_max = i;
502 }
503 }
504
505 if (band_exceeded_max >= 0) {
506 /* Update band statistics. */
507 band = &meter->bands[band_exceeded_max];
508 band->stats.n_packets += 1;
509 band->stats.n_bytes += skb->len;
510
511 /* Drop band triggered, let the caller drop the 'skb'. */
512 if (band->type == OVS_METER_BAND_TYPE_DROP) {
513 spin_unlock(&meter->lock);
514 return true;
515 }
516 }
517
518 spin_unlock(&meter->lock);
519 return false;
520}
521
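
The band update above is a token bucket kept in "rate x milliseconds" units:
for a kbps meter the bucket holds bits, for a packet meter it holds
1/1000ths of a packet. A self-contained userspace model of one packet
passing through a kbps band (illustrative numbers only):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint32_t rate = 1000;           /* kbps, i.e. bits per ms */
                uint32_t burst = 500;           /* kbits */
                long long max = (long long)(burst + rate) * 1000; /* bits */
                long long bucket = max;         /* start with a full bucket */
                uint32_t delta_ms = 10;         /* time since last packet */
                uint32_t cost = 1500 * 8;       /* 1500-byte packet, in bits */

                bucket += (long long)delta_ms * rate;   /* ms * kbps = bits */
                if (bucket > max)
                        bucket = max;

                if (bucket >= cost) {
                        bucket -= cost;
                        printf("pass, %lld bits left\n", bucket);
                } else {
                        printf("band exceeded: DROP band would fire\n");
                }
                return 0;
        }
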
522static struct genl_ops dp_meter_genl_ops[] = {
523 { .cmd = OVS_METER_CMD_FEATURES,
524 .flags = 0, /* OK for unprivileged users. */
525 .policy = meter_policy,
526 .doit = ovs_meter_cmd_features
527 },
528 { .cmd = OVS_METER_CMD_SET,
529 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
530 * privilege.
531 */
532 .policy = meter_policy,
533 .doit = ovs_meter_cmd_set,
534 },
535 { .cmd = OVS_METER_CMD_GET,
536 .flags = 0, /* OK for unprivileged users. */
537 .policy = meter_policy,
538 .doit = ovs_meter_cmd_get,
539 },
540 { .cmd = OVS_METER_CMD_DEL,
541 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
542 * privilege.
543 */
544 .policy = meter_policy,
545 .doit = ovs_meter_cmd_del
546 },
547};
548
549static const struct genl_multicast_group ovs_meter_multicast_group = {
550 .name = OVS_METER_MCGROUP,
551};
552
553struct genl_family dp_meter_genl_family __ro_after_init = {
554 .hdrsize = sizeof(struct ovs_header),
555 .name = OVS_METER_FAMILY,
556 .version = OVS_METER_VERSION,
557 .maxattr = OVS_METER_ATTR_MAX,
558 .netnsok = true,
559 .parallel_ops = true,
560 .ops = dp_meter_genl_ops,
561 .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
562 .mcgrps = &ovs_meter_multicast_group,
563 .n_mcgrps = 1,
564 .module = THIS_MODULE,
565};
566
567int ovs_meters_init(struct datapath *dp)
568{
569 int i;
570
571 dp->meters = kmalloc_array(METER_HASH_BUCKETS,
572 sizeof(struct hlist_head), GFP_KERNEL);
573
574 if (!dp->meters)
575 return -ENOMEM;
576
577 for (i = 0; i < METER_HASH_BUCKETS; i++)
578 INIT_HLIST_HEAD(&dp->meters[i]);
579
580 return 0;
581}
582
583void ovs_meters_exit(struct datapath *dp)
584{
585 int i;
586
587 for (i = 0; i < METER_HASH_BUCKETS; i++) {
588 struct hlist_head *head = &dp->meters[i];
589 struct dp_meter *meter;
590 struct hlist_node *n;
591
592 hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
593 kfree(meter);
594 }
595
596 kfree(dp->meters);
597}
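
The token-bucket arithmetic in ovs_meter_execute() above is easier to follow with concrete numbers. Below is a minimal, standalone C sketch of one band update; the struct, function names, and userspace framing are illustrative, not part of the kernel code:

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative stand-in for one dp_meter_band; not the kernel struct. */
	struct band {
		uint32_t rate;       /* kbps for bit meters, packets/sec otherwise */
		uint32_t burst_size; /* same units as rate */
		int64_t  bucket;     /* bits, or 1/1000 packets */
	};

	/* One metering step, mirroring ovs_meter_execute(): refill the bucket
	 * by delta_ms * rate, clamp it, then try to pay the packet's cost.
	 * Returns 1 if the band is exceeded (the packet should be dropped). */
	static int band_step(struct band *b, uint32_t delta_ms,
			     uint32_t skb_len, int kbps)
	{
		/* msec * kbps = bits, msec * packets/sec = 1/1000 packets */
		int64_t max_bucket = (int64_t)(b->burst_size + b->rate) * 1000;
		uint32_t cost = kbps ? skb_len * 8 : 1000;

		b->bucket += (int64_t)delta_ms * b->rate;
		if (b->bucket > max_bucket)
			b->bucket = max_bucket;

		if (b->bucket >= cost) {
			b->bucket -= cost;      /* packet conforms */
			return 0;
		}
		return 1;                       /* band exceeded */
	}

	int main(void)
	{
		struct band b = { .rate = 1000, .burst_size = 1000, .bucket = 0 };

		printf("drop=%d\n", band_step(&b, 10, 1500, 1)); /* prints drop=1 */
		return 0;
	}

With a 1 Mbps band and 10 ms since the last packet, the bucket holds 10000 bits, so a 1500-byte (12000-bit) packet exceeds the band and would be dropped.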
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
new file mode 100644
index 000000000000..964ace2650f8
--- /dev/null
+++ b/net/openvswitch/meter.h
@@ -0,0 +1,54 @@
1/*
2 * Copyright (c) 2017 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 */
8
9#ifndef METER_H
10#define METER_H 1
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/netlink.h>
16#include <linux/openvswitch.h>
17#include <linux/genetlink.h>
18#include <linux/skbuff.h>
19
20#include "flow.h"
21struct datapath;
22
23#define DP_MAX_BANDS 1
24
25struct dp_meter_band {
26 u32 type;
27 u32 rate;
28 u32 burst_size;
29 u32 bucket; /* 1/1000 packets, or in bits */
30 struct ovs_flow_stats stats;
31};
32
33struct dp_meter {
34 spinlock_t lock; /* Per meter lock */
35 struct rcu_head rcu;
36	struct hlist_node dp_hash_node; /* Element in datapath->meters
37 * hash table.
38 */
39 u32 id;
40 u16 kbps:1, keep_stats:1;
41 u16 n_bands;
42 u32 max_delta_t;
43 u64 used;
44 struct ovs_flow_stats stats;
45 struct dp_meter_band bands[];
46};
47
48extern struct genl_family dp_meter_genl_family;
49int ovs_meters_init(struct datapath *dp);
50void ovs_meters_exit(struct datapath *dp);
51bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
52 struct sw_flow_key *key, u32 meter_id);
53
54#endif /* METER_H */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 0389398fa4ab..2e5e7a41d8ef 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
108 108
109 rtnl_lock(); 109 rtnl_lock();
110 err = netdev_master_upper_dev_link(vport->dev, 110 err = netdev_master_upper_dev_link(vport->dev,
111 get_dpdev(vport->dp), NULL, NULL); 111 get_dpdev(vport->dp),
112 NULL, NULL, NULL);
112 if (err) 113 if (err)
113 goto error_unlock; 114 goto error_unlock;
114 115
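
The extra NULL here lines up with the additional parameter netdev_master_upper_dev_link() grew in this cycle, an extack pointer for structured error reporting. A hedged sketch of how a caller could thread a real extack through instead; the helper name is hypothetical and assumes the 4.15-era five-argument signature:

	static int ovs_link_dp_dev(struct vport *vport,
				   struct netlink_ext_ack *extack)
	{
		int err;

		err = netdev_master_upper_dev_link(vport->dev,
						   get_dpdev(vport->dp),
						   NULL, NULL, extack);
		if (err)
			NL_SET_ERR_MSG(extack, "failed to link vport to datapath device");
		return err;
	}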
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c26172995511..737092ca9b4e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -201,11 +201,8 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *,
201static int prb_queue_frozen(struct tpacket_kbdq_core *); 201static int prb_queue_frozen(struct tpacket_kbdq_core *);
202static void prb_open_block(struct tpacket_kbdq_core *, 202static void prb_open_block(struct tpacket_kbdq_core *,
203 struct tpacket_block_desc *); 203 struct tpacket_block_desc *);
204static void prb_retire_rx_blk_timer_expired(unsigned long); 204static void prb_retire_rx_blk_timer_expired(struct timer_list *);
205static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); 205static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
206static void prb_init_blk_timer(struct packet_sock *,
207 struct tpacket_kbdq_core *,
208 void (*func) (unsigned long));
209static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); 206static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
210static void prb_clear_rxhash(struct tpacket_kbdq_core *, 207static void prb_clear_rxhash(struct tpacket_kbdq_core *,
211 struct tpacket3_hdr *); 208 struct tpacket3_hdr *);
@@ -540,22 +537,14 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
540 prb_del_retire_blk_timer(pkc); 537 prb_del_retire_blk_timer(pkc);
541} 538}
542 539
543static void prb_init_blk_timer(struct packet_sock *po,
544 struct tpacket_kbdq_core *pkc,
545 void (*func) (unsigned long))
546{
547 init_timer(&pkc->retire_blk_timer);
548 pkc->retire_blk_timer.data = (long)po;
549 pkc->retire_blk_timer.function = func;
550 pkc->retire_blk_timer.expires = jiffies;
551}
552
553static void prb_setup_retire_blk_timer(struct packet_sock *po) 540static void prb_setup_retire_blk_timer(struct packet_sock *po)
554{ 541{
555 struct tpacket_kbdq_core *pkc; 542 struct tpacket_kbdq_core *pkc;
556 543
557 pkc = GET_PBDQC_FROM_RB(&po->rx_ring); 544 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
558 prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); 545 timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
546 0);
547 pkc->retire_blk_timer.expires = jiffies;
559} 548}
560 549
561static int prb_calc_retire_blk_tmo(struct packet_sock *po, 550static int prb_calc_retire_blk_tmo(struct packet_sock *po,
@@ -673,9 +662,10 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
673 * prb_calc_retire_blk_tmo() calculates the tmo. 662 * prb_calc_retire_blk_tmo() calculates the tmo.
674 * 663 *
675 */ 664 */
676static void prb_retire_rx_blk_timer_expired(unsigned long data) 665static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
677{ 666{
678 struct packet_sock *po = (struct packet_sock *)data; 667 struct packet_sock *po =
668 from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
679 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); 669 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
680 unsigned int frozen; 670 unsigned int frozen;
681 struct tpacket_block_desc *pbd; 671 struct tpacket_block_desc *pbd;
@@ -1684,10 +1674,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1684 1674
1685 mutex_lock(&fanout_mutex); 1675 mutex_lock(&fanout_mutex);
1686 1676
1687 err = -EINVAL;
1688 if (!po->running)
1689 goto out;
1690
1691 err = -EALREADY; 1677 err = -EALREADY;
1692 if (po->fanout) 1678 if (po->fanout)
1693 goto out; 1679 goto out;
@@ -1749,7 +1735,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1749 list_add(&match->list, &fanout_list); 1735 list_add(&match->list, &fanout_list);
1750 } 1736 }
1751 err = -EINVAL; 1737 err = -EINVAL;
1752 if (match->type == type && 1738
1739 spin_lock(&po->bind_lock);
1740 if (po->running &&
1741 match->type == type &&
1753 match->prot_hook.type == po->prot_hook.type && 1742 match->prot_hook.type == po->prot_hook.type &&
1754 match->prot_hook.dev == po->prot_hook.dev) { 1743 match->prot_hook.dev == po->prot_hook.dev) {
1755 err = -ENOSPC; 1744 err = -ENOSPC;
@@ -1761,9 +1750,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1761 err = 0; 1750 err = 0;
1762 } 1751 }
1763 } 1752 }
1753 spin_unlock(&po->bind_lock);
1754
1755 if (err && !refcount_read(&match->sk_ref)) {
1756 list_del(&match->list);
1757 kfree(match);
1758 }
1759
1764out: 1760out:
1765 if (err && rollover) { 1761 if (err && rollover) {
1766 kfree(rollover); 1762 kfree_rcu(rollover, rcu);
1767 po->rollover = NULL; 1763 po->rollover = NULL;
1768 } 1764 }
1769 mutex_unlock(&fanout_mutex); 1765 mutex_unlock(&fanout_mutex);
@@ -1790,8 +1786,10 @@ static struct packet_fanout *fanout_release(struct sock *sk)
1790 else 1786 else
1791 f = NULL; 1787 f = NULL;
1792 1788
1793 if (po->rollover) 1789 if (po->rollover) {
1794 kfree_rcu(po->rollover, rcu); 1790 kfree_rcu(po->rollover, rcu);
1791 po->rollover = NULL;
1792 }
1795 } 1793 }
1796 mutex_unlock(&fanout_mutex); 1794 mutex_unlock(&fanout_mutex);
1797 1795
@@ -2834,6 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2834 struct virtio_net_hdr vnet_hdr = { 0 }; 2832 struct virtio_net_hdr vnet_hdr = { 0 };
2835 int offset = 0; 2833 int offset = 0;
2836 struct packet_sock *po = pkt_sk(sk); 2834 struct packet_sock *po = pkt_sk(sk);
2835 bool has_vnet_hdr = false;
2837 int hlen, tlen, linear; 2836 int hlen, tlen, linear;
2838 int extra_len = 0; 2837 int extra_len = 0;
2839 2838
@@ -2877,6 +2876,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2877 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); 2876 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2878 if (err) 2877 if (err)
2879 goto out_unlock; 2878 goto out_unlock;
2879 has_vnet_hdr = true;
2880 } 2880 }
2881 2881
2882 if (unlikely(sock_flag(sk, SOCK_NOFCS))) { 2882 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2935,7 +2935,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2935 skb->priority = sk->sk_priority; 2935 skb->priority = sk->sk_priority;
2936 skb->mark = sockc.mark; 2936 skb->mark = sockc.mark;
2937 2937
2938 if (po->has_vnet_hdr) { 2938 if (has_vnet_hdr) {
2939 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); 2939 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
2940 if (err) 2940 if (err)
2941 goto out_free; 2941 goto out_free;
@@ -3063,13 +3063,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3063 int ret = 0; 3063 int ret = 0;
3064 bool unlisted = false; 3064 bool unlisted = false;
3065 3065
3066 if (po->fanout)
3067 return -EINVAL;
3068
3069 lock_sock(sk); 3066 lock_sock(sk);
3070 spin_lock(&po->bind_lock); 3067 spin_lock(&po->bind_lock);
3071 rcu_read_lock(); 3068 rcu_read_lock();
3072 3069
3070 if (po->fanout) {
3071 ret = -EINVAL;
3072 goto out_unlock;
3073 }
3074
3073 if (name) { 3075 if (name) {
3074 dev = dev_get_by_name_rcu(sock_net(sk), name); 3076 dev = dev_get_by_name_rcu(sock_net(sk), name);
3075 if (!dev) { 3077 if (!dev) {
@@ -3841,6 +3843,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3841 void *data = &val; 3843 void *data = &val;
3842 union tpacket_stats_u st; 3844 union tpacket_stats_u st;
3843 struct tpacket_rollover_stats rstats; 3845 struct tpacket_rollover_stats rstats;
3846 struct packet_rollover *rollover;
3844 3847
3845 if (level != SOL_PACKET) 3848 if (level != SOL_PACKET)
3846 return -ENOPROTOOPT; 3849 return -ENOPROTOOPT;
@@ -3919,13 +3922,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3919 0); 3922 0);
3920 break; 3923 break;
3921 case PACKET_ROLLOVER_STATS: 3924 case PACKET_ROLLOVER_STATS:
3922 if (!po->rollover) 3925 rcu_read_lock();
3926 rollover = rcu_dereference(po->rollover);
3927 if (rollover) {
3928 rstats.tp_all = atomic_long_read(&rollover->num);
3929 rstats.tp_huge = atomic_long_read(&rollover->num_huge);
3930 rstats.tp_failed = atomic_long_read(&rollover->num_failed);
3931 data = &rstats;
3932 lv = sizeof(rstats);
3933 }
3934 rcu_read_unlock();
3935 if (!rollover)
3923 return -EINVAL; 3936 return -EINVAL;
3924 rstats.tp_all = atomic_long_read(&po->rollover->num);
3925 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
3926 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
3927 data = &rstats;
3928 lv = sizeof(rstats);
3929 break; 3937 break;
3930 case PACKET_TX_HAS_OFF: 3938 case PACKET_TX_HAS_OFF:
3931 val = po->tp_tx_has_off; 3939 val = po->tp_tx_has_off;
@@ -4552,6 +4560,7 @@ static int __net_init packet_net_init(struct net *net)
4552static void __net_exit packet_net_exit(struct net *net) 4560static void __net_exit packet_net_exit(struct net *net)
4553{ 4561{
4554 remove_proc_entry("packet", net->proc_net); 4562 remove_proc_entry("packet", net->proc_net);
4563 WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
4555} 4564}
4556 4565
4557static struct pernet_operations packet_net_ops = { 4566static struct pernet_operations packet_net_ops = {
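
Several hunks above (and in the rose changes further down) are instances of the same mechanical conversion: timer callbacks move from the old unsigned long cookie to a struct timer_list * argument and recover their context with from_timer(). A generic before/after sketch with hypothetical names:

	#include <linux/timer.h>

	/* Hypothetical driver state, to illustrate the conversion. */
	struct foo {
		struct timer_list retire_timer;
		/* ... */
	};

	/* Old style: the callback received an opaque unsigned long.
	 * setup: init_timer(&f->retire_timer);
	 *        f->retire_timer.data = (unsigned long)f;
	 *        f->retire_timer.function = foo_retire_old; */
	static void foo_retire_old(unsigned long data)
	{
		struct foo *f = (struct foo *)data;
		/* ... */
	}

	/* New style: the callback receives the timer itself and recovers
	 * the container with from_timer(), a container_of() wrapper.
	 * setup: timer_setup(&f->retire_timer, foo_retire_new, 0); */
	static void foo_retire_new(struct timer_list *t)
	{
		struct foo *f = from_timer(f, t, retire_timer);
		/* ... */
	}

This is also why prb_init_blk_timer() could be deleted outright: timer_setup() plus from_timer() replace the manual .data/.function plumbing.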
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 94d1d405a116..562fbc155006 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __PACKET_INTERNAL_H__ 2#ifndef __PACKET_INTERNAL_H__
2#define __PACKET_INTERNAL_H__ 3#define __PACKET_INTERNAL_H__
3 4
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index e10b1b182ce3..444f875932b9 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-$(CONFIG_PHONET) += phonet.o pn_pep.o 2obj-$(CONFIG_PHONET) += phonet.o pn_pep.o
2 3
3phonet-y := \ 4phonet-y := \
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index f925753668a7..3b0ef691f5b1 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -35,11 +35,11 @@
35#include <net/phonet/pn_dev.h> 35#include <net/phonet/pn_dev.h>
36 36
37/* Transport protocol registration */ 37/* Transport protocol registration */
38static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; 38static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
39 39
40static struct phonet_protocol *phonet_proto_get(unsigned int protocol) 40static const struct phonet_protocol *phonet_proto_get(unsigned int protocol)
41{ 41{
42 struct phonet_protocol *pp; 42 const struct phonet_protocol *pp;
43 43
44 if (protocol >= PHONET_NPROTO) 44 if (protocol >= PHONET_NPROTO)
45 return NULL; 45 return NULL;
@@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
53 return pp; 53 return pp;
54} 54}
55 55
56static inline void phonet_proto_put(struct phonet_protocol *pp) 56static inline void phonet_proto_put(const struct phonet_protocol *pp)
57{ 57{
58 module_put(pp->prot->owner); 58 module_put(pp->prot->owner);
59} 59}
@@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
65{ 65{
66 struct sock *sk; 66 struct sock *sk;
67 struct pn_sock *pn; 67 struct pn_sock *pn;
68 struct phonet_protocol *pnp; 68 const struct phonet_protocol *pnp;
69 int err; 69 int err;
70 70
71 if (!capable(CAP_SYS_ADMIN)) 71 if (!capable(CAP_SYS_ADMIN))
@@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr)
149 return 1; 149 return 1;
150} 150}
151 151
152struct header_ops phonet_header_ops = { 152const struct header_ops phonet_header_ops = {
153 .create = pn_header_create, 153 .create = pn_header_create,
154 .parse = pn_header_parse, 154 .parse = pn_header_parse,
155}; 155};
@@ -470,7 +470,7 @@ static struct packet_type phonet_packet_type __read_mostly = {
470static DEFINE_MUTEX(proto_tab_lock); 470static DEFINE_MUTEX(proto_tab_lock);
471 471
472int __init_or_module phonet_proto_register(unsigned int protocol, 472int __init_or_module phonet_proto_register(unsigned int protocol,
473 struct phonet_protocol *pp) 473 const struct phonet_protocol *pp)
474{ 474{
475 int err = 0; 475 int err = 0;
476 476
@@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
492} 492}
493EXPORT_SYMBOL(phonet_proto_register); 493EXPORT_SYMBOL(phonet_proto_register);
494 494
495void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp) 495void phonet_proto_unregister(unsigned int protocol,
496 const struct phonet_protocol *pp)
496{ 497{
497 mutex_lock(&proto_tab_lock); 498 mutex_lock(&proto_tab_lock);
498 BUG_ON(proto_tab[protocol] != pp); 499 BUG_ON(proto_tab[protocol] != pp);
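
With proto_tab[] and the register/unregister API constified, protocol descriptors can live in rodata. A sketch of what a registration now looks like; the descriptor and its ops/proto are placeholders, not real phonet code:

	static const struct phonet_protocol foo_pn_proto = {
		.ops       = &foo_ops,     /* hypothetical proto_ops */
		.prot      = &foo_proto,   /* hypothetical proto */
		.sock_type = SOCK_DGRAM,
	};

	static int __init foo_init(void)
	{
		/* No cast needed: the table now stores const pointers. */
		return phonet_proto_register(PN_PROTO_PHONET, &foo_pn_proto);
	}

The datagram.c and pep.c hunks below are the matching updates to the two in-tree descriptors.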
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 5e710435ffa9..b44fb9018fb8 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -195,7 +195,7 @@ static struct proto pn_proto = {
195 .name = "PHONET", 195 .name = "PHONET",
196}; 196};
197 197
198static struct phonet_protocol pn_dgram_proto = { 198static const struct phonet_protocol pn_dgram_proto = {
199 .ops = &phonet_dgram_ops, 199 .ops = &phonet_dgram_ops,
200 .prot = &pn_proto, 200 .prot = &pn_proto,
201 .sock_type = SOCK_DGRAM, 201 .sock_type = SOCK_DGRAM,
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e81537991ddf..9fc76b19cd3c 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1351,7 +1351,7 @@ static struct proto pep_proto = {
1351 .name = "PNPIPE", 1351 .name = "PNPIPE",
1352}; 1352};
1353 1353
1354static struct phonet_protocol pep_pn_proto = { 1354static const struct phonet_protocol pep_pn_proto = {
1355 .ops = &phonet_stream_ops, 1355 .ops = &phonet_stream_ops,
1356 .prot = &pep_proto, 1356 .prot = &pep_proto,
1357 .sock_type = SOCK_SEQPACKET, 1357 .sock_type = SOCK_SEQPACKET,
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 2cb4c5dfad6f..77787512fc32 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -331,7 +331,10 @@ static int __net_init phonet_init_net(struct net *net)
331 331
332static void __net_exit phonet_exit_net(struct net *net) 332static void __net_exit phonet_exit_net(struct net *net)
333{ 333{
334 struct phonet_net *pnn = phonet_pernet(net);
335
334 remove_proc_entry("phonet", net->proc_net); 336 remove_proc_entry("phonet", net->proc_net);
337 WARN_ON_ONCE(!list_empty(&pnn->pndevs.list));
335} 338}
336 339
337static struct pernet_operations phonet_net_ops = { 340static struct pernet_operations phonet_net_ops = {
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 3a6ad0f438dc..64f95624f219 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -296,6 +296,6 @@ static void __exit psample_module_exit(void)
296module_init(psample_module_init); 296module_init(psample_module_init);
297module_exit(psample_module_exit); 297module_exit(psample_module_exit);
298 298
299MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); 299MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
300MODULE_DESCRIPTION("netlink channel for packet sampling"); 300MODULE_DESCRIPTION("netlink channel for packet sampling");
301MODULE_LICENSE("GPL v2"); 301MODULE_LICENSE("GPL v2");
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c2f5c13550c0..77ab05e23001 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,26 +20,15 @@
20 20
21#include "qrtr.h" 21#include "qrtr.h"
22 22
23#define QRTR_PROTO_VER 1 23#define QRTR_PROTO_VER_1 1
24#define QRTR_PROTO_VER_2 3
24 25
25/* auto-bind range */ 26/* auto-bind range */
26#define QRTR_MIN_EPH_SOCKET 0x4000 27#define QRTR_MIN_EPH_SOCKET 0x4000
27#define QRTR_MAX_EPH_SOCKET 0x7fff 28#define QRTR_MAX_EPH_SOCKET 0x7fff
28 29
29enum qrtr_pkt_type {
30 QRTR_TYPE_DATA = 1,
31 QRTR_TYPE_HELLO = 2,
32 QRTR_TYPE_BYE = 3,
33 QRTR_TYPE_NEW_SERVER = 4,
34 QRTR_TYPE_DEL_SERVER = 5,
35 QRTR_TYPE_DEL_CLIENT = 6,
36 QRTR_TYPE_RESUME_TX = 7,
37 QRTR_TYPE_EXIT = 8,
38 QRTR_TYPE_PING = 9,
39};
40
41/** 30/**
42 * struct qrtr_hdr - (I|R)PCrouter packet header 31 * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
43 * @version: protocol version 32 * @version: protocol version
44 * @type: packet type; one of QRTR_TYPE_* 33 * @type: packet type; one of QRTR_TYPE_*
45 * @src_node_id: source node 34 * @src_node_id: source node
@@ -49,7 +38,7 @@ enum qrtr_pkt_type {
49 * @dst_node_id: destination node 38 * @dst_node_id: destination node
50 * @dst_port_id: destination port 39 * @dst_port_id: destination port
51 */ 40 */
52struct qrtr_hdr { 41struct qrtr_hdr_v1 {
53 __le32 version; 42 __le32 version;
54 __le32 type; 43 __le32 type;
55 __le32 src_node_id; 44 __le32 src_node_id;
@@ -60,9 +49,44 @@ struct qrtr_hdr {
60 __le32 dst_port_id; 49 __le32 dst_port_id;
61} __packed; 50} __packed;
62 51
63#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) 52/**
64#define QRTR_NODE_BCAST ((unsigned int)-1) 53 * struct qrtr_hdr_v2 - (I|R)PCrouter packet header for later versions
65#define QRTR_PORT_CTRL ((unsigned int)-2) 54 * @version: protocol version
55 * @type: packet type; one of QRTR_TYPE_*
56 * @flags: bitmask of QRTR_FLAGS_*
57 * @optlen: length of optional header data
58 * @size: length of packet, excluding this header and optlen
59 * @src_node_id: source node
60 * @src_port_id: source port
61 * @dst_node_id: destination node
62 * @dst_port_id: destination port
63 */
64struct qrtr_hdr_v2 {
65 u8 version;
66 u8 type;
67 u8 flags;
68 u8 optlen;
69 __le32 size;
70 __le16 src_node_id;
71 __le16 src_port_id;
72 __le16 dst_node_id;
73 __le16 dst_port_id;
74};
75
76#define QRTR_FLAGS_CONFIRM_RX BIT(0)
77
78struct qrtr_cb {
79 u32 src_node;
80 u32 src_port;
81 u32 dst_node;
82 u32 dst_port;
83
84 u8 type;
85 u8 confirm_rx;
86};
87
88#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
89 sizeof(struct qrtr_hdr_v2))
66 90
67struct qrtr_sock { 91struct qrtr_sock {
68 /* WARNING: sk must be the first member */ 92 /* WARNING: sk must be the first member */
@@ -111,8 +135,12 @@ struct qrtr_node {
111 struct list_head item; 135 struct list_head item;
112}; 136};
113 137
114static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb); 138static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
115static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb); 139 int type, struct sockaddr_qrtr *from,
140 struct sockaddr_qrtr *to);
141static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
142 int type, struct sockaddr_qrtr *from,
143 struct sockaddr_qrtr *to);
116 144
117/* Release node resources and free the node. 145/* Release node resources and free the node.
118 * 146 *
@@ -150,10 +178,27 @@ static void qrtr_node_release(struct qrtr_node *node)
150} 178}
151 179
152/* Pass an outgoing packet socket buffer to the endpoint driver. */ 180/* Pass an outgoing packet socket buffer to the endpoint driver. */
153static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) 181static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
182 int type, struct sockaddr_qrtr *from,
183 struct sockaddr_qrtr *to)
154{ 184{
185 struct qrtr_hdr_v1 *hdr;
186 size_t len = skb->len;
155 int rc = -ENODEV; 187 int rc = -ENODEV;
156 188
189 hdr = skb_push(skb, sizeof(*hdr));
190 hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
191 hdr->type = cpu_to_le32(type);
192 hdr->src_node_id = cpu_to_le32(from->sq_node);
193 hdr->src_port_id = cpu_to_le32(from->sq_port);
194 hdr->dst_node_id = cpu_to_le32(to->sq_node);
195 hdr->dst_port_id = cpu_to_le32(to->sq_port);
196
197 hdr->size = cpu_to_le32(len);
198 hdr->confirm_rx = 0;
199
200 skb_put_padto(skb, ALIGN(len, 4));
201
157 mutex_lock(&node->ep_lock); 202 mutex_lock(&node->ep_lock);
158 if (node->ep) 203 if (node->ep)
159 rc = node->ep->xmit(node->ep, skb); 204 rc = node->ep->xmit(node->ep, skb);
@@ -207,125 +252,103 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
207int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) 252int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
208{ 253{
209 struct qrtr_node *node = ep->node; 254 struct qrtr_node *node = ep->node;
210 const struct qrtr_hdr *phdr = data; 255 const struct qrtr_hdr_v1 *v1;
256 const struct qrtr_hdr_v2 *v2;
211 struct sk_buff *skb; 257 struct sk_buff *skb;
212 unsigned int psize; 258 struct qrtr_cb *cb;
213 unsigned int size; 259 unsigned int size;
214 unsigned int type;
215 unsigned int ver; 260 unsigned int ver;
216 unsigned int dst; 261 size_t hdrlen;
217
218 if (len < QRTR_HDR_SIZE || len & 3)
219 return -EINVAL;
220
221 ver = le32_to_cpu(phdr->version);
222 size = le32_to_cpu(phdr->size);
223 type = le32_to_cpu(phdr->type);
224 dst = le32_to_cpu(phdr->dst_port_id);
225
226 psize = (size + 3) & ~3;
227 262
228 if (ver != QRTR_PROTO_VER) 263 if (len & 3)
229 return -EINVAL;
230
231 if (len != psize + QRTR_HDR_SIZE)
232 return -EINVAL;
233
234 if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
235 return -EINVAL; 264 return -EINVAL;
236 265
237 skb = netdev_alloc_skb(NULL, len); 266 skb = netdev_alloc_skb(NULL, len);
238 if (!skb) 267 if (!skb)
239 return -ENOMEM; 268 return -ENOMEM;
240 269
241 skb_reset_transport_header(skb); 270 cb = (struct qrtr_cb *)skb->cb;
242 skb_put_data(skb, data, len);
243
244 skb_queue_tail(&node->rx_queue, skb);
245 schedule_work(&node->work);
246
247 return 0;
248}
249EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
250 271
251static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len, 272 /* Version field in v1 is little endian, so this works for both cases */
252 u32 src_node, u32 dst_node) 273 ver = *(u8 *)data;
253{
254 struct qrtr_hdr *hdr;
255 struct sk_buff *skb;
256
257 skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
258 if (!skb)
259 return NULL;
260 skb_reset_transport_header(skb);
261 274
262 hdr = skb_put(skb, QRTR_HDR_SIZE); 275 switch (ver) {
263 hdr->version = cpu_to_le32(QRTR_PROTO_VER); 276 case QRTR_PROTO_VER_1:
264 hdr->type = cpu_to_le32(type); 277 v1 = data;
265 hdr->src_node_id = cpu_to_le32(src_node); 278 hdrlen = sizeof(*v1);
266 hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
267 hdr->confirm_rx = cpu_to_le32(0);
268 hdr->size = cpu_to_le32(pkt_len);
269 hdr->dst_node_id = cpu_to_le32(dst_node);
270 hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
271 279
272 return skb; 280 cb->type = le32_to_cpu(v1->type);
273} 281 cb->src_node = le32_to_cpu(v1->src_node_id);
282 cb->src_port = le32_to_cpu(v1->src_port_id);
283 cb->confirm_rx = !!v1->confirm_rx;
284 cb->dst_node = le32_to_cpu(v1->dst_node_id);
285 cb->dst_port = le32_to_cpu(v1->dst_port_id);
274 286
275/* Allocate and construct a resume-tx packet. */ 287 size = le32_to_cpu(v1->size);
276static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, 288 break;
277 u32 dst_node, u32 port) 289 case QRTR_PROTO_VER_2:
278{ 290 v2 = data;
279 const int pkt_len = 20; 291 hdrlen = sizeof(*v2) + v2->optlen;
280 struct sk_buff *skb; 292
281 __le32 *buf; 293 cb->type = v2->type;
294 cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
295 cb->src_node = le16_to_cpu(v2->src_node_id);
296 cb->src_port = le16_to_cpu(v2->src_port_id);
297 cb->dst_node = le16_to_cpu(v2->dst_node_id);
298 cb->dst_port = le16_to_cpu(v2->dst_port_id);
299
300 if (cb->src_port == (u16)QRTR_PORT_CTRL)
301 cb->src_port = QRTR_PORT_CTRL;
302 if (cb->dst_port == (u16)QRTR_PORT_CTRL)
303 cb->dst_port = QRTR_PORT_CTRL;
304
305 size = le32_to_cpu(v2->size);
306 break;
307 default:
308 pr_err("qrtr: Invalid version %d\n", ver);
309 goto err;
310 }
282 311
283 skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len, 312 if (len != ALIGN(size, 4) + hdrlen)
284 src_node, dst_node); 313 goto err;
285 if (!skb)
286 return NULL;
287 314
288 buf = skb_put_zero(skb, pkt_len); 315 if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
289 buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); 316 goto err;
290 buf[1] = cpu_to_le32(src_node);
291 buf[2] = cpu_to_le32(port);
292 317
293 return skb; 318 skb_put_data(skb, data + hdrlen, size);
294}
295 319
296/* Allocate and construct a BYE message to signal remote termination */ 320 skb_queue_tail(&node->rx_queue, skb);
297static struct sk_buff *qrtr_alloc_local_bye(u32 src_node) 321 schedule_work(&node->work);
298{
299 const int pkt_len = 20;
300 struct sk_buff *skb;
301 __le32 *buf;
302 322
303 skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len, 323 return 0;
304 src_node, qrtr_local_nid);
305 if (!skb)
306 return NULL;
307 324
308 buf = skb_put_zero(skb, pkt_len); 325err:
309 buf[0] = cpu_to_le32(QRTR_TYPE_BYE); 326 kfree_skb(skb);
327 return -EINVAL;
310 328
311 return skb;
312} 329}
330EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
313 331
314static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq) 332/**
333 * qrtr_alloc_ctrl_packet() - allocate control packet skb
334 * @pkt: reference to qrtr_ctrl_pkt pointer
335 *
336 * Returns newly allocated sk_buff, or NULL on failure
337 *
338 * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
339 * on success returns a reference to the control packet in @pkt.
340 */
341static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
315{ 342{
316 const int pkt_len = 20; 343 const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
317 struct sk_buff *skb; 344 struct sk_buff *skb;
318 __le32 *buf;
319 345
320 skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len, 346 skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL);
321 sq->sq_node, QRTR_NODE_BCAST);
322 if (!skb) 347 if (!skb)
323 return NULL; 348 return NULL;
324 349
325 buf = skb_put_zero(skb, pkt_len); 350 skb_reserve(skb, QRTR_HDR_MAX_SIZE);
326 buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); 351 *pkt = skb_put_zero(skb, pkt_len);
327 buf[1] = cpu_to_le32(sq->sq_node);
328 buf[2] = cpu_to_le32(sq->sq_port);
329 352
330 return skb; 353 return skb;
331} 354}
@@ -340,24 +363,26 @@ static void qrtr_port_put(struct qrtr_sock *ipc);
340static void qrtr_node_rx_work(struct work_struct *work) 363static void qrtr_node_rx_work(struct work_struct *work)
341{ 364{
342 struct qrtr_node *node = container_of(work, struct qrtr_node, work); 365 struct qrtr_node *node = container_of(work, struct qrtr_node, work);
366 struct qrtr_ctrl_pkt *pkt;
367 struct sockaddr_qrtr dst;
368 struct sockaddr_qrtr src;
343 struct sk_buff *skb; 369 struct sk_buff *skb;
344 370
345 while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { 371 while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
346 const struct qrtr_hdr *phdr;
347 u32 dst_node, dst_port;
348 struct qrtr_sock *ipc; 372 struct qrtr_sock *ipc;
349 u32 src_node; 373 struct qrtr_cb *cb;
350 int confirm; 374 int confirm;
351 375
352 phdr = (const struct qrtr_hdr *)skb_transport_header(skb); 376 cb = (struct qrtr_cb *)skb->cb;
353 src_node = le32_to_cpu(phdr->src_node_id); 377 src.sq_node = cb->src_node;
354 dst_node = le32_to_cpu(phdr->dst_node_id); 378 src.sq_port = cb->src_port;
355 dst_port = le32_to_cpu(phdr->dst_port_id); 379 dst.sq_node = cb->dst_node;
356 confirm = !!phdr->confirm_rx; 380 dst.sq_port = cb->dst_port;
381 confirm = !!cb->confirm_rx;
357 382
358 qrtr_node_assign(node, src_node); 383 qrtr_node_assign(node, cb->src_node);
359 384
360 ipc = qrtr_port_lookup(dst_port); 385 ipc = qrtr_port_lookup(cb->dst_port);
361 if (!ipc) { 386 if (!ipc) {
362 kfree_skb(skb); 387 kfree_skb(skb);
363 } else { 388 } else {
@@ -368,10 +393,16 @@ static void qrtr_node_rx_work(struct work_struct *work)
368 } 393 }
369 394
370 if (confirm) { 395 if (confirm) {
371 skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); 396 skb = qrtr_alloc_ctrl_packet(&pkt);
372 if (!skb) 397 if (!skb)
373 break; 398 break;
374 if (qrtr_node_enqueue(node, skb)) 399
400 pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
401 pkt->client.node = cpu_to_le32(dst.sq_node);
402 pkt->client.port = cpu_to_le32(dst.sq_port);
403
404 if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
405 &dst, &src))
375 break; 406 break;
376 } 407 }
377 } 408 }
@@ -421,6 +452,9 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
421void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) 452void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
422{ 453{
423 struct qrtr_node *node = ep->node; 454 struct qrtr_node *node = ep->node;
455 struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
456 struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
457 struct qrtr_ctrl_pkt *pkt;
424 struct sk_buff *skb; 458 struct sk_buff *skb;
425 459
426 mutex_lock(&node->ep_lock); 460 mutex_lock(&node->ep_lock);
@@ -428,9 +462,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
428 mutex_unlock(&node->ep_lock); 462 mutex_unlock(&node->ep_lock);
429 463
430 /* Notify the local controller about the event */ 464 /* Notify the local controller about the event */
431 skb = qrtr_alloc_local_bye(node->nid); 465 skb = qrtr_alloc_ctrl_packet(&pkt);
432 if (skb) 466 if (skb) {
433 qrtr_local_enqueue(NULL, skb); 467 pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
468 qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
469 }
434 470
435 qrtr_node_release(node); 471 qrtr_node_release(node);
436 ep->node = NULL; 472 ep->node = NULL;
@@ -466,13 +502,24 @@ static void qrtr_port_put(struct qrtr_sock *ipc)
466/* Remove port assignment. */ 502/* Remove port assignment. */
467static void qrtr_port_remove(struct qrtr_sock *ipc) 503static void qrtr_port_remove(struct qrtr_sock *ipc)
468{ 504{
505 struct qrtr_ctrl_pkt *pkt;
469 struct sk_buff *skb; 506 struct sk_buff *skb;
470 int port = ipc->us.sq_port; 507 int port = ipc->us.sq_port;
508 struct sockaddr_qrtr to;
471 509
472 skb = qrtr_alloc_del_client(&ipc->us); 510 to.sq_family = AF_QIPCRTR;
511 to.sq_node = QRTR_NODE_BCAST;
512 to.sq_port = QRTR_PORT_CTRL;
513
514 skb = qrtr_alloc_ctrl_packet(&pkt);
473 if (skb) { 515 if (skb) {
516 pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
517 pkt->client.node = cpu_to_le32(ipc->us.sq_node);
518 pkt->client.port = cpu_to_le32(ipc->us.sq_port);
519
474 skb_set_owner_w(skb, &ipc->sk); 520 skb_set_owner_w(skb, &ipc->sk);
475 qrtr_bcast_enqueue(NULL, skb); 521 qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
522 &to);
476 } 523 }
477 524
478 if (port == QRTR_PORT_CTRL) 525 if (port == QRTR_PORT_CTRL)
@@ -541,7 +588,7 @@ static void qrtr_reset_ports(void)
541 588
542 sock_hold(&ipc->sk); 589 sock_hold(&ipc->sk);
543 ipc->sk.sk_err = ENETRESET; 590 ipc->sk.sk_err = ENETRESET;
544 wake_up_interruptible(sk_sleep(&ipc->sk)); 591 ipc->sk.sk_error_report(&ipc->sk);
545 sock_put(&ipc->sk); 592 sock_put(&ipc->sk);
546 } 593 }
547 mutex_unlock(&qrtr_port_lock); 594 mutex_unlock(&qrtr_port_lock);
@@ -620,19 +667,23 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
620} 667}
621 668
622/* Queue packet to local peer socket. */ 669/* Queue packet to local peer socket. */
623static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) 670static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
671 int type, struct sockaddr_qrtr *from,
672 struct sockaddr_qrtr *to)
624{ 673{
625 const struct qrtr_hdr *phdr;
626 struct qrtr_sock *ipc; 674 struct qrtr_sock *ipc;
675 struct qrtr_cb *cb;
627 676
628 phdr = (const struct qrtr_hdr *)skb_transport_header(skb); 677 ipc = qrtr_port_lookup(to->sq_port);
629
630 ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
631 if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ 678 if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
632 kfree_skb(skb); 679 kfree_skb(skb);
633 return -ENODEV; 680 return -ENODEV;
634 } 681 }
635 682
683 cb = (struct qrtr_cb *)skb->cb;
684 cb->src_node = from->sq_node;
685 cb->src_port = from->sq_port;
686
636 if (sock_queue_rcv_skb(&ipc->sk, skb)) { 687 if (sock_queue_rcv_skb(&ipc->sk, skb)) {
637 qrtr_port_put(ipc); 688 qrtr_port_put(ipc);
638 kfree_skb(skb); 689 kfree_skb(skb);
@@ -645,7 +696,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
645} 696}
646 697
647/* Queue packet for broadcast. */ 698/* Queue packet for broadcast. */
648static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) 699static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
700 int type, struct sockaddr_qrtr *from,
701 struct sockaddr_qrtr *to)
649{ 702{
650 struct sk_buff *skbn; 703 struct sk_buff *skbn;
651 704
@@ -655,11 +708,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
655 if (!skbn) 708 if (!skbn)
656 break; 709 break;
657 skb_set_owner_w(skbn, skb->sk); 710 skb_set_owner_w(skbn, skb->sk);
658 qrtr_node_enqueue(node, skbn); 711 qrtr_node_enqueue(node, skbn, type, from, to);
659 } 712 }
660 mutex_unlock(&qrtr_node_lock); 713 mutex_unlock(&qrtr_node_lock);
661 714
662 qrtr_local_enqueue(node, skb); 715 qrtr_local_enqueue(node, skb, type, from, to);
663 716
664 return 0; 717 return 0;
665} 718}
@@ -667,13 +720,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
667static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 720static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
668{ 721{
669 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); 722 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
670 int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); 723 int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
724 struct sockaddr_qrtr *, struct sockaddr_qrtr *);
671 struct qrtr_sock *ipc = qrtr_sk(sock->sk); 725 struct qrtr_sock *ipc = qrtr_sk(sock->sk);
672 struct sock *sk = sock->sk; 726 struct sock *sk = sock->sk;
673 struct qrtr_node *node; 727 struct qrtr_node *node;
674 struct qrtr_hdr *hdr;
675 struct sk_buff *skb; 728 struct sk_buff *skb;
676 size_t plen; 729 size_t plen;
730 u32 type = QRTR_TYPE_DATA;
677 int rc; 731 int rc;
678 732
679 if (msg->msg_flags & ~(MSG_DONTWAIT)) 733 if (msg->msg_flags & ~(MSG_DONTWAIT))
@@ -722,37 +776,19 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
722 } 776 }
723 777
724 plen = (len + 3) & ~3; 778 plen = (len + 3) & ~3;
725 skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, 779 skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
726 msg->msg_flags & MSG_DONTWAIT, &rc); 780 msg->msg_flags & MSG_DONTWAIT, &rc);
727 if (!skb) 781 if (!skb)
728 goto out_node; 782 goto out_node;
729 783
730 skb_reset_transport_header(skb); 784 skb_reserve(skb, QRTR_HDR_MAX_SIZE);
731 skb_put(skb, len + QRTR_HDR_SIZE);
732
733 hdr = (struct qrtr_hdr *)skb_transport_header(skb);
734 hdr->version = cpu_to_le32(QRTR_PROTO_VER);
735 hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
736 hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
737 hdr->confirm_rx = cpu_to_le32(0);
738 hdr->size = cpu_to_le32(len);
739 hdr->dst_node_id = cpu_to_le32(addr->sq_node);
740 hdr->dst_port_id = cpu_to_le32(addr->sq_port);
741 785
742 rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, 786 rc = memcpy_from_msg(skb_put(skb, len), msg, len);
743 &msg->msg_iter, len);
744 if (rc) { 787 if (rc) {
745 kfree_skb(skb); 788 kfree_skb(skb);
746 goto out_node; 789 goto out_node;
747 } 790 }
748 791
749 if (plen != len) {
750 rc = skb_pad(skb, plen - len);
751 if (rc)
752 goto out_node;
753 skb_put(skb, plen - len);
754 }
755
756 if (ipc->us.sq_port == QRTR_PORT_CTRL) { 792 if (ipc->us.sq_port == QRTR_PORT_CTRL) {
757 if (len < 4) { 793 if (len < 4) {
758 rc = -EINVAL; 794 rc = -EINVAL;
@@ -761,12 +797,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
761 } 797 }
762 798
763 /* control messages already require the type as 'command' */ 799 /* control messages already require the type as 'command' */
764 skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); 800 skb_copy_bits(skb, 0, &type, 4);
765 } else { 801 type = le32_to_cpu(type);
766 hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
767 } 802 }
768 803
769 rc = enqueue_fn(node, skb); 804 rc = enqueue_fn(node, skb, type, &ipc->us, addr);
770 if (rc >= 0) 805 if (rc >= 0)
771 rc = len; 806 rc = len;
772 807
@@ -781,9 +816,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
781 size_t size, int flags) 816 size_t size, int flags)
782{ 817{
783 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); 818 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
784 const struct qrtr_hdr *phdr;
785 struct sock *sk = sock->sk; 819 struct sock *sk = sock->sk;
786 struct sk_buff *skb; 820 struct sk_buff *skb;
821 struct qrtr_cb *cb;
787 int copied, rc; 822 int copied, rc;
788 823
789 lock_sock(sk); 824 lock_sock(sk);
@@ -800,22 +835,22 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
800 return rc; 835 return rc;
801 } 836 }
802 837
803 phdr = (const struct qrtr_hdr *)skb_transport_header(skb); 838 copied = skb->len;
804 copied = le32_to_cpu(phdr->size);
805 if (copied > size) { 839 if (copied > size) {
806 copied = size; 840 copied = size;
807 msg->msg_flags |= MSG_TRUNC; 841 msg->msg_flags |= MSG_TRUNC;
808 } 842 }
809 843
810 rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); 844 rc = skb_copy_datagram_msg(skb, 0, msg, copied);
811 if (rc < 0) 845 if (rc < 0)
812 goto out; 846 goto out;
813 rc = copied; 847 rc = copied;
814 848
815 if (addr) { 849 if (addr) {
850 cb = (struct qrtr_cb *)skb->cb;
816 addr->sq_family = AF_QIPCRTR; 851 addr->sq_family = AF_QIPCRTR;
817 addr->sq_node = le32_to_cpu(phdr->src_node_id); 852 addr->sq_node = cb->src_node;
818 addr->sq_port = le32_to_cpu(phdr->src_port_id); 853 addr->sq_port = cb->src_port;
819 msg->msg_namelen = sizeof(*addr); 854 msg->msg_namelen = sizeof(*addr);
820 } 855 }
821 856
@@ -908,7 +943,7 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
908 case TIOCINQ: 943 case TIOCINQ:
909 skb = skb_peek(&sk->sk_receive_queue); 944 skb = skb_peek(&sk->sk_receive_queue);
910 if (skb) 945 if (skb)
911 len = skb->len - QRTR_HDR_SIZE; 946 len = skb->len;
912 rc = put_user(len, (int __user *)argp); 947 rc = put_user(len, (int __user *)argp);
913 break; 948 break;
914 case SIOCGIFADDR: 949 case SIOCGIFADDR:
@@ -1085,7 +1120,7 @@ static int __init qrtr_proto_init(void)
1085 1120
1086 return 0; 1121 return 0;
1087} 1122}
1088module_init(qrtr_proto_init); 1123postcore_initcall(qrtr_proto_init);
1089 1124
1090static void __exit qrtr_proto_fini(void) 1125static void __exit qrtr_proto_fini(void)
1091{ 1126{
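
The single-byte version read in qrtr_endpoint_post() works because v1's 32-bit version field is little endian: for version values below 256 the first byte holds the whole value, so it aliases v2's u8 version field. A standalone sketch of that demux with simplified headers (field layout trimmed for illustration, not the full wire format):

	#include <stdint.h>
	#include <stddef.h>

	struct hdr_v1 { uint32_t version; /* little endian on the wire */ };
	struct hdr_v2 { uint8_t version, type, flags, optlen; };

	#define VER_1 1
	#define VER_2 3

	/* Returns the bytes consumed by the base header, or -1 on error. */
	static long base_hdrlen(const void *data, size_t len)
	{
		if (len < sizeof(struct hdr_v2))
			return -1;

		switch (*(const uint8_t *)data) { /* valid for both layouts */
		case VER_1:
			return sizeof(struct hdr_v1);
		case VER_2:
			return sizeof(struct hdr_v2) +
			       ((const struct hdr_v2 *)data)->optlen;
		default:
			return -1;
		}
	}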
diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h
index 2b848718f8fe..b81e6953c04b 100644
--- a/net/qrtr/qrtr.h
+++ b/net/qrtr/qrtr.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __QRTR_H_ 2#ifndef __QRTR_H_
2#define __QRTR_H_ 3#define __QRTR_H_
3 4
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 56c7d27eefee..b5d568bd479c 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-$(CONFIG_RDS) += rds.o 2obj-$(CONFIG_RDS) += rds.o
2rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ 3rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
3 recv.o send.o stats.o sysctl.o threads.o transport.o \ 4 recv.o send.o stats.o sysctl.o threads.o transport.o \
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a0954ace3774..36dd2099048a 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -126,6 +126,7 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
126static void rds_ib_add_one(struct ib_device *device) 126static void rds_ib_add_one(struct ib_device *device)
127{ 127{
128 struct rds_ib_device *rds_ibdev; 128 struct rds_ib_device *rds_ibdev;
129 bool has_fr, has_fmr;
129 130
130 /* Only handle IB (no iWARP) devices */ 131 /* Only handle IB (no iWARP) devices */
131 if (device->node_type != RDMA_NODE_IB_CA) 132 if (device->node_type != RDMA_NODE_IB_CA)
@@ -143,11 +144,11 @@ static void rds_ib_add_one(struct ib_device *device)
143 rds_ibdev->max_wrs = device->attrs.max_qp_wr; 144 rds_ibdev->max_wrs = device->attrs.max_qp_wr;
144 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); 145 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
145 146
146 rds_ibdev->has_fr = (device->attrs.device_cap_flags & 147 has_fr = (device->attrs.device_cap_flags &
147 IB_DEVICE_MEM_MGT_EXTENSIONS); 148 IB_DEVICE_MEM_MGT_EXTENSIONS);
148 rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && 149 has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
149 device->map_phys_fmr && device->unmap_fmr); 150 device->map_phys_fmr && device->unmap_fmr);
150 rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr); 151 rds_ibdev->use_fastreg = (has_fr && !has_fmr);
151 152
152 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; 153 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
153 rds_ibdev->max_1m_mrs = device->attrs.max_mr ? 154 rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
diff --git a/net/rds/ib.h b/net/rds/ib.h
index bf4822407567..a6f4d7d68e95 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _RDS_IB_H 2#ifndef _RDS_IB_H
2#define _RDS_IB_H 3#define _RDS_IB_H
3 4
@@ -215,8 +216,6 @@ struct rds_ib_device {
215 struct list_head conn_list; 216 struct list_head conn_list;
216 struct ib_device *dev; 217 struct ib_device *dev;
217 struct ib_pd *pd; 218 struct ib_pd *pd;
218 bool has_fmr;
219 bool has_fr;
220 bool use_fastreg; 219 bool use_fastreg;
221 220
222 unsigned int max_mrs; 221 unsigned int max_mrs;
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 86ef907067bb..e0f70c4051b6 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -139,8 +139,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
139 return -EINVAL; 139 return -EINVAL;
140 } 140 }
141 141
142 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, 142 dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC,
143 rdsibdev_to_node(rds_ibdev)); 143 rdsibdev_to_node(rds_ibdev));
144 if (!dma_pages) { 144 if (!dma_pages) {
145 ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); 145 ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
146 return -ENOMEM; 146 return -ENOMEM;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 9a3c54e659e9..e678699268a2 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -601,11 +601,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
601 if (pool_type == RDS_IB_MR_1M_POOL) { 601 if (pool_type == RDS_IB_MR_1M_POOL) {
602 /* +1 allows for unaligned MRs */ 602 /* +1 allows for unaligned MRs */
603 pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1; 603 pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
604 pool->max_items = RDS_MR_1M_POOL_SIZE; 604 pool->max_items = rds_ibdev->max_1m_mrs;
605 } else { 605 } else {
606 /* pool_type == RDS_IB_MR_8K_POOL */ 606 /* pool_type == RDS_IB_MR_8K_POOL */
607 pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1; 607 pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
608 pool->max_items = RDS_MR_8K_POOL_SIZE; 608 pool->max_items = rds_ibdev->max_8k_mrs;
609 } 609 }
610 610
611 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4; 611 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 9722bf839d9d..b4e421aa9727 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -410,14 +410,14 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
410 break; 410 break;
411 } 411 }
412 412
413 /* XXX when can this fail? */ 413 rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
414 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
415 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
416 recv->r_ibinc, sg_page(&recv->r_frag->f_sg), 414 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
417 (long) ib_sg_dma_address( 415 (long) ib_sg_dma_address(
418 ic->i_cm_id->device, 416 ic->i_cm_id->device,
419 &recv->r_frag->f_sg), 417 &recv->r_frag->f_sg));
420 ret); 418
419 /* XXX when can this fail? */
420 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
421 if (ret) { 421 if (ret) {
422 rds_ib_conn_error(conn, "recv post on " 422 rds_ib_conn_error(conn, "recv post on "
423 "%pI4 returned %d, disconnecting and " 423 "%pI4 returned %d, disconnecting and "
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 6ab39dbcca01..8557a1cae041 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
661 } 661 }
662 } 662 }
663 663
664 rds_ib_set_wr_signal_state(ic, send, 0); 664 rds_ib_set_wr_signal_state(ic, send, false);
665 665
666 /* 666 /*
667 * Always signal the last one if we're stopping due to flow control. 667 * Always signal the last one if we're stopping due to flow control.
668 */ 668 */
669 if (ic->i_flowctl && flow_controlled && i == (work_alloc-1)) 669 if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) {
670 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 670 rds_ib_set_wr_signal_state(ic, send, true);
671 send->s_wr.send_flags |= IB_SEND_SOLICITED;
672 }
671 673
672 if (send->s_wr.send_flags & IB_SEND_SIGNALED) 674 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
673 nr_sig++; 675 nr_sig++;
@@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
705 if (scat == &rm->data.op_sg[rm->data.op_count]) { 707 if (scat == &rm->data.op_sg[rm->data.op_count]) {
706 prev->s_op = ic->i_data_op; 708 prev->s_op = ic->i_data_op;
707 prev->s_wr.send_flags |= IB_SEND_SOLICITED; 709 prev->s_wr.send_flags |= IB_SEND_SOLICITED;
708 if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) { 710 if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED))
709 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; 711 nr_sig += rds_ib_set_wr_signal_state(ic, prev, true);
710 prev->s_wr.send_flags |= IB_SEND_SIGNALED;
711 nr_sig++;
712 }
713 ic->i_data_op = NULL; 712 ic->i_data_op = NULL;
714 } 713 }
715 714
@@ -792,6 +791,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
792 send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask; 791 send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
793 send->s_atomic_wr.swap_mask = 0; 792 send->s_atomic_wr.swap_mask = 0;
794 } 793 }
794 send->s_wr.send_flags = 0;
795 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); 795 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
796 send->s_atomic_wr.wr.num_sge = 1; 796 send->s_atomic_wr.wr.num_sge = 1;
797 send->s_atomic_wr.wr.next = NULL; 797 send->s_atomic_wr.wr.next = NULL;
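
The hunks above fold the open-coded signaling logic into rds_ib_set_wr_signal_state(). Reconstructing from the removed lines, the helper plausibly refills the unsignaled-WR budget and marks the work request signaled when the budget runs out or the caller forces it. This is a hedged reading of the diff, not the verbatim RDS source:

	static int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
					      struct rds_ib_send_work *send,
					      bool notify)
	{
		if (ic->i_unsignaled_wrs-- == 0 || notify) {
			ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
			send->s_wr.send_flags |= IB_SEND_SIGNALED;
			return 1;	/* callers add the result to nr_sig */
		}
		return 0;
	}

Centralizing this also explains the new send->s_wr.send_flags = 0 in the atomic path: the flags must start clean before the helper decides whether to set IB_SEND_SIGNALED.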
diff --git a/net/rds/info.h b/net/rds/info.h
index b6c052ca7d22..a069b51c4679 100644
--- a/net/rds/info.h
+++ b/net/rds/info.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _RDS_INFO_H 2#ifndef _RDS_INFO_H
2#define _RDS_INFO_H 3#define _RDS_INFO_H
3 4
diff --git a/net/rds/loop.h b/net/rds/loop.h
index f32b0939a04d..469fa4b2da4f 100644
--- a/net/rds/loop.h
+++ b/net/rds/loop.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _RDS_LOOP_H 2#ifndef _RDS_LOOP_H
2#define _RDS_LOOP_H 3#define _RDS_LOOP_H
3 4
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index ff2010e9d20c..d309c4430124 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _RDMA_TRANSPORT_H 2#ifndef _RDMA_TRANSPORT_H
2#define _RDMA_TRANSPORT_H 3#define _RDMA_TRANSPORT_H
3 4
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 2e0315b159cb..c349c71babff 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _RDS_RDS_H 2#ifndef _RDS_RDS_H
2#define _RDS_RDS_H 3#define _RDS_RDS_H
3 4
diff --git a/net/rds/rds_single_path.h b/net/rds/rds_single_path.h
index e1241af7c1ad..9521f6e99bef 100644
--- a/net/rds/rds_single_path.h
+++ b/net/rds/rds_single_path.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _RDS_RDS_SINGLE_H 2#ifndef _RDS_RDS_SINGLE_H
2#define _RDS_RDS_SINGLE_H 3#define _RDS_RDS_SINGLE_H
3 4
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f8800b7ce79c..1aafbf7c3011 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _RDS_TCP_H 2#ifndef _RDS_TCP_H
2#define _RDS_TCP_H 3#define _RDS_TCP_H
3 4
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 4a9729257023..6a5c4992cf61 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *);
318/* 318/*
319 * Handler for deferred kills. 319 * Handler for deferred kills.
320 */ 320 */
321static void rose_destroy_timer(unsigned long data) 321static void rose_destroy_timer(struct timer_list *t)
322{ 322{
323 rose_destroy_socket((struct sock *)data); 323 struct sock *sk = from_timer(sk, t, sk_timer);
324
325 rose_destroy_socket(sk);
324} 326}
325 327
326/* 328/*
@@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk)
353 355
354 if (sk_has_allocations(sk)) { 356 if (sk_has_allocations(sk)) {
355 /* Defer: outstanding buffers */ 357 /* Defer: outstanding buffers */
356 setup_timer(&sk->sk_timer, rose_destroy_timer, 358 timer_setup(&sk->sk_timer, rose_destroy_timer, 0);
357 (unsigned long)sk);
358 sk->sk_timer.expires = jiffies + 10 * HZ; 359 sk->sk_timer.expires = jiffies + 10 * HZ;
359 add_timer(&sk->sk_timer); 360 add_timer(&sk->sk_timer);
360 } else 361 } else
@@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
538 sock->ops = &rose_proto_ops; 539 sock->ops = &rose_proto_ops;
539 sk->sk_protocol = protocol; 540 sk->sk_protocol = protocol;
540 541
541 init_timer(&rose->timer); 542 timer_setup(&rose->timer, NULL, 0);
542 init_timer(&rose->idletimer); 543 timer_setup(&rose->idletimer, NULL, 0);
543 544
544 rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout); 545 rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout);
545 rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout); 546 rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout);
@@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk)
582 sk->sk_state = TCP_ESTABLISHED; 583 sk->sk_state = TCP_ESTABLISHED;
583 sock_copy_flags(sk, osk); 584 sock_copy_flags(sk, osk);
584 585
585 init_timer(&rose->timer); 586 timer_setup(&rose->timer, NULL, 0);
586 init_timer(&rose->idletimer); 587 timer_setup(&rose->idletimer, NULL, 0);
587 588
588 orose = rose_sk(osk); 589 orose = rose_sk(osk);
589 rose->t1 = orose->t1; 590 rose->t1 = orose->t1;
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 0a6394754e81..9bbbfe325c5a 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -219,6 +219,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety
219 switch (frametype) { 219 switch (frametype) {
220 case ROSE_RESET_REQUEST: 220 case ROSE_RESET_REQUEST:
221 rose_write_internal(sk, ROSE_RESET_CONFIRMATION); 221 rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
222 /* fall through */
222 case ROSE_RESET_CONFIRMATION: 223 case ROSE_RESET_CONFIRMATION:
223 rose_stop_timer(sk); 224 rose_stop_timer(sk);
224 rose_start_idletimer(sk); 225 rose_start_idletimer(sk);
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index c76638cc2cd5..62055d3069d2 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -27,8 +27,8 @@
27#include <linux/interrupt.h> 27#include <linux/interrupt.h>
28#include <net/rose.h> 28#include <net/rose.h>
29 29
30static void rose_ftimer_expiry(unsigned long); 30static void rose_ftimer_expiry(struct timer_list *);
31static void rose_t0timer_expiry(unsigned long); 31static void rose_t0timer_expiry(struct timer_list *);
32 32
33static void rose_transmit_restart_confirmation(struct rose_neigh *neigh); 33static void rose_transmit_restart_confirmation(struct rose_neigh *neigh);
34static void rose_transmit_restart_request(struct rose_neigh *neigh); 34static void rose_transmit_restart_request(struct rose_neigh *neigh);
@@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh)
37{ 37{
38 del_timer(&neigh->ftimer); 38 del_timer(&neigh->ftimer);
39 39
40 neigh->ftimer.data = (unsigned long)neigh; 40 neigh->ftimer.function = rose_ftimer_expiry;
41 neigh->ftimer.function = &rose_ftimer_expiry;
42 neigh->ftimer.expires = 41 neigh->ftimer.expires =
43 jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout); 42 jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout);
44 43
@@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh)
49{ 48{
50 del_timer(&neigh->t0timer); 49 del_timer(&neigh->t0timer);
51 50
52 neigh->t0timer.data = (unsigned long)neigh; 51 neigh->t0timer.function = rose_t0timer_expiry;
53 neigh->t0timer.function = &rose_t0timer_expiry;
54 neigh->t0timer.expires = 52 neigh->t0timer.expires =
55 jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout); 53 jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout);
56 54
@@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh)
77 return timer_pending(&neigh->t0timer); 75 return timer_pending(&neigh->t0timer);
78} 76}
79 77
80static void rose_ftimer_expiry(unsigned long param) 78static void rose_ftimer_expiry(struct timer_list *t)
81{ 79{
82} 80}
83 81
84static void rose_t0timer_expiry(unsigned long param) 82static void rose_t0timer_expiry(struct timer_list *t)
85{ 83{
86 struct rose_neigh *neigh = (struct rose_neigh *)param; 84 struct rose_neigh *neigh = from_timer(neigh, t, t0timer);
87 85
88 rose_transmit_restart_request(neigh); 86 rose_transmit_restart_request(neigh);
89 87
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 344456206b70..7af4f99c4a93 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -19,12 +19,13 @@ static struct sk_buff_head loopback_queue;
 static struct timer_list loopback_timer;
 
 static void rose_set_loopback_timer(void);
+static void rose_loopback_timer(struct timer_list *unused);
 
 void rose_loopback_init(void)
 {
 	skb_queue_head_init(&loopback_queue);
 
-	init_timer(&loopback_timer);
+	timer_setup(&loopback_timer, rose_loopback_timer, 0);
 }
 
 static int rose_loopback_running(void)
@@ -50,20 +51,16 @@ int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
 	return 1;
 }
 
-static void rose_loopback_timer(unsigned long);
 
 static void rose_set_loopback_timer(void)
 {
 	del_timer(&loopback_timer);
 
-	loopback_timer.data = 0;
-	loopback_timer.function = &rose_loopback_timer;
 	loopback_timer.expires = jiffies + 10;
-
 	add_timer(&loopback_timer);
 }
 
-static void rose_loopback_timer(unsigned long param)
+static void rose_loopback_timer(struct timer_list *unused)
 {
 	struct sk_buff *skb;
 	struct net_device *dev;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 452bbb38d943..8ca3124df83f 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route,
 
 	skb_queue_head_init(&rose_neigh->queue);
 
-	init_timer(&rose_neigh->ftimer);
-	init_timer(&rose_neigh->t0timer);
+	timer_setup(&rose_neigh->ftimer, NULL, 0);
+	timer_setup(&rose_neigh->t0timer, NULL, 0);
 
 	if (rose_route->ndigis != 0) {
 		rose_neigh->digipeat =
@@ -346,6 +346,7 @@ static int rose_del_node(struct rose_route_struct *rose_route,
 		case 0:
 			rose_node->neighbour[0] =
 				rose_node->neighbour[1];
+			/* fall through */
 		case 1:
 			rose_node->neighbour[1] =
 				rose_node->neighbour[2];
@@ -390,8 +391,8 @@ void rose_add_loopback_neigh(void)
 
 	skb_queue_head_init(&sn->queue);
 
-	init_timer(&sn->ftimer);
-	init_timer(&sn->t0timer);
+	timer_setup(&sn->ftimer, NULL, 0);
+	timer_setup(&sn->t0timer, NULL, 0);
 
 	spin_lock_bh(&rose_neigh_list_lock);
 	sn->next = rose_neigh_list;
@@ -507,6 +508,7 @@ void rose_rt_device_down(struct net_device *dev)
 		switch (i) {
 		case 0:
 			t->neighbour[0] = t->neighbour[1];
+			/* fall through */
 		case 1:
 			t->neighbour[1] = t->neighbour[2];
 		case 2:
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index bc5469d6d9cb..74555fb95615 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -28,16 +28,15 @@
 #include <linux/interrupt.h>
 #include <net/rose.h>
 
-static void rose_heartbeat_expiry(unsigned long);
-static void rose_timer_expiry(unsigned long);
-static void rose_idletimer_expiry(unsigned long);
+static void rose_heartbeat_expiry(struct timer_list *t);
+static void rose_timer_expiry(struct timer_list *);
+static void rose_idletimer_expiry(struct timer_list *);
 
 void rose_start_heartbeat(struct sock *sk)
 {
 	del_timer(&sk->sk_timer);
 
-	sk->sk_timer.data = (unsigned long)sk;
-	sk->sk_timer.function = &rose_heartbeat_expiry;
+	sk->sk_timer.function = rose_heartbeat_expiry;
 	sk->sk_timer.expires = jiffies + 5 * HZ;
 
 	add_timer(&sk->sk_timer);
@@ -49,8 +48,7 @@ void rose_start_t1timer(struct sock *sk)
 
 	del_timer(&rose->timer);
 
-	rose->timer.data = (unsigned long)sk;
-	rose->timer.function = &rose_timer_expiry;
+	rose->timer.function = rose_timer_expiry;
 	rose->timer.expires = jiffies + rose->t1;
 
 	add_timer(&rose->timer);
@@ -62,8 +60,7 @@ void rose_start_t2timer(struct sock *sk)
 
 	del_timer(&rose->timer);
 
-	rose->timer.data = (unsigned long)sk;
-	rose->timer.function = &rose_timer_expiry;
+	rose->timer.function = rose_timer_expiry;
 	rose->timer.expires = jiffies + rose->t2;
 
 	add_timer(&rose->timer);
@@ -75,8 +72,7 @@ void rose_start_t3timer(struct sock *sk)
 
 	del_timer(&rose->timer);
 
-	rose->timer.data = (unsigned long)sk;
-	rose->timer.function = &rose_timer_expiry;
+	rose->timer.function = rose_timer_expiry;
 	rose->timer.expires = jiffies + rose->t3;
 
 	add_timer(&rose->timer);
@@ -88,8 +84,7 @@ void rose_start_hbtimer(struct sock *sk)
 
 	del_timer(&rose->timer);
 
-	rose->timer.data = (unsigned long)sk;
-	rose->timer.function = &rose_timer_expiry;
+	rose->timer.function = rose_timer_expiry;
 	rose->timer.expires = jiffies + rose->hb;
 
 	add_timer(&rose->timer);
@@ -102,8 +97,7 @@ void rose_start_idletimer(struct sock *sk)
 	del_timer(&rose->idletimer);
 
 	if (rose->idle > 0) {
-		rose->idletimer.data = (unsigned long)sk;
-		rose->idletimer.function = &rose_idletimer_expiry;
+		rose->idletimer.function = rose_idletimer_expiry;
 		rose->idletimer.expires = jiffies + rose->idle;
 
 		add_timer(&rose->idletimer);
@@ -125,9 +119,9 @@ void rose_stop_idletimer(struct sock *sk)
 	del_timer(&rose_sk(sk)->idletimer);
 }
 
-static void rose_heartbeat_expiry(unsigned long param)
+static void rose_heartbeat_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 	struct rose_sock *rose = rose_sk(sk);
 
 	bh_lock_sock(sk);
@@ -163,10 +157,10 @@ static void rose_heartbeat_expiry(unsigned long param)
 	bh_unlock_sock(sk);
 }
 
-static void rose_timer_expiry(unsigned long param)
+static void rose_timer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
-	struct rose_sock *rose = rose_sk(sk);
+	struct rose_sock *rose = from_timer(rose, t, timer);
+	struct sock *sk = &rose->sock;
 
 	bh_lock_sock(sk);
 	switch (rose->state) {
@@ -192,9 +186,10 @@ static void rose_timer_expiry(unsigned long param)
 	bh_unlock_sock(sk);
 }
 
-static void rose_idletimer_expiry(unsigned long param)
+static void rose_idletimer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct rose_sock *rose = from_timer(rose, t, idletimer);
+	struct sock *sk = &rose->sock;
 
 	bh_lock_sock(sk);
 	rose_clear_queues(sk);
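
All of the rose hunks above are instances of the same 4.15 timer-API conversion: the callback now receives the struct timer_list itself instead of an unsigned long cookie, and from_timer() (a container_of() wrapper) recovers the enclosing object, so the old .data field disappears. A minimal sketch of the pattern outside this diff, with hypothetical my_* names:

#include <linux/timer.h>

struct my_proto_sock {
	struct timer_list idletimer;	/* timer embedded in its owner */
	int state;
};

static void my_idletimer_expiry(struct timer_list *t)
{
	/* from_timer() maps the timer_list back to its container;
	 * the third argument names the embedded member.
	 */
	struct my_proto_sock *mp = from_timer(mp, t, idletimer);

	mp->state = 0;
}

static void my_start(struct my_proto_sock *mp)
{
	/* The callback is bound once here; no more .data/.function pokes. */
	timer_setup(&mp->idletimer, my_idletimer_expiry, 0);
	mod_timer(&mp->idletimer, jiffies + 10 * HZ);
}
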
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 9c68d2f8ba39..6ffb7e9887ce 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for Linux kernel RxRPC
 #
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fb17552fd292..9b5c46b052fd 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -246,6 +246,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
 		ret = 0;
 		break;
 	}
+	/* Fall through */
 	default:
 		ret = -EBUSY;
 		break;
@@ -265,6 +266,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
  * @tx_total_len: Total length of data to transmit during the call (or -1)
  * @gfp: The allocation constraints
  * @notify_rx: Where to send notifications instead of socket queue
+ * @upgrade: Request service upgrade for call
  *
  * Allow a kernel service to begin a call on the nominated socket. This just
  * sets up all the internal tracking structures and allocates connection and
@@ -279,7 +281,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 					   unsigned long user_call_ID,
 					   s64 tx_total_len,
 					   gfp_t gfp,
-					   rxrpc_notify_rx_t notify_rx)
+					   rxrpc_notify_rx_t notify_rx,
+					   bool upgrade)
 {
 	struct rxrpc_conn_parameters cp;
 	struct rxrpc_call *call;
@@ -304,19 +307,29 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 	cp.key = key;
 	cp.security_level = 0;
 	cp.exclusive = false;
+	cp.upgrade = upgrade;
 	cp.service_id = srx->srx_service;
 	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
 				     gfp);
 	/* The socket has been unlocked. */
-	if (!IS_ERR(call))
+	if (!IS_ERR(call)) {
 		call->notify_rx = notify_rx;
+		mutex_unlock(&call->user_mutex);
+	}
 
-	mutex_unlock(&call->user_mutex);
 	_leave(" = %p", call);
 	return call;
 }
 EXPORT_SYMBOL(rxrpc_kernel_begin_call);
 
+/*
+ * Dummy function used to stop the notifier talking to recvmsg().
+ */
+static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+				  unsigned long call_user_ID)
+{
+}
+
 /**
  * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
  * @sock: The socket the call is on
@@ -331,12 +344,39 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
 
 	mutex_lock(&call->user_mutex);
 	rxrpc_release_call(rxrpc_sk(sock->sk), call);
+
+	/* Make sure we're not going to call back into a kernel service */
+	if (call->notify_rx) {
+		spin_lock_bh(&call->notify_lock);
+		call->notify_rx = rxrpc_dummy_notify_rx;
+		spin_unlock_bh(&call->notify_lock);
+	}
+
 	mutex_unlock(&call->user_mutex);
 	rxrpc_put_call(call, rxrpc_call_put_kernel);
 }
 EXPORT_SYMBOL(rxrpc_kernel_end_call);
 
 /**
+ * rxrpc_kernel_check_life - Check to see whether a call is still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * Allow a kernel service to find out whether a call is still alive - ie. we're
+ * getting ACKs from the server. Returns a number representing the life state
+ * which can be compared to that returned by a previous call.
+ *
+ * If this is a client call, ping ACKs will be sent to the server to find out
+ * whether it's still responsive and whether the call is still alive on the
+ * server.
+ */
+u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+{
+	return call->acks_latest;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_life);
+
+/**
  * rxrpc_kernel_check_call - Check a call's state
  * @sock: The socket the call is on
  * @call: The call to check
@@ -537,6 +577,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 		m->msg_name = &rx->connect_srx;
 		m->msg_namelen = sizeof(rx->connect_srx);
 	}
+	/* Fall through */
 	case RXRPC_SERVER_BOUND:
 	case RXRPC_SERVER_LISTENING:
 		ret = rxrpc_do_sendmsg(rx, m, len);
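
Taken together, the af_rxrpc.c hunks extend the kernel-service client API: rxrpc_kernel_begin_call() gains an upgrade flag and rxrpc_kernel_check_life() exposes the latest ACK state. A sketch of a caller, assuming sock, srx, key, user_call_ID and my_notify_rx are set up elsewhere (those names are illustrative, not from this patch):

struct rxrpc_call *call;
u32 life;

call = rxrpc_kernel_begin_call(sock, &srx, key, user_call_ID,
			       -1,		/* total Tx length unknown */
			       GFP_KERNEL,
			       my_notify_rx,
			       false);		/* don't request service upgrade */
if (IS_ERR(call))
	return PTR_ERR(call);

/* A value that changes between polls means ACKs are still arriving. */
life = rxrpc_kernel_check_life(sock, call);

rxrpc_kernel_end_call(sock, call);
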
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ea5600b747cc..b2151993d384 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -525,6 +525,7 @@ struct rxrpc_call {
 	unsigned long		flags;
 	unsigned long		events;
 	spinlock_t		lock;
+	spinlock_t		notify_lock;	/* Kernel notification lock */
 	rwlock_t		state_lock;	/* lock for state transition */
 	u32			abort_code;	/* Local/remote abort code */
 	int			error;		/* Local error incurred */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 7a77844aab16..3574508baf9a 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
 
 	now = ktime_get_real();
 	if (ktime_before(call->expire_at, now)) {
-		rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+		rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
 		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
 		goto recheck_state;
 	}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index fcdd6555a820..994dc2df57e4 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -45,9 +45,9 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
 
 struct kmem_cache *rxrpc_call_jar;
 
-static void rxrpc_call_timer_expired(unsigned long _call)
+static void rxrpc_call_timer_expired(struct timer_list *t)
 {
-	struct rxrpc_call *call = (struct rxrpc_call *)_call;
+	struct rxrpc_call *call = from_timer(call, t, timer);
 
 	_enter("%d", call->debug_id);
 
@@ -114,8 +114,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 		goto nomem_2;
 
 	mutex_init(&call->user_mutex);
-	setup_timer(&call->timer, rxrpc_call_timer_expired,
-		    (unsigned long)call);
+	timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
 	INIT_WORK(&call->processor, &rxrpc_process_call);
 	INIT_LIST_HEAD(&call->link);
 	INIT_LIST_HEAD(&call->chan_wait_link);
@@ -124,6 +123,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 	INIT_LIST_HEAD(&call->sock_link);
 	init_waitqueue_head(&call->waitq);
 	spin_lock_init(&call->lock);
+	spin_lock_init(&call->notify_lock);
 	rwlock_init(&call->state_lock);
 	atomic_set(&call->usage, 1);
 	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index e56e23ed2229..1b592073ec96 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
 
 	write_unlock(&call->state_lock);
 	if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
-		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
-				  rxrpc_propose_ack_client_tx_end);
 		trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
 	} else {
 		trace_rxrpc_transmit(call, rxrpc_transmit_end);
@@ -1125,6 +1123,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
 	case RXRPC_PACKET_TYPE_BUSY:
 		if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
 			goto discard;
+		/* Fall through */
 
 	case RXRPC_PACKET_TYPE_DATA:
 		if (sp->hdr.callNumber == 0)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 71e6f713fbe7..f47659c7b224 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,7 +35,8 @@ struct rxrpc_abort_buffer {
 /*
  * Fill out an ACK packet.
  */
-static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
+static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
+				 struct rxrpc_call *call,
 				 struct rxrpc_ack_buffer *pkt,
 				 rxrpc_seq_t *_hard_ack,
 				 rxrpc_seq_t *_top,
@@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
 		} while (before_eq(seq, top));
 	}
 
-	mtu = call->conn->params.peer->if_mtu;
-	mtu -= call->conn->params.peer->hdrsize;
+	mtu = conn->params.peer->if_mtu;
+	mtu -= conn->params.peer->hdrsize;
 	jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
 	pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
 	pkt->ackinfo.maxMTU = htonl(mtu);
@@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
 		}
 		call->ackr_reason = 0;
 	}
-	n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason);
+	n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
 
 	spin_unlock_bh(&call->lock);
 
@@ -221,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
 	rxrpc_serial_t serial;
 	int ret;
 
+	/* Don't bother sending aborts for a client call once the server has
+	 * hard-ACK'd all of its request data.  After that point, we're not
+	 * going to stop the operation proceeding, and whilst we might limit
+	 * the reply, it's not worth it if we can send a new call on the same
+	 * channel instead, thereby closing off this call.
+	 */
+	if (rxrpc_is_client_call(call) &&
+	    test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+		return 0;
+
 	spin_lock_bh(&call->lock);
 	if (call->conn)
 		conn = rxrpc_get_connection_maybe(call->conn);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 5787f97f5330..d02a99f37f5f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
 	*_srx = call->peer->srx;
 }
 EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_get_rtt - Get a call's peer RTT
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ *
+ * Get the call's peer RTT.
+ */
+u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
+{
+	return call->peer->rtt;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index bdece21f313d..8510a98b87e1 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
 	sk = &rx->sk;
 	if (rx && sk->sk_state < RXRPC_CLOSE) {
 		if (call->notify_rx) {
+			spin_lock_bh(&call->notify_lock);
 			call->notify_rx(sk, call, call->user_call_ID);
+			spin_unlock_bh(&call->notify_lock);
 		} else {
 			write_lock_bh(&rx->recvmsg_lock);
 			if (list_empty(&call->recvmsg_link)) {
@@ -607,6 +609,7 @@ wait_error:
  * @_offset: The running offset into the buffer.
  * @want_more: True if more data is expected to be read
  * @_abort: Where the abort code is stored if -ECONNABORTED is returned
+ * @_service: Where to store the actual service ID (may be upgraded)
  *
  * Allow a kernel service to receive data and pick up information about the
 * state of a call. Returns 0 if got what was asked for and there's more
@@ -624,7 +627,7 @@ wait_error:
  */
 int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 			   void *buf, size_t size, size_t *_offset,
-			   bool want_more, u32 *_abort)
+			   bool want_more, u32 *_abort, u16 *_service)
 {
 	struct iov_iter iter;
 	struct kvec iov;
@@ -680,6 +683,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 read_phase_complete:
 	ret = 1;
 out:
+	if (_service)
+		*_service = call->service_id;
 	mutex_unlock(&call->user_mutex);
 	_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
 	return ret;
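
For completeness, a sketch of how a kernel-service caller would consume the new _service argument added above; buf, sock and call are assumed to exist already:

u32 abort_code = 0;
u16 service_id = 0;
size_t offset = 0;
int ret;

/* A non-NULL _service pointer receives the service ID the server
 * actually answered on, which may differ from the one requested
 * if the call was upgraded.
 */
ret = rxrpc_kernel_recv_data(sock, call, buf, sizeof(buf), &offset,
			     true, &abort_code, &service_id);
if (ret == -ECONNABORTED)
	pr_debug("call aborted, code %u\n", abort_code);
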
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 9ea6f972767e..7d2595582c09 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -38,12 +38,86 @@ struct rxrpc_send_params {
 };
 
 /*
+ * Wait for space to appear in the Tx queue or a signal to occur.
+ */
+static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+					 struct rxrpc_call *call,
+					 long *timeo)
+{
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (call->tx_top - call->tx_hard_ack <
+		    min_t(unsigned int, call->tx_winsize,
+			  call->cong_cwnd + call->cong_extra))
+			return 0;
+
+		if (call->state >= RXRPC_CALL_COMPLETE)
+			return call->error;
+
+		if (signal_pending(current))
+			return sock_intr_errno(*timeo);
+
+		trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+		mutex_unlock(&call->user_mutex);
+		*timeo = schedule_timeout(*timeo);
+		if (mutex_lock_interruptible(&call->user_mutex) < 0)
+			return sock_intr_errno(*timeo);
+	}
+}
+
+/*
+ * Wait for space to appear in the Tx queue uninterruptibly, but with
+ * a timeout of 2*RTT if no progress was made and a signal occurred.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+					    struct rxrpc_call *call)
+{
+	rxrpc_seq_t tx_start, tx_win;
+	signed long rtt2, timeout;
+	u64 rtt;
+
+	rtt = READ_ONCE(call->peer->rtt);
+	rtt2 = nsecs_to_jiffies64(rtt) * 2;
+	if (rtt2 < 1)
+		rtt2 = 1;
+
+	timeout = rtt2;
+	tx_start = READ_ONCE(call->tx_hard_ack);
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+
+		tx_win = READ_ONCE(call->tx_hard_ack);
+		if (call->tx_top - tx_win <
+		    min_t(unsigned int, call->tx_winsize,
+			  call->cong_cwnd + call->cong_extra))
+			return 0;
+
+		if (call->state >= RXRPC_CALL_COMPLETE)
+			return call->error;
+
+		if (timeout == 0 &&
+		    tx_win == tx_start && signal_pending(current))
+			return -EINTR;
+
+		if (tx_win != tx_start) {
+			timeout = rtt2;
+			tx_start = tx_win;
+		}
+
+		trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+		timeout = schedule_timeout(timeout);
+	}
+}
+
+/*
  * wait for space to appear in the transmit/ACK window
  * - caller holds the socket locked
  */
 static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
 				    struct rxrpc_call *call,
-				    long *timeo)
+				    long *timeo,
+				    bool waitall)
 {
 	DECLARE_WAITQUEUE(myself, current);
 	int ret;
@@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
 
 	add_wait_queue(&call->waitq, &myself);
 
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		ret = 0;
-		if (call->tx_top - call->tx_hard_ack <
-		    min_t(unsigned int, call->tx_winsize,
-			  call->cong_cwnd + call->cong_extra))
-			break;
-		if (call->state >= RXRPC_CALL_COMPLETE) {
-			ret = call->error;
-			break;
-		}
-		if (signal_pending(current)) {
-			ret = sock_intr_errno(*timeo);
-			break;
-		}
-
-		trace_rxrpc_transmit(call, rxrpc_transmit_wait);
-		mutex_unlock(&call->user_mutex);
-		*timeo = schedule_timeout(*timeo);
-		if (mutex_lock_interruptible(&call->user_mutex) < 0) {
-			ret = sock_intr_errno(*timeo);
-			break;
-		}
-	}
+	if (waitall)
+		ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
+	else
+		ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
 
 	remove_wait_queue(&call->waitq, &myself);
 	set_current_state(TASK_RUNNING);
@@ -166,6 +220,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 					  ktime_get_real());
 		if (!last)
 			break;
+		/* Fall through */
 	case RXRPC_CALL_SERVER_SEND_REPLY:
 		call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
 		rxrpc_notify_end_tx(rx, call, notify_end_tx);
@@ -254,7 +309,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 			if (msg->msg_flags & MSG_DONTWAIT)
 				goto maybe_error;
 			ret = rxrpc_wait_for_tx_window(rx, call,
-						       &timeo);
+						       &timeo,
+						       msg->msg_flags & MSG_WAITALL);
 			if (ret < 0)
 				goto maybe_error;
 		}
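
The net effect of the sendmsg.c split: an ordinary sender waits interruptibly as before, while MSG_WAITALL selects the new uninterruptible path, which only honours a pending signal once two round-trip times pass with no hard-ACK progress. The timeout derivation, restated from the hunk above (peer->rtt is kept in nanoseconds):

u64 rtt = READ_ONCE(call->peer->rtt);		/* nanoseconds */
signed long rtt2 = nsecs_to_jiffies64(rtt) * 2;	/* 2*RTT in jiffies */

if (rtt2 < 1)
	rtt2 = 1;				/* never a zero timeout */
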
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e70ed26485a2..c03d86a7775e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -172,6 +172,17 @@ config NET_SCH_TBF
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_tbf.
 
+config NET_SCH_CBS
+	tristate "Credit Based Shaper (CBS)"
+	---help---
+	  Say Y here if you want to use the Credit Based Shaper (CBS) packet
+	  scheduling algorithm.
+
+	  See the top of <file:net/sched/sch_cbs.c> for more details.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_cbs.
+
 config NET_SCH_GRED
 	tristate "Generic Random Early Detection (GRED)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7b915d226de7..5b635447e3f8 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux Traffic Control Unit.
 #
@@ -52,6 +53,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
+obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index da6fa82c98a8..4d33a50a8a6d 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -21,6 +21,8 @@
 #include <linux/kmod.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/rhashtable.h>
+#include <linux/list.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/sch_generic.h>
@@ -86,6 +88,8 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 {
 	int ret = 0;
 
+	ASSERT_RTNL();
+
 	if (p) {
 		if (bind)
 			p->tcfa_bindcnt--;
@@ -1249,8 +1253,227 @@ out_module_put:
 	return skb->len;
 }
 
+struct tcf_action_net {
+	struct rhashtable egdev_ht;
+};
+
+static unsigned int tcf_action_net_id;
+
+struct tcf_action_egdev_cb {
+	struct list_head list;
+	tc_setup_cb_t *cb;
+	void *cb_priv;
+};
+
+struct tcf_action_egdev {
+	struct rhash_head ht_node;
+	const struct net_device *dev;
+	unsigned int refcnt;
+	struct list_head cb_list;
+};
+
+static const struct rhashtable_params tcf_action_egdev_ht_params = {
+	.key_offset = offsetof(struct tcf_action_egdev, dev),
+	.head_offset = offsetof(struct tcf_action_egdev, ht_node),
+	.key_len = sizeof(const struct net_device *),
+};
+
+static struct tcf_action_egdev *
+tcf_action_egdev_lookup(const struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
+				      tcf_action_egdev_ht_params);
+}
+
+static struct tcf_action_egdev *
+tcf_action_egdev_get(const struct net_device *dev)
+{
+	struct tcf_action_egdev *egdev;
+	struct tcf_action_net *tan;
+
+	egdev = tcf_action_egdev_lookup(dev);
+	if (egdev)
+		goto inc_ref;
+
+	egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
+	if (!egdev)
+		return NULL;
+	INIT_LIST_HEAD(&egdev->cb_list);
+	egdev->dev = dev;
+	tan = net_generic(dev_net(dev), tcf_action_net_id);
+	rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
+			       tcf_action_egdev_ht_params);
+
+inc_ref:
+	egdev->refcnt++;
+	return egdev;
+}
+
+static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
+{
+	struct tcf_action_net *tan;
+
+	if (--egdev->refcnt)
+		return;
+	tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
+	rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
+			       tcf_action_egdev_ht_params);
+	kfree(egdev);
+}
+
+static struct tcf_action_egdev_cb *
+tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
+			   tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	list_for_each_entry(egdev_cb, &egdev->cb_list, list)
+		if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
+			return egdev_cb;
+	return NULL;
+}
+
+static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
+				    enum tc_setup_type type,
+				    void *type_data, bool err_stop)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+	int ok_count = 0;
+	int err;
+
+	list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
+		err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
+		if (err) {
+			if (err_stop)
+				return err;
+		} else {
+			ok_count++;
+		}
+	}
+	return ok_count;
+}
+
+static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
+				   tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+	if (WARN_ON(egdev_cb))
+		return -EEXIST;
+	egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
+	if (!egdev_cb)
+		return -ENOMEM;
+	egdev_cb->cb = cb;
+	egdev_cb->cb_priv = cb_priv;
+	list_add(&egdev_cb->list, &egdev->cb_list);
+	return 0;
+}
+
+static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
+				    tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+	if (WARN_ON(!egdev_cb))
+		return;
+	list_del(&egdev_cb->list);
+	kfree(egdev_cb);
+}
+
+static int __tc_setup_cb_egdev_register(const struct net_device *dev,
+					tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
+	int err;
+
+	if (!egdev)
+		return -ENOMEM;
+	err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
+	if (err)
+		goto err_cb_add;
+	return 0;
+
+err_cb_add:
+	tcf_action_egdev_put(egdev);
+	return err;
+}
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+			       tc_setup_cb_t *cb, void *cb_priv)
+{
+	int err;
+
+	rtnl_lock();
+	err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
+
+static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
+					   tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+	if (WARN_ON(!egdev))
+		return;
+	tcf_action_egdev_cb_del(egdev, cb, cb_priv);
+	tcf_action_egdev_put(egdev);
+}
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+				  tc_setup_cb_t *cb, void *cb_priv)
+{
+	rtnl_lock();
+	__tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
+
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+			   enum tc_setup_type type, void *type_data,
+			   bool err_stop)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+	if (!egdev)
+		return 0;
+	return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
+
+static __net_init int tcf_action_net_init(struct net *net)
+{
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
+}
+
+static void __net_exit tcf_action_net_exit(struct net *net)
+{
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	rhashtable_destroy(&tan->egdev_ht);
+}
+
+static struct pernet_operations tcf_action_net_ops = {
+	.init = tcf_action_net_init,
+	.exit = tcf_action_net_exit,
+	.id = &tcf_action_net_id,
+	.size = sizeof(struct tcf_action_net),
+};
+
 static int __init tc_action_init(void)
 {
+	int err;
+
+	err = register_pernet_subsys(&tcf_action_net_ops);
+	if (err)
+		return err;
+
 	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
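
The new egdev hooks let a driver be told about tc actions (e.g. mirred) whose egress device belongs to it, even when the classifier was installed on another device. A sketch of driver-side use, with my_egdev_cb and priv purely illustrative:

/* Called via tc_setup_cb_egdev_call() for offload requests that
 * target this device; a real driver would dispatch on 'type'
 * (e.g. TC_SETUP_CLSFLOWER) and decline the rest.
 */
static int my_egdev_cb(enum tc_setup_type type, void *type_data,
		       void *cb_priv)
{
	return -EOPNOTSUPP;	/* nothing offloaded in this sketch */
}

/* Registration pairs with unregistration at teardown; both wrappers
 * take and drop the RTNL lock internally, as shown above.
 */
err = tc_setup_cb_egdev_register(dev, my_egdev_cb, priv);
if (err)
	return err;
/* ... */
tc_setup_cb_egdev_unregister(dev, my_egdev_cb, priv);
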
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c0c707eb2c96..5ef8ce8c83d4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -49,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 	filter = rcu_dereference(prog->filter);
 	if (at_ingress) {
 		__skb_push(skb, skb->mac_len);
-		bpf_compute_data_end(skb);
+		bpf_compute_data_pointers(skb);
 		filter_res = BPF_PROG_RUN(filter, skb);
 		__skb_pull(skb, skb->mac_len);
 	} else {
-		bpf_compute_data_end(skb);
+		bpf_compute_data_pointers(skb);
 		filter_res = BPF_PROG_RUN(filter, skb);
 	}
 	rcu_read_unlock();
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 1c40caadcff9..d836f998117b 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -229,6 +229,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
 	const struct iphdr *iph;
 	u16 ul;
 
+	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+		return 1;
+
 	/*
 	 * Support both UDP and UDPLITE checksum algorithms, Don't use
 	 * udph->len to get the real length without any protocol check,
@@ -282,6 +285,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
 	const struct ipv6hdr *ip6h;
 	u16 ul;
 
+	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+		return 1;
+
 	/*
 	 * Support both UDP and UDPLITE checksum algorithms, Don't use
 	 * udph->len to get the real length without any protocol check,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 8ccd35825b6b..3007cb1310ea 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -248,6 +248,22 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
 	return ret;
 }
 
+#ifdef CONFIG_MODULES
+static const char *ife_meta_id2name(u32 metaid)
+{
+	switch (metaid) {
+	case IFE_META_SKBMARK:
+		return "skbmark";
+	case IFE_META_PRIO:
+		return "skbprio";
+	case IFE_META_TCINDEX:
+		return "tcindex";
+	default:
+		return "unknown";
+	}
+}
+#endif
+
 /* called when adding new meta information
  * under ife->tcf_lock for existing action
 */
@@ -263,7 +279,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
 	if (exists)
 		spin_unlock_bh(&ife->tcf_lock);
 	rtnl_unlock();
-	request_module("ifemeta%u", metaid);
+	request_module("ife-meta-%s", ife_meta_id2name(metaid));
 	rtnl_lock();
 	if (exists)
 		spin_lock_bh(&ife->tcf_lock);
@@ -392,10 +408,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
 static void tcf_ife_cleanup(struct tc_action *a, int bind)
 {
 	struct tcf_ife_info *ife = to_ife(a);
+	struct tcf_ife_params *p;
 
 	spin_lock_bh(&ife->tcf_lock);
 	_tcf_ife_cleanup(a, bind);
 	spin_unlock_bh(&ife->tcf_lock);
+
+	p = rcu_dereference_protected(ife->params, 1);
+	kfree_rcu(p, rcu);
 }
 
 /* under ife->tcf_lock for existing action */
@@ -432,6 +452,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
 	struct nlattr *tb2[IFE_META_MAX + 1];
+	struct tcf_ife_params *p, *p_old;
 	struct tcf_ife_info *ife;
 	u16 ife_type = ETH_P_IFE;
 	struct tc_ife *parm;
@@ -450,24 +471,41 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_IFE_PARMS]);
 
+	/* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because
+	 * they cannot run as the same time. Check on all other values which
+	 * are not supported right now.
+	 */
+	if (parm->flags & ~IFE_ENCODE)
+		return -EINVAL;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
 	exists = tcf_idr_check(tn, parm->index, a, bind);
-	if (exists && bind)
+	if (exists && bind) {
+		kfree(p);
 		return 0;
+	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
-				     bind, false);
-		if (ret)
+				     bind, true);
+		if (ret) {
+			kfree(p);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
 	} else {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			kfree(p);
 			return -EEXIST;
+		}
 	}
 
 	ife = to_ife(*a);
-	ife->flags = parm->flags;
+	p->flags = parm->flags;
 
 	if (parm->flags & IFE_ENCODE) {
 		if (tb[TCA_IFE_TYPE])
@@ -478,24 +516,25 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			saddr = nla_data(tb[TCA_IFE_SMAC]);
 	}
 
-	if (exists)
-		spin_lock_bh(&ife->tcf_lock);
 	ife->tcf_action = parm->action;
 
 	if (parm->flags & IFE_ENCODE) {
 		if (daddr)
-			ether_addr_copy(ife->eth_dst, daddr);
+			ether_addr_copy(p->eth_dst, daddr);
 		else
-			eth_zero_addr(ife->eth_dst);
+			eth_zero_addr(p->eth_dst);
 
 		if (saddr)
-			ether_addr_copy(ife->eth_src, saddr);
+			ether_addr_copy(p->eth_src, saddr);
 		else
-			eth_zero_addr(ife->eth_src);
+			eth_zero_addr(p->eth_src);
 
-		ife->eth_type = ife_type;
+		p->eth_type = ife_type;
 	}
 
+	if (exists)
+		spin_lock_bh(&ife->tcf_lock);
+
 	if (ret == ACT_P_CREATED)
 		INIT_LIST_HEAD(&ife->metalist);
 
@@ -511,6 +550,7 @@ metadata_parse_err:
 
 			if (exists)
 				spin_unlock_bh(&ife->tcf_lock);
+			kfree(p);
 			return err;
 		}
 
@@ -531,6 +571,7 @@ metadata_parse_err:
 
 			if (exists)
 				spin_unlock_bh(&ife->tcf_lock);
+			kfree(p);
 			return err;
 		}
 	}
@@ -538,6 +579,11 @@ metadata_parse_err:
 	if (exists)
 		spin_unlock_bh(&ife->tcf_lock);
 
+	p_old = rtnl_dereference(ife->params);
+	rcu_assign_pointer(ife->params, p);
+	if (p_old)
+		kfree_rcu(p_old, rcu);
+
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 
@@ -549,12 +595,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_ife_info *ife = to_ife(a);
+	struct tcf_ife_params *p = rtnl_dereference(ife->params);
 	struct tc_ife opt = {
 		.index = ife->tcf_index,
 		.refcnt = ife->tcf_refcnt - ref,
 		.bindcnt = ife->tcf_bindcnt - bind,
 		.action = ife->tcf_action,
-		.flags = ife->flags,
+		.flags = p->flags,
 	};
 	struct tcf_t t;
 
@@ -565,17 +612,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
 		goto nla_put_failure;
 
-	if (!is_zero_ether_addr(ife->eth_dst)) {
-		if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst))
+	if (!is_zero_ether_addr(p->eth_dst)) {
+		if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst))
 			goto nla_put_failure;
 	}
 
-	if (!is_zero_ether_addr(ife->eth_src)) {
-		if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src))
+	if (!is_zero_ether_addr(p->eth_src)) {
+		if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src))
 			goto nla_put_failure;
 	}
 
-	if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type))
+	if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
 		goto nla_put_failure;
 
 	if (dump_metalist(skb, ife)) {
@@ -617,19 +664,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
 	u8 *tlv_data;
 	u16 metalen;
 
-	spin_lock(&ife->tcf_lock);
-	bstats_update(&ife->tcf_bstats, skb);
+	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
 	tcf_lastuse_update(&ife->tcf_tm);
-	spin_unlock(&ife->tcf_lock);
 
 	if (skb_at_tc_ingress(skb))
 		skb_push(skb, skb->dev->hard_header_len);
 
 	tlv_data = ife_decode(skb, &metalen);
 	if (unlikely(!tlv_data)) {
-		spin_lock(&ife->tcf_lock);
-		ife->tcf_qstats.drops++;
-		spin_unlock(&ife->tcf_lock);
+		qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		return TC_ACT_SHOT;
 	}
 
@@ -647,14 +690,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
 			 */
 			pr_info_ratelimited("Unknown metaid %d dlen %d\n",
 					    mtype, dlen);
-			ife->tcf_qstats.overlimits++;
+			qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		}
 	}
 
 	if (WARN_ON(tlv_data != ifehdr_end)) {
-		spin_lock(&ife->tcf_lock);
-		ife->tcf_qstats.drops++;
-		spin_unlock(&ife->tcf_lock);
+		qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		return TC_ACT_SHOT;
 	}
 
@@ -683,7 +724,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
 }
 
 static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
-			  struct tcf_result *res)
+			  struct tcf_result *res, struct tcf_ife_params *p)
 {
 	struct tcf_ife_info *ife = to_ife(a);
 	int action = ife->tcf_action;
@@ -706,23 +747,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
 		exceed_mtu = true;
 	}
 
-	spin_lock(&ife->tcf_lock);
-	bstats_update(&ife->tcf_bstats, skb);
+	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
 	tcf_lastuse_update(&ife->tcf_tm);
 
 	if (!metalen) {		/* no metadata to send */
 		/* abuse overlimits to count when we allow packet
 		 * with no metadata
 		 */
-		ife->tcf_qstats.overlimits++;
-		spin_unlock(&ife->tcf_lock);
+		qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		return action;
 	}
 	/* could be stupid policy setup or mtu config
 	 * so lets be conservative.. */
 	if ((action == TC_ACT_SHOT) || exceed_mtu) {
-		ife->tcf_qstats.drops++;
-		spin_unlock(&ife->tcf_lock);
+		qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		return TC_ACT_SHOT;
 	}
 
@@ -731,6 +769,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
 
 	ife_meta = ife_encode(skb, metalen);
 
+	spin_lock(&ife->tcf_lock);
+
 	/* XXX: we dont have a clever way of telling encode to
 	 * not repeat some of the computations that are done by
 	 * ops->presence_check...
@@ -742,25 +782,24 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
 		}
 		if (err < 0) {
 			/* too corrupt to keep around if overwritten */
-			ife->tcf_qstats.drops++;
 			spin_unlock(&ife->tcf_lock);
+			qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
 			return TC_ACT_SHOT;
 		}
 		skboff += err;
 	}
+	spin_unlock(&ife->tcf_lock);
 	oethh = (struct ethhdr *)skb->data;
 
-	if (!is_zero_ether_addr(ife->eth_src))
-		ether_addr_copy(oethh->h_source, ife->eth_src);
-	if (!is_zero_ether_addr(ife->eth_dst))
-		ether_addr_copy(oethh->h_dest, ife->eth_dst);
-	oethh->h_proto = htons(ife->eth_type);
+	if (!is_zero_ether_addr(p->eth_src))
+		ether_addr_copy(oethh->h_source, p->eth_src);
+	if (!is_zero_ether_addr(p->eth_dst))
+		ether_addr_copy(oethh->h_dest, p->eth_dst);
+	oethh->h_proto = htons(p->eth_type);
 
 	if (skb_at_tc_ingress(skb))
 		skb_pull(skb, skb->dev->hard_header_len);
 
-	spin_unlock(&ife->tcf_lock);
-
 	return action;
 }
 
@@ -768,21 +807,19 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
 		       struct tcf_result *res)
 {
 	struct tcf_ife_info *ife = to_ife(a);
+	struct tcf_ife_params *p;
+	int ret;
+
+	rcu_read_lock();
+	p = rcu_dereference(ife->params);
+	if (p->flags & IFE_ENCODE) {
+		ret = tcf_ife_encode(skb, a, res, p);
+		rcu_read_unlock();
+		return ret;
+	}
+	rcu_read_unlock();
 
-	if (ife->flags & IFE_ENCODE)
-		return tcf_ife_encode(skb, a, res);
-
-	if (!(ife->flags & IFE_ENCODE))
-		return tcf_ife_decode(skb, a, res);
-
-	pr_info_ratelimited("unknown failure(policy neither de/encode\n");
-	spin_lock(&ife->tcf_lock);
-	bstats_update(&ife->tcf_bstats, skb);
-	tcf_lastuse_update(&ife->tcf_tm);
-	ife->tcf_qstats.drops++;
-	spin_unlock(&ife->tcf_lock);
-
-	return TC_ACT_SHOT;
+	return tcf_ife_decode(skb, a, res);
 }
 
 static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
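
The act_ife rework above and the act_vlan one below share one idiom: action parameters move into a separately allocated struct behind an RCU pointer, so the datapath reads them without the per-action spinlock while the control path (under RTNL) swaps in a whole new block and frees the old one after a grace period. A stripped-down sketch of the idiom with hypothetical names:

#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

struct my_params {
	int value;
	struct rcu_head rcu;
};

struct my_action {
	struct my_params __rcu *params;
};

/* Datapath: lockless read inside an RCU read-side section. */
static int my_act(struct my_action *a)
{
	struct my_params *p;
	int value;

	rcu_read_lock();
	p = rcu_dereference(a->params);
	value = p->value;
	rcu_read_unlock();
	return value;
}

/* Control path, RTNL held: publish new params, defer freeing the old. */
static int my_update(struct my_action *a, int value)
{
	struct my_params *p, *p_old;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->value = value;

	p_old = rtnl_dereference(a->params);
	rcu_assign_pointer(a->params, p);
	if (p_old)
		kfree_rcu(p_old, rcu);
	return 0;
}
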
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 82892170ce4f..1e3f10e5da99 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module);
 MODULE_AUTHOR("Jamal Hadi Salim(2015)");
 MODULE_DESCRIPTION("Inter-FE skb mark metadata module");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBMARK);
+MODULE_ALIAS_IFE_META("skbmark");
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
index 26bf4d86030b..4033f9fc4d4a 100644
--- a/net/sched/act_meta_skbprio.c
+++ b/net/sched/act_meta_skbprio.c
@@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module);
 MODULE_AUTHOR("Jamal Hadi Salim(2015)");
 MODULE_DESCRIPTION("Inter-FE skb prio metadata action");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_PRIO);
+MODULE_ALIAS_IFE_META("skbprio");
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 3b35774ce890..2ea1f26c9e96 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module);
 MODULE_AUTHOR("Jamal Hadi Salim(2016)");
 MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX);
+MODULE_ALIAS_IFE_META("tcindex");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 416627c66f08..8b3e59388480 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -140,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
140 m->tcfm_eaction = parm->eaction; 140 m->tcfm_eaction = parm->eaction;
141 if (dev != NULL) { 141 if (dev != NULL) {
142 m->tcfm_ifindex = parm->ifindex; 142 m->tcfm_ifindex = parm->ifindex;
143 m->net = net;
143 if (ret != ACT_P_CREATED) 144 if (ret != ACT_P_CREATED)
144 dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); 145 dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
145 dev_hold(dev); 146 dev_hold(dev);
@@ -313,15 +314,11 @@ static struct notifier_block mirred_device_notifier = {
313 .notifier_call = mirred_device_event, 314 .notifier_call = mirred_device_event,
314}; 315};
315 316
316static int tcf_mirred_device(const struct tc_action *a, struct net *net, 317static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
317 struct net_device **mirred_dev)
318{ 318{
319 int ifindex = tcf_mirred_ifindex(a); 319 struct tcf_mirred *m = to_mirred(a);
320 320
321 *mirred_dev = __dev_get_by_index(net, ifindex); 321 return __dev_get_by_index(m->net, m->tcfm_ifindex);
322 if (!*mirred_dev)
323 return -EINVAL;
324 return 0;
325} 322}
326 323
327static struct tc_action_ops act_mirred_ops = { 324static struct tc_action_ops act_mirred_ops = {
@@ -336,7 +333,7 @@ static struct tc_action_ops act_mirred_ops = {
336 .walk = tcf_mirred_walker, 333 .walk = tcf_mirred_walker,
337 .lookup = tcf_mirred_search, 334 .lookup = tcf_mirred_search,
338 .size = sizeof(struct tcf_mirred), 335 .size = sizeof(struct tcf_mirred),
339 .get_dev = tcf_mirred_device, 336 .get_dev = tcf_mirred_get_dev,
340}; 337};
341 338
342static __net_init int mirred_init_net(struct net *net) 339static __net_init int mirred_init_net(struct net *net)
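The mirred conversion stores the action's netns at init time so the reworked ->get_dev() op can resolve the device itself and return it directly; the old int-plus-out-parameter form is gone. A caller-side sketch (the wrapper name is made up; the op signature is the one introduced above):

static struct net_device *act_get_dev_sketch(const struct tc_action *a)
{
	if (!a->ops->get_dev)
		return NULL;

	/* tcf_mirred_get_dev() looks up m->tcfm_ifindex in m->net;
	 * a NULL return now simply means "no device", replacing -EINVAL
	 */
	return a->ops->get_dev(a);
}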
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index ec986ae52808..8b5abcd2f32f 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -264,12 +264,13 @@ static int __init sample_init_module(void)
264 264
265static void __exit sample_cleanup_module(void) 265static void __exit sample_cleanup_module(void)
266{ 266{
267 rcu_barrier();
267 tcf_unregister_action(&act_sample_ops, &sample_net_ops); 268 tcf_unregister_action(&act_sample_ops, &sample_net_ops);
268} 269}
269 270
270module_init(sample_init_module); 271module_init(sample_init_module);
271module_exit(sample_cleanup_module); 272module_exit(sample_cleanup_module);
272 273
273MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); 274MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
274MODULE_DESCRIPTION("Packet sampling action"); 275MODULE_DESCRIPTION("Packet sampling action");
275MODULE_LICENSE("GPL v2"); 276MODULE_LICENSE("GPL v2");
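The one-line act_sample fix orders module teardown against RCU: any call_rcu() callbacks the action has queued must execute before the code they point into is unloaded. As a sketch of the general pattern (function name hypothetical):

static void __exit example_cleanup(void)
{
	/* wait for all call_rcu() callbacks queued by this module;
	 * synchronize_rcu() alone would only wait for readers, not
	 * for already-queued callbacks
	 */
	rcu_barrier();
	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
}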
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 16eb067a8d8f..97f717a13ad5 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -26,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
26 struct tcf_result *res) 26 struct tcf_result *res)
27{ 27{
28 struct tcf_vlan *v = to_vlan(a); 28 struct tcf_vlan *v = to_vlan(a);
29 struct tcf_vlan_params *p;
29 int action; 30 int action;
30 int err; 31 int err;
31 u16 tci; 32 u16 tci;
32 33
33 spin_lock(&v->tcf_lock);
34 tcf_lastuse_update(&v->tcf_tm); 34 tcf_lastuse_update(&v->tcf_tm);
35 bstats_update(&v->tcf_bstats, skb); 35 bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
36 action = v->tcf_action;
37 36
38 /* Ensure 'data' points at mac_header prior calling vlan manipulating 37 /* Ensure 'data' points at mac_header prior calling vlan manipulating
39 * functions. 38 * functions.
@@ -41,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
41 if (skb_at_tc_ingress(skb)) 40 if (skb_at_tc_ingress(skb))
42 skb_push_rcsum(skb, skb->mac_len); 41 skb_push_rcsum(skb, skb->mac_len);
43 42
44 switch (v->tcfv_action) { 43 rcu_read_lock();
44
45 action = READ_ONCE(v->tcf_action);
46
47 p = rcu_dereference(v->vlan_p);
48
49 switch (p->tcfv_action) {
45 case TCA_VLAN_ACT_POP: 50 case TCA_VLAN_ACT_POP:
46 err = skb_vlan_pop(skb); 51 err = skb_vlan_pop(skb);
47 if (err) 52 if (err)
48 goto drop; 53 goto drop;
49 break; 54 break;
50 case TCA_VLAN_ACT_PUSH: 55 case TCA_VLAN_ACT_PUSH:
51 err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | 56 err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid |
52 (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); 57 (p->tcfv_push_prio << VLAN_PRIO_SHIFT));
53 if (err) 58 if (err)
54 goto drop; 59 goto drop;
55 break; 60 break;
@@ -68,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
68 goto drop; 73 goto drop;
69 } 74 }
70 /* replace the vid */ 75 /* replace the vid */
71 tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; 76 tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
72 /* replace prio bits, if tcfv_push_prio specified */ 77 /* replace prio bits, if tcfv_push_prio specified */
73 if (v->tcfv_push_prio) { 78 if (p->tcfv_push_prio) {
74 tci &= ~VLAN_PRIO_MASK; 79 tci &= ~VLAN_PRIO_MASK;
75 tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; 80 tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
76 } 81 }
77 /* put updated tci as hwaccel tag */ 82 /* put updated tci as hwaccel tag */
78 __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); 83 __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci);
79 break; 84 break;
80 default: 85 default:
81 BUG(); 86 BUG();
@@ -85,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
85 90
86drop: 91drop:
87 action = TC_ACT_SHOT; 92 action = TC_ACT_SHOT;
88 v->tcf_qstats.drops++; 93 qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
94
89unlock: 95unlock:
96 rcu_read_unlock();
90 if (skb_at_tc_ingress(skb)) 97 if (skb_at_tc_ingress(skb))
91 skb_pull_rcsum(skb, skb->mac_len); 98 skb_pull_rcsum(skb, skb->mac_len);
92 99
93 spin_unlock(&v->tcf_lock);
94 return action; 100 return action;
95} 101}
96 102
@@ -107,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
107{ 113{
108 struct tc_action_net *tn = net_generic(net, vlan_net_id); 114 struct tc_action_net *tn = net_generic(net, vlan_net_id);
109 struct nlattr *tb[TCA_VLAN_MAX + 1]; 115 struct nlattr *tb[TCA_VLAN_MAX + 1];
116 struct tcf_vlan_params *p, *p_old;
110 struct tc_vlan *parm; 117 struct tc_vlan *parm;
111 struct tcf_vlan *v; 118 struct tcf_vlan *v;
112 int action; 119 int action;
@@ -172,7 +179,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
172 179
173 if (!exists) { 180 if (!exists) {
174 ret = tcf_idr_create(tn, parm->index, est, a, 181 ret = tcf_idr_create(tn, parm->index, est, a,
175 &act_vlan_ops, bind, false); 182 &act_vlan_ops, bind, true);
176 if (ret) 183 if (ret)
177 return ret; 184 return ret;
178 185
@@ -185,46 +192,67 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
185 192
186 v = to_vlan(*a); 193 v = to_vlan(*a);
187 194
188 spin_lock_bh(&v->tcf_lock); 195 ASSERT_RTNL();
189 196 p = kzalloc(sizeof(*p), GFP_KERNEL);
190 v->tcfv_action = action; 197 if (!p) {
191 v->tcfv_push_vid = push_vid; 198 if (ovr)
192 v->tcfv_push_prio = push_prio; 199 tcf_idr_release(*a, bind);
193 v->tcfv_push_proto = push_proto; 200 return -ENOMEM;
201 }
194 202
195 v->tcf_action = parm->action; 203 v->tcf_action = parm->action;
196 204
197 spin_unlock_bh(&v->tcf_lock); 205 p_old = rtnl_dereference(v->vlan_p);
206
207 p->tcfv_action = action;
208 p->tcfv_push_vid = push_vid;
209 p->tcfv_push_prio = push_prio;
210 p->tcfv_push_proto = push_proto;
211
212 rcu_assign_pointer(v->vlan_p, p);
213
214 if (p_old)
215 kfree_rcu(p_old, rcu);
198 216
199 if (ret == ACT_P_CREATED) 217 if (ret == ACT_P_CREATED)
200 tcf_idr_insert(tn, *a); 218 tcf_idr_insert(tn, *a);
201 return ret; 219 return ret;
202} 220}
203 221
222static void tcf_vlan_cleanup(struct tc_action *a, int bind)
223{
224 struct tcf_vlan *v = to_vlan(a);
225 struct tcf_vlan_params *p;
226
227 p = rcu_dereference_protected(v->vlan_p, 1);
228 kfree_rcu(p, rcu);
229}
230
204static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, 231static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
205 int bind, int ref) 232 int bind, int ref)
206{ 233{
207 unsigned char *b = skb_tail_pointer(skb); 234 unsigned char *b = skb_tail_pointer(skb);
208 struct tcf_vlan *v = to_vlan(a); 235 struct tcf_vlan *v = to_vlan(a);
236 struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
209 struct tc_vlan opt = { 237 struct tc_vlan opt = {
210 .index = v->tcf_index, 238 .index = v->tcf_index,
211 .refcnt = v->tcf_refcnt - ref, 239 .refcnt = v->tcf_refcnt - ref,
212 .bindcnt = v->tcf_bindcnt - bind, 240 .bindcnt = v->tcf_bindcnt - bind,
213 .action = v->tcf_action, 241 .action = v->tcf_action,
214 .v_action = v->tcfv_action, 242 .v_action = p->tcfv_action,
215 }; 243 };
216 struct tcf_t t; 244 struct tcf_t t;
217 245
218 if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) 246 if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
219 goto nla_put_failure; 247 goto nla_put_failure;
220 248
221 if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || 249 if ((p->tcfv_action == TCA_VLAN_ACT_PUSH ||
222 v->tcfv_action == TCA_VLAN_ACT_MODIFY) && 250 p->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
223 (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || 251 (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) ||
224 nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, 252 nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
225 v->tcfv_push_proto) || 253 p->tcfv_push_proto) ||
226 (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, 254 (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
227 v->tcfv_push_prio)))) 255 p->tcfv_push_prio))))
228 goto nla_put_failure; 256 goto nla_put_failure;
229 257
230 tcf_tm_dump(&t, &v->tcf_tm); 258 tcf_tm_dump(&t, &v->tcf_tm);
@@ -260,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = {
260 .act = tcf_vlan, 288 .act = tcf_vlan,
261 .dump = tcf_vlan_dump, 289 .dump = tcf_vlan_dump,
262 .init = tcf_vlan_init, 290 .init = tcf_vlan_init,
291 .cleanup = tcf_vlan_cleanup,
263 .walk = tcf_vlan_walker, 292 .walk = tcf_vlan_walker,
264 .lookup = tcf_vlan_search, 293 .lookup = tcf_vlan_search,
265 .size = sizeof(struct tcf_vlan), 294 .size = sizeof(struct tcf_vlan),
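The act_vlan rewrite is the standard RCU parameter-struct split: the fast path reads an immutable tcf_vlan_params under rcu_read_lock() and updates per-CPU stats, while the RTNL-protected control path builds a fresh struct, publishes it with rcu_assign_pointer(), and frees the old one after a grace period. The two halves condensed into a sketch (names follow the patch, bodies abridged):

static int vlan_act_sketch(struct sk_buff *skb, struct tcf_vlan *v)
{
	struct tcf_vlan_params *p;
	int action;

	rcu_read_lock();
	action = READ_ONCE(v->tcf_action);
	p = rcu_dereference(v->vlan_p);
	/* ... pop/push/modify the tag using only p->tcfv_* fields ... */
	rcu_read_unlock();

	return action;
}

static void vlan_swap_params(struct tcf_vlan *v, struct tcf_vlan_params *p_new)
{
	struct tcf_vlan_params *p_old;

	ASSERT_RTNL();
	p_old = rtnl_dereference(v->vlan_p);
	rcu_assign_pointer(v->vlan_p, p_new);
	if (p_old)
		kfree_rcu(p_old, rcu);	/* freed after all readers drop out */
}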
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219adf520..7d97f612c9b9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ out:
77} 77}
78EXPORT_SYMBOL(register_tcf_proto_ops); 78EXPORT_SYMBOL(register_tcf_proto_ops);
79 79
80static struct workqueue_struct *tc_filter_wq;
81
80int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) 82int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
81{ 83{
82 struct tcf_proto_ops *t; 84 struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
86 * tcf_proto_ops's destroy() handler. 88 * tcf_proto_ops's destroy() handler.
87 */ 89 */
88 rcu_barrier(); 90 rcu_barrier();
91 flush_workqueue(tc_filter_wq);
89 92
90 write_lock(&cls_mod_lock); 93 write_lock(&cls_mod_lock);
91 list_for_each_entry(t, &tcf_proto_base, head) { 94 list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
100} 103}
101EXPORT_SYMBOL(unregister_tcf_proto_ops); 104EXPORT_SYMBOL(unregister_tcf_proto_ops);
102 105
106bool tcf_queue_work(struct work_struct *work)
107{
108 return queue_work(tc_filter_wq, work);
109}
110EXPORT_SYMBOL(tcf_queue_work);
111
103/* Select new prio value from the range, managed by kernel. */ 112/* Select new prio value from the range, managed by kernel. */
104 113
105static inline u32 tcf_auto_prio(struct tcf_proto *tp) 114static inline u32 tcf_auto_prio(struct tcf_proto *tp)
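tc_filter_wq is an ordered workqueue, so destruction work items run one at a time and in submission order; tcf_queue_work() is the thin wrapper the classifier conversions below build on. Their common two-stage teardown, as a sketch (the filter type and helpers are hypothetical):

static void example_delete_work(struct work_struct *work)
{
	struct example_filter *f = container_of(work, struct example_filter, work);

	rtnl_lock();
	__example_delete(f);	/* may sleep, so it cannot run in RCU context */
	rtnl_unlock();
}

static void example_delete_rcu(struct rcu_head *head)
{
	struct example_filter *f = container_of(head, struct example_filter, rcu);

	/* RCU callbacks run in softirq context and may not take RTNL;
	 * bounce the real work to the ordered tc filter workqueue
	 */
	INIT_WORK(&f->work, example_delete_work);
	tcf_queue_work(&f->work);
}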
@@ -186,16 +195,24 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
186 return chain; 195 return chain;
187} 196}
188 197
198static void tcf_chain_head_change(struct tcf_chain *chain,
199 struct tcf_proto *tp_head)
200{
201 if (chain->chain_head_change)
202 chain->chain_head_change(tp_head,
203 chain->chain_head_change_priv);
204}
205
189static void tcf_chain_flush(struct tcf_chain *chain) 206static void tcf_chain_flush(struct tcf_chain *chain)
190{ 207{
191 struct tcf_proto *tp; 208 struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
192 209
193 if (chain->p_filter_chain) 210 tcf_chain_head_change(chain, NULL);
194 RCU_INIT_POINTER(*chain->p_filter_chain, NULL); 211 while (tp) {
195 while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
196 RCU_INIT_POINTER(chain->filter_chain, tp->next); 212 RCU_INIT_POINTER(chain->filter_chain, tp->next);
197 tcf_chain_put(chain);
198 tcf_proto_destroy(tp); 213 tcf_proto_destroy(tp);
214 tp = rtnl_dereference(chain->filter_chain);
215 tcf_chain_put(chain);
199 } 216 }
200} 217}
201 218
@@ -233,15 +250,35 @@ void tcf_chain_put(struct tcf_chain *chain)
233} 250}
234EXPORT_SYMBOL(tcf_chain_put); 251EXPORT_SYMBOL(tcf_chain_put);
235 252
236static void 253static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
237tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, 254 struct tcf_block_ext_info *ei,
238 struct tcf_proto __rcu **p_filter_chain) 255 enum tc_block_command command)
239{ 256{
240 chain->p_filter_chain = p_filter_chain; 257 struct net_device *dev = q->dev_queue->dev;
258 struct tc_block_offload bo = {};
259
260 if (!dev->netdev_ops->ndo_setup_tc)
261 return;
262 bo.command = command;
263 bo.binder_type = ei->binder_type;
264 bo.block = block;
265 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
241} 266}
242 267
243int tcf_block_get(struct tcf_block **p_block, 268static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
244 struct tcf_proto __rcu **p_filter_chain) 269 struct tcf_block_ext_info *ei)
270{
271 tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
272}
273
274static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
275 struct tcf_block_ext_info *ei)
276{
277 tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
278}
279
280int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
281 struct tcf_block_ext_info *ei)
245{ 282{
246 struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); 283 struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
247 struct tcf_chain *chain; 284 struct tcf_chain *chain;
@@ -250,13 +287,20 @@ int tcf_block_get(struct tcf_block **p_block,
250 if (!block) 287 if (!block)
251 return -ENOMEM; 288 return -ENOMEM;
252 INIT_LIST_HEAD(&block->chain_list); 289 INIT_LIST_HEAD(&block->chain_list);
290 INIT_LIST_HEAD(&block->cb_list);
291
253 /* Create chain 0 by default, it has to be always present. */ 292 /* Create chain 0 by default, it has to be always present. */
254 chain = tcf_chain_create(block, 0); 293 chain = tcf_chain_create(block, 0);
255 if (!chain) { 294 if (!chain) {
256 err = -ENOMEM; 295 err = -ENOMEM;
257 goto err_chain_create; 296 goto err_chain_create;
258 } 297 }
259 tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); 298 WARN_ON(!ei->chain_head_change);
299 chain->chain_head_change = ei->chain_head_change;
300 chain->chain_head_change_priv = ei->chain_head_change_priv;
301 block->net = qdisc_net(q);
302 block->q = q;
303 tcf_block_offload_bind(block, q, ei);
260 *p_block = block; 304 *p_block = block;
261 return 0; 305 return 0;
262 306
@@ -264,43 +308,178 @@ err_chain_create:
264 kfree(block); 308 kfree(block);
265 return err; 309 return err;
266} 310}
311EXPORT_SYMBOL(tcf_block_get_ext);
312
313static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
314{
315 struct tcf_proto __rcu **p_filter_chain = priv;
316
317 rcu_assign_pointer(*p_filter_chain, tp_head);
318}
319
320int tcf_block_get(struct tcf_block **p_block,
321 struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
322{
323 struct tcf_block_ext_info ei = {
324 .chain_head_change = tcf_chain_head_change_dflt,
325 .chain_head_change_priv = p_filter_chain,
326 };
327
328 WARN_ON(!p_filter_chain);
329 return tcf_block_get_ext(p_block, q, &ei);
330}
267EXPORT_SYMBOL(tcf_block_get); 331EXPORT_SYMBOL(tcf_block_get);
268 332
269void tcf_block_put(struct tcf_block *block) 333static void tcf_block_put_final(struct work_struct *work)
270{ 334{
335 struct tcf_block *block = container_of(work, struct tcf_block, work);
271 struct tcf_chain *chain, *tmp; 336 struct tcf_chain *chain, *tmp;
272 337
338 rtnl_lock();
339 /* Only chain 0 should be still here. */
340 list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
341 tcf_chain_put(chain);
342 rtnl_unlock();
343 kfree(block);
344}
345
346/* XXX: Standalone actions are not allowed to jump to any chain, and bound
347 * actions should be all removed after flushing. However, filters are now
348 * destroyed in tc filter workqueue with RTNL lock, they can not race here.
349 */
350void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
351 struct tcf_block_ext_info *ei)
352{
353 struct tcf_chain *chain, *tmp;
354
355 list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
356 tcf_chain_flush(chain);
357
358 tcf_block_offload_unbind(block, q, ei);
359
360 INIT_WORK(&block->work, tcf_block_put_final);
361 /* Wait for existing RCU callbacks to cool down, make sure their works
362 * have been queued before this. We can not flush pending works here
363 * because we are holding the RTNL lock.
364 */
365 rcu_barrier();
366 tcf_queue_work(&block->work);
367}
368EXPORT_SYMBOL(tcf_block_put_ext);
369
370void tcf_block_put(struct tcf_block *block)
371{
372 struct tcf_block_ext_info ei = {0, };
373
273 if (!block) 374 if (!block)
274 return; 375 return;
376 tcf_block_put_ext(block, block->q, &ei);
377}
275 378
276 /* XXX: Standalone actions are not allowed to jump to any chain, and 379EXPORT_SYMBOL(tcf_block_put);
277 * bound actions should be all removed after flushing. However,
278 * filters are destroyed in RCU callbacks, we have to hold the chains
279 * first, otherwise we would always race with RCU callbacks on this list
280 * without proper locking.
281 */
282 380
283 /* Wait for existing RCU callbacks to cool down. */ 381struct tcf_block_cb {
284 rcu_barrier(); 382 struct list_head list;
383 tc_setup_cb_t *cb;
384 void *cb_ident;
385 void *cb_priv;
386 unsigned int refcnt;
387};
285 388
286 /* Hold a refcnt for all chains, except 0, in case they are gone. */ 389void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
287 list_for_each_entry(chain, &block->chain_list, list) 390{
288 if (chain->index) 391 return block_cb->cb_priv;
289 tcf_chain_hold(chain); 392}
393EXPORT_SYMBOL(tcf_block_cb_priv);
290 394
291 /* No race on the list, because no chain could be destroyed. */ 395struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
292 list_for_each_entry(chain, &block->chain_list, list) 396 tc_setup_cb_t *cb, void *cb_ident)
293 tcf_chain_flush(chain); 397{ struct tcf_block_cb *block_cb;
294 398
295 /* Wait for RCU callbacks to release the reference count. */ 399 list_for_each_entry(block_cb, &block->cb_list, list)
296 rcu_barrier(); 400 if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
401 return block_cb;
402 return NULL;
403}
404EXPORT_SYMBOL(tcf_block_cb_lookup);
297 405
298 /* At this point, all the chains should have refcnt == 1. */ 406void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
299 list_for_each_entry_safe(chain, tmp, &block->chain_list, list) 407{
300 tcf_chain_put(chain); 408 block_cb->refcnt++;
301 kfree(block); 409}
410EXPORT_SYMBOL(tcf_block_cb_incref);
411
412unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
413{
414 return --block_cb->refcnt;
415}
416EXPORT_SYMBOL(tcf_block_cb_decref);
417
418struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
419 tc_setup_cb_t *cb, void *cb_ident,
420 void *cb_priv)
421{
422 struct tcf_block_cb *block_cb;
423
424 block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
425 if (!block_cb)
426 return NULL;
427 block_cb->cb = cb;
428 block_cb->cb_ident = cb_ident;
429 block_cb->cb_priv = cb_priv;
430 list_add(&block_cb->list, &block->cb_list);
431 return block_cb;
432}
433EXPORT_SYMBOL(__tcf_block_cb_register);
434
435int tcf_block_cb_register(struct tcf_block *block,
436 tc_setup_cb_t *cb, void *cb_ident,
437 void *cb_priv)
438{
439 struct tcf_block_cb *block_cb;
440
441 block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
442 return block_cb ? 0 : -ENOMEM;
443}
444EXPORT_SYMBOL(tcf_block_cb_register);
445
446void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
447{
448 list_del(&block_cb->list);
449 kfree(block_cb);
450}
451EXPORT_SYMBOL(__tcf_block_cb_unregister);
452
453void tcf_block_cb_unregister(struct tcf_block *block,
454 tc_setup_cb_t *cb, void *cb_ident)
455{
456 struct tcf_block_cb *block_cb;
457
458 block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
459 if (!block_cb)
460 return;
461 __tcf_block_cb_unregister(block_cb);
462}
463EXPORT_SYMBOL(tcf_block_cb_unregister);
464
465static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
466 void *type_data, bool err_stop)
467{
468 struct tcf_block_cb *block_cb;
469 int ok_count = 0;
470 int err;
471
472 list_for_each_entry(block_cb, &block->cb_list, list) {
473 err = block_cb->cb(type, type_data, block_cb->cb_priv);
474 if (err) {
475 if (err_stop)
476 return err;
477 } else {
478 ok_count++;
479 }
480 }
481 return ok_count;
302} 482}
303EXPORT_SYMBOL(tcf_block_put);
304 483
305/* Main classifier routine: scans classifier chain attached 484/* Main classifier routine: scans classifier chain attached
306 * to this qdisc, (optionally) tests for protocol and asks 485 * to this qdisc, (optionally) tests for protocol and asks
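The per-block callback list above gives drivers a registration point that outlives any single classifier: on TC_BLOCK_BIND they attach a callback, and every later tc_setup_cb_call() on that block fans out to it. A driver-side sketch of the expected usage (driver names are hypothetical; the tcf_block_cb_* helpers are the ones added above):

static int example_setup_cb(enum tc_setup_type type, void *type_data,
			    void *cb_priv)
{
	/* dispatch on type: TC_SETUP_CLSFLOWER, TC_SETUP_CLSBPF, ... */
	return -EOPNOTSUPP;
}

static int example_setup_tc_block(struct example_priv *priv,
				  struct tc_block_offload *bo)
{
	switch (bo->command) {
	case TC_BLOCK_BIND:
		/* priv doubles as the identity cookie (cb_ident) here */
		return tcf_block_cb_register(bo->block, example_setup_cb,
					     priv, priv);
	case TC_BLOCK_UNBIND:
		tcf_block_cb_unregister(bo->block, example_setup_cb, priv);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}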
@@ -370,9 +549,8 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
370 struct tcf_chain_info *chain_info, 549 struct tcf_chain_info *chain_info,
371 struct tcf_proto *tp) 550 struct tcf_proto *tp)
372{ 551{
373 if (chain->p_filter_chain && 552 if (*chain_info->pprev == chain->filter_chain)
374 *chain_info->pprev == chain->filter_chain) 553 tcf_chain_head_change(chain, tp);
375 rcu_assign_pointer(*chain->p_filter_chain, tp);
376 RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); 554 RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
377 rcu_assign_pointer(*chain_info->pprev, tp); 555 rcu_assign_pointer(*chain_info->pprev, tp);
378 tcf_chain_hold(chain); 556 tcf_chain_hold(chain);
@@ -384,8 +562,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
384{ 562{
385 struct tcf_proto *next = rtnl_dereference(chain_info->next); 563 struct tcf_proto *next = rtnl_dereference(chain_info->next);
386 564
387 if (chain->p_filter_chain && tp == chain->filter_chain) 565 if (tp == chain->filter_chain)
388 RCU_INIT_POINTER(*chain->p_filter_chain, next); 566 tcf_chain_head_change(chain, next);
389 RCU_INIT_POINTER(*chain_info->pprev, next); 567 RCU_INIT_POINTER(*chain_info->pprev, next);
390 tcf_chain_put(chain); 568 tcf_chain_put(chain);
391} 569}
@@ -418,8 +596,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
418} 596}
419 597
420static int tcf_fill_node(struct net *net, struct sk_buff *skb, 598static int tcf_fill_node(struct net *net, struct sk_buff *skb,
421 struct tcf_proto *tp, void *fh, u32 portid, 599 struct tcf_proto *tp, struct Qdisc *q, u32 parent,
422 u32 seq, u16 flags, int event) 600 void *fh, u32 portid, u32 seq, u16 flags, int event)
423{ 601{
424 struct tcmsg *tcm; 602 struct tcmsg *tcm;
425 struct nlmsghdr *nlh; 603 struct nlmsghdr *nlh;
@@ -432,8 +610,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
432 tcm->tcm_family = AF_UNSPEC; 610 tcm->tcm_family = AF_UNSPEC;
433 tcm->tcm__pad1 = 0; 611 tcm->tcm__pad1 = 0;
434 tcm->tcm__pad2 = 0; 612 tcm->tcm__pad2 = 0;
435 tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; 613 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
436 tcm->tcm_parent = tp->classid; 614 tcm->tcm_parent = parent;
437 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); 615 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
438 if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) 616 if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
439 goto nla_put_failure; 617 goto nla_put_failure;
@@ -456,6 +634,7 @@ nla_put_failure:
456 634
457static int tfilter_notify(struct net *net, struct sk_buff *oskb, 635static int tfilter_notify(struct net *net, struct sk_buff *oskb,
458 struct nlmsghdr *n, struct tcf_proto *tp, 636 struct nlmsghdr *n, struct tcf_proto *tp,
637 struct Qdisc *q, u32 parent,
459 void *fh, int event, bool unicast) 638 void *fh, int event, bool unicast)
460{ 639{
461 struct sk_buff *skb; 640 struct sk_buff *skb;
@@ -465,7 +644,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
465 if (!skb) 644 if (!skb)
466 return -ENOBUFS; 645 return -ENOBUFS;
467 646
468 if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 647 if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
469 n->nlmsg_flags, event) <= 0) { 648 n->nlmsg_flags, event) <= 0) {
470 kfree_skb(skb); 649 kfree_skb(skb);
471 return -EINVAL; 650 return -EINVAL;
@@ -480,6 +659,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
480 659
481static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, 660static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
482 struct nlmsghdr *n, struct tcf_proto *tp, 661 struct nlmsghdr *n, struct tcf_proto *tp,
662 struct Qdisc *q, u32 parent,
483 void *fh, bool unicast, bool *last) 663 void *fh, bool unicast, bool *last)
484{ 664{
485 struct sk_buff *skb; 665 struct sk_buff *skb;
@@ -490,7 +670,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
490 if (!skb) 670 if (!skb)
491 return -ENOBUFS; 671 return -ENOBUFS;
492 672
493 if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 673 if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
494 n->nlmsg_flags, RTM_DELTFILTER) <= 0) { 674 n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
495 kfree_skb(skb); 675 kfree_skb(skb);
496 return -EINVAL; 676 return -EINVAL;
@@ -510,6 +690,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
510} 690}
511 691
512static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, 692static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
693 struct Qdisc *q, u32 parent,
513 struct nlmsghdr *n, 694 struct nlmsghdr *n,
514 struct tcf_chain *chain, int event) 695 struct tcf_chain *chain, int event)
515{ 696{
@@ -517,7 +698,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
517 698
518 for (tp = rtnl_dereference(chain->filter_chain); 699 for (tp = rtnl_dereference(chain->filter_chain);
519 tp; tp = rtnl_dereference(tp->next)) 700 tp; tp = rtnl_dereference(tp->next))
520 tfilter_notify(net, oskb, n, tp, 0, event, false); 701 tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
521} 702}
522 703
523/* Add/change/delete/get a filter node */ 704/* Add/change/delete/get a filter node */
@@ -636,7 +817,8 @@ replay:
636 } 817 }
637 818
638 if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { 819 if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
639 tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); 820 tfilter_notify_chain(net, skb, q, parent, n,
821 chain, RTM_DELTFILTER);
640 tcf_chain_flush(chain); 822 tcf_chain_flush(chain);
641 err = 0; 823 err = 0;
642 goto errout; 824 goto errout;
@@ -683,7 +865,7 @@ replay:
683 if (!fh) { 865 if (!fh) {
684 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { 866 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
685 tcf_chain_tp_remove(chain, &chain_info, tp); 867 tcf_chain_tp_remove(chain, &chain_info, tp);
686 tfilter_notify(net, skb, n, tp, fh, 868 tfilter_notify(net, skb, n, tp, q, parent, fh,
687 RTM_DELTFILTER, false); 869 RTM_DELTFILTER, false);
688 tcf_proto_destroy(tp); 870 tcf_proto_destroy(tp);
689 err = 0; 871 err = 0;
@@ -708,8 +890,8 @@ replay:
708 } 890 }
709 break; 891 break;
710 case RTM_DELTFILTER: 892 case RTM_DELTFILTER:
711 err = tfilter_del_notify(net, skb, n, tp, fh, false, 893 err = tfilter_del_notify(net, skb, n, tp, q, parent,
712 &last); 894 fh, false, &last);
713 if (err) 895 if (err)
714 goto errout; 896 goto errout;
715 if (last) { 897 if (last) {
@@ -718,7 +900,7 @@ replay:
718 } 900 }
719 goto errout; 901 goto errout;
720 case RTM_GETTFILTER: 902 case RTM_GETTFILTER:
721 err = tfilter_notify(net, skb, n, tp, fh, 903 err = tfilter_notify(net, skb, n, tp, q, parent, fh,
722 RTM_NEWTFILTER, true); 904 RTM_NEWTFILTER, true);
723 goto errout; 905 goto errout;
724 default: 906 default:
@@ -732,7 +914,8 @@ replay:
732 if (err == 0) { 914 if (err == 0) {
733 if (tp_created) 915 if (tp_created)
734 tcf_chain_tp_insert(chain, &chain_info, tp); 916 tcf_chain_tp_insert(chain, &chain_info, tp);
735 tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); 917 tfilter_notify(net, skb, n, tp, q, parent, fh,
918 RTM_NEWTFILTER, false);
736 } else { 919 } else {
737 if (tp_created) 920 if (tp_created)
738 tcf_proto_destroy(tp); 921 tcf_proto_destroy(tp);
@@ -751,6 +934,8 @@ struct tcf_dump_args {
751 struct tcf_walker w; 934 struct tcf_walker w;
752 struct sk_buff *skb; 935 struct sk_buff *skb;
753 struct netlink_callback *cb; 936 struct netlink_callback *cb;
937 struct Qdisc *q;
938 u32 parent;
754}; 939};
755 940
756static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) 941static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -758,13 +943,14 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
758 struct tcf_dump_args *a = (void *)arg; 943 struct tcf_dump_args *a = (void *)arg;
759 struct net *net = sock_net(a->skb->sk); 944 struct net *net = sock_net(a->skb->sk);
760 945
761 return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, 946 return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
947 n, NETLINK_CB(a->cb->skb).portid,
762 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, 948 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
763 RTM_NEWTFILTER); 949 RTM_NEWTFILTER);
764} 950}
765 951
766static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, 952static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
767 struct netlink_callback *cb, 953 struct sk_buff *skb, struct netlink_callback *cb,
768 long index_start, long *p_index) 954 long index_start, long *p_index)
769{ 955{
770 struct net *net = sock_net(skb->sk); 956 struct net *net = sock_net(skb->sk);
@@ -786,7 +972,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
786 memset(&cb->args[1], 0, 972 memset(&cb->args[1], 0,
787 sizeof(cb->args) - sizeof(cb->args[0])); 973 sizeof(cb->args) - sizeof(cb->args[0]));
788 if (cb->args[1] == 0) { 974 if (cb->args[1] == 0) {
789 if (tcf_fill_node(net, skb, tp, 0, 975 if (tcf_fill_node(net, skb, tp, q, parent, 0,
790 NETLINK_CB(cb->skb).portid, 976 NETLINK_CB(cb->skb).portid,
791 cb->nlh->nlmsg_seq, NLM_F_MULTI, 977 cb->nlh->nlmsg_seq, NLM_F_MULTI,
792 RTM_NEWTFILTER) <= 0) 978 RTM_NEWTFILTER) <= 0)
@@ -799,6 +985,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
799 arg.w.fn = tcf_node_dump; 985 arg.w.fn = tcf_node_dump;
800 arg.skb = skb; 986 arg.skb = skb;
801 arg.cb = cb; 987 arg.cb = cb;
988 arg.q = q;
989 arg.parent = parent;
802 arg.w.stop = 0; 990 arg.w.stop = 0;
803 arg.w.skip = cb->args[1] - 1; 991 arg.w.skip = cb->args[1] - 1;
804 arg.w.count = 0; 992 arg.w.count = 0;
@@ -824,6 +1012,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
824 const struct Qdisc_class_ops *cops; 1012 const struct Qdisc_class_ops *cops;
825 long index_start; 1013 long index_start;
826 long index; 1014 long index;
1015 u32 parent;
827 int err; 1016 int err;
828 1017
829 if (nlmsg_len(cb->nlh) < sizeof(*tcm)) 1018 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
@@ -837,10 +1026,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
837 if (!dev) 1026 if (!dev)
838 return skb->len; 1027 return skb->len;
839 1028
840 if (!tcm->tcm_parent) 1029 parent = tcm->tcm_parent;
1030 if (!parent) {
841 q = dev->qdisc; 1031 q = dev->qdisc;
842 else 1032 parent = q->handle;
1033 } else {
843 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 1034 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1035 }
844 if (!q) 1036 if (!q)
845 goto out; 1037 goto out;
846 cops = q->ops->cl_ops; 1038 cops = q->ops->cl_ops;
@@ -864,7 +1056,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
864 if (tca[TCA_CHAIN] && 1056 if (tca[TCA_CHAIN] &&
865 nla_get_u32(tca[TCA_CHAIN]) != chain->index) 1057 nla_get_u32(tca[TCA_CHAIN]) != chain->index)
866 continue; 1058 continue;
867 if (!tcf_chain_dump(chain, skb, cb, index_start, &index)) 1059 if (!tcf_chain_dump(chain, q, parent, skb, cb,
1060 index_start, &index))
868 break; 1061 break;
869 } 1062 }
870 1063
@@ -879,6 +1072,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
879#ifdef CONFIG_NET_CLS_ACT 1072#ifdef CONFIG_NET_CLS_ACT
880 LIST_HEAD(actions); 1073 LIST_HEAD(actions);
881 1074
1075 ASSERT_RTNL();
882 tcf_exts_to_list(exts, &actions); 1076 tcf_exts_to_list(exts, &actions);
883 tcf_action_destroy(&actions, TCA_ACT_UNBIND); 1077 tcf_action_destroy(&actions, TCA_ACT_UNBIND);
884 kfree(exts->actions); 1078 kfree(exts->actions);
@@ -917,6 +1111,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
917 exts->actions[i++] = act; 1111 exts->actions[i++] = act;
918 exts->nr_actions = i; 1112 exts->nr_actions = i;
919 } 1113 }
1114 exts->net = net;
920 } 1115 }
921#else 1116#else
922 if ((exts->action && tb[exts->action]) || 1117 if ((exts->action && tb[exts->action]) ||
@@ -1004,32 +1199,63 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
1004} 1199}
1005EXPORT_SYMBOL(tcf_exts_dump_stats); 1200EXPORT_SYMBOL(tcf_exts_dump_stats);
1006 1201
1007int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, 1202static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
1008 struct net_device **hw_dev) 1203 enum tc_setup_type type,
1204 void *type_data, bool err_stop)
1009{ 1205{
1206 int ok_count = 0;
1010#ifdef CONFIG_NET_CLS_ACT 1207#ifdef CONFIG_NET_CLS_ACT
1011 const struct tc_action *a; 1208 const struct tc_action *a;
1012 LIST_HEAD(actions); 1209 struct net_device *dev;
1210 int i, ret;
1013 1211
1014 if (!tcf_exts_has_actions(exts)) 1212 if (!tcf_exts_has_actions(exts))
1015 return -EINVAL; 1213 return 0;
1016 1214
1017 tcf_exts_to_list(exts, &actions); 1215 for (i = 0; i < exts->nr_actions; i++) {
1018 list_for_each_entry(a, &actions, list) { 1216 a = exts->actions[i];
1019 if (a->ops->get_dev) { 1217 if (!a->ops->get_dev)
1020 a->ops->get_dev(a, dev_net(dev), hw_dev); 1218 continue;
1021 break; 1219 dev = a->ops->get_dev(a);
1022 } 1220 if (!dev)
1221 continue;
1222 ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
1223 if (ret < 0)
1224 return ret;
1225 ok_count += ret;
1023 } 1226 }
1024 if (*hw_dev)
1025 return 0;
1026#endif 1227#endif
1027 return -EOPNOTSUPP; 1228 return ok_count;
1028} 1229}
1029EXPORT_SYMBOL(tcf_exts_get_dev); 1230
1231int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
1232 enum tc_setup_type type, void *type_data, bool err_stop)
1233{
1234 int ok_count;
1235 int ret;
1236
1237 ret = tcf_block_cb_call(block, type, type_data, err_stop);
1238 if (ret < 0)
1239 return ret;
1240 ok_count = ret;
1241
1242 if (!exts)
1243 return ok_count;
1244 ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
1245 if (ret < 0)
1246 return ret;
1247 ok_count += ret;
1248
1249 return ok_count;
1250}
1251EXPORT_SYMBOL(tc_setup_cb_call);
1030 1252
1031static int __init tc_filter_init(void) 1253static int __init tc_filter_init(void)
1032{ 1254{
1255 tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
1256 if (!tc_filter_wq)
1257 return -ENOMEM;
1258
1033 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); 1259 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
1034 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); 1260 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
1035 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, 1261 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
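tc_setup_cb_call() is the aggregation point: it walks the block's callback list and then, when exts are supplied, the egress-device callbacks reached through each action's ->get_dev(). It returns the count of backends that accepted the command, or a negative error; with err_stop (skip_sw) set, the first failure aborts. A classifier-side sketch of offloading through it (names hypothetical, logic modeled on the cls_bpf hunks below):

static int example_offload(struct tcf_block *block, struct example_filter *f,
			   void *type_data)
{
	bool skip_sw = tc_skip_sw(f->gen_flags);
	int err;

	err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSBPF,
			       type_data, skip_sw);
	if (err < 0)
		return err;		/* a mandatory offload failed */
	if (err > 0)
		f->gen_flags |= TCA_CLS_FLAGS_IN_HW;
	if (skip_sw && !(f->gen_flags & TCA_CLS_FLAGS_IN_HW))
		return -EINVAL;		/* skip_sw, but nothing took it */

	return 0;
}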
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d89ebafd2239..5f169ded347e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,13 +17,14 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
19#include <linux/skbuff.h> 19#include <linux/skbuff.h>
20#include <linux/idr.h>
20#include <net/netlink.h> 21#include <net/netlink.h>
21#include <net/act_api.h> 22#include <net/act_api.h>
22#include <net/pkt_cls.h> 23#include <net/pkt_cls.h>
23 24
24struct basic_head { 25struct basic_head {
25 u32 hgenerator;
26 struct list_head flist; 26 struct list_head flist;
27 struct idr handle_idr;
27 struct rcu_head rcu; 28 struct rcu_head rcu;
28}; 29};
29 30
@@ -34,7 +35,10 @@ struct basic_filter {
34 struct tcf_result res; 35 struct tcf_result res;
35 struct tcf_proto *tp; 36 struct tcf_proto *tp;
36 struct list_head link; 37 struct list_head link;
37 struct rcu_head rcu; 38 union {
39 struct work_struct work;
40 struct rcu_head rcu;
41 };
38}; 42};
39 43
40static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, 44static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -78,19 +82,36 @@ static int basic_init(struct tcf_proto *tp)
78 if (head == NULL) 82 if (head == NULL)
79 return -ENOBUFS; 83 return -ENOBUFS;
80 INIT_LIST_HEAD(&head->flist); 84 INIT_LIST_HEAD(&head->flist);
85 idr_init(&head->handle_idr);
81 rcu_assign_pointer(tp->root, head); 86 rcu_assign_pointer(tp->root, head);
82 return 0; 87 return 0;
83} 88}
84 89
85static void basic_delete_filter(struct rcu_head *head) 90static void __basic_delete_filter(struct basic_filter *f)
86{ 91{
87 struct basic_filter *f = container_of(head, struct basic_filter, rcu);
88
89 tcf_exts_destroy(&f->exts); 92 tcf_exts_destroy(&f->exts);
90 tcf_em_tree_destroy(&f->ematches); 93 tcf_em_tree_destroy(&f->ematches);
94 tcf_exts_put_net(&f->exts);
91 kfree(f); 95 kfree(f);
92} 96}
93 97
98static void basic_delete_filter_work(struct work_struct *work)
99{
100 struct basic_filter *f = container_of(work, struct basic_filter, work);
101
102 rtnl_lock();
103 __basic_delete_filter(f);
104 rtnl_unlock();
105}
106
107static void basic_delete_filter(struct rcu_head *head)
108{
109 struct basic_filter *f = container_of(head, struct basic_filter, rcu);
110
111 INIT_WORK(&f->work, basic_delete_filter_work);
112 tcf_queue_work(&f->work);
113}
114
94static void basic_destroy(struct tcf_proto *tp) 115static void basic_destroy(struct tcf_proto *tp)
95{ 116{
96 struct basic_head *head = rtnl_dereference(tp->root); 117 struct basic_head *head = rtnl_dereference(tp->root);
@@ -99,8 +120,13 @@ static void basic_destroy(struct tcf_proto *tp)
99 list_for_each_entry_safe(f, n, &head->flist, link) { 120 list_for_each_entry_safe(f, n, &head->flist, link) {
100 list_del_rcu(&f->link); 121 list_del_rcu(&f->link);
101 tcf_unbind_filter(tp, &f->res); 122 tcf_unbind_filter(tp, &f->res);
102 call_rcu(&f->rcu, basic_delete_filter); 123 idr_remove_ext(&head->handle_idr, f->handle);
124 if (tcf_exts_get_net(&f->exts))
125 call_rcu(&f->rcu, basic_delete_filter);
126 else
127 __basic_delete_filter(f);
103 } 128 }
129 idr_destroy(&head->handle_idr);
104 kfree_rcu(head, rcu); 130 kfree_rcu(head, rcu);
105} 131}
106 132
@@ -111,6 +137,8 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
111 137
112 list_del_rcu(&f->link); 138 list_del_rcu(&f->link);
113 tcf_unbind_filter(tp, &f->res); 139 tcf_unbind_filter(tp, &f->res);
140 idr_remove_ext(&head->handle_idr, f->handle);
141 tcf_exts_get_net(&f->exts);
114 call_rcu(&f->rcu, basic_delete_filter); 142 call_rcu(&f->rcu, basic_delete_filter);
115 *last = list_empty(&head->flist); 143 *last = list_empty(&head->flist);
116 return 0; 144 return 0;
@@ -154,6 +182,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
154 struct nlattr *tb[TCA_BASIC_MAX + 1]; 182 struct nlattr *tb[TCA_BASIC_MAX + 1];
155 struct basic_filter *fold = (struct basic_filter *) *arg; 183 struct basic_filter *fold = (struct basic_filter *) *arg;
156 struct basic_filter *fnew; 184 struct basic_filter *fnew;
185 unsigned long idr_index;
157 186
158 if (tca[TCA_OPTIONS] == NULL) 187 if (tca[TCA_OPTIONS] == NULL)
159 return -EINVAL; 188 return -EINVAL;
@@ -176,35 +205,36 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
176 if (err < 0) 205 if (err < 0)
177 goto errout; 206 goto errout;
178 207
179 err = -EINVAL;
180 if (handle) { 208 if (handle) {
181 fnew->handle = handle; 209 fnew->handle = handle;
182 } else if (fold) { 210 if (!fold) {
183 fnew->handle = fold->handle; 211 err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
212 handle, handle + 1, GFP_KERNEL);
213 if (err)
214 goto errout;
215 }
184 } else { 216 } else {
185 unsigned int i = 0x80000000; 217 err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
186 do { 218 1, 0x7FFFFFFF, GFP_KERNEL);
187 if (++head->hgenerator == 0x7FFFFFFF) 219 if (err)
188 head->hgenerator = 1;
189 } while (--i > 0 && basic_get(tp, head->hgenerator));
190
191 if (i <= 0) {
192 pr_err("Insufficient number of handles\n");
193 goto errout; 220 goto errout;
194 } 221 fnew->handle = idr_index;
195
196 fnew->handle = head->hgenerator;
197 } 222 }
198 223
199 err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); 224 err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
200 if (err < 0) 225 if (err < 0) {
226 if (!fold)
227 idr_remove_ext(&head->handle_idr, fnew->handle);
201 goto errout; 228 goto errout;
229 }
202 230
203 *arg = fnew; 231 *arg = fnew;
204 232
205 if (fold) { 233 if (fold) {
234 idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
206 list_replace_rcu(&fold->link, &fnew->link); 235 list_replace_rcu(&fold->link, &fnew->link);
207 tcf_unbind_filter(tp, &fold->res); 236 tcf_unbind_filter(tp, &fold->res);
237 tcf_exts_get_net(&fold->exts);
208 call_rcu(&fold->rcu, basic_delete_filter); 238 call_rcu(&fold->rcu, basic_delete_filter);
209 } else { 239 } else {
210 list_add_rcu(&fnew->link, &head->flist); 240 list_add_rcu(&fnew->link, &head->flist);
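cls_basic (and cls_bpf below) replace the hand-rolled hgenerator probe loop with an IDR: the same structure now hands out the lowest free handle and doubles as the handle-to-filter lookup. The allocation split, as a sketch around the idr_alloc_ext() calls used above:

static int example_alloc_handle(struct idr *handle_idr, void *filter,
				u32 handle, u32 *out_handle)
{
	unsigned long idr_index;
	int err;

	if (handle) {
		/* claim exactly the requested handle: [handle, handle + 1) */
		err = idr_alloc_ext(handle_idr, filter, &idr_index,
				    handle, handle + 1, GFP_KERNEL);
	} else {
		/* lowest free handle in [1, 0x7FFFFFFF) */
		err = idr_alloc_ext(handle_idr, filter, &idr_index,
				    1, 0x7FFFFFFF, GFP_KERNEL);
	}
	if (err)
		return err;

	*out_handle = idr_index;
	return 0;
}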
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 520c5027646a..a9f3e317055c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -17,6 +17,7 @@
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/filter.h> 18#include <linux/filter.h>
19#include <linux/bpf.h> 19#include <linux/bpf.h>
20#include <linux/idr.h>
20 21
21#include <net/rtnetlink.h> 22#include <net/rtnetlink.h>
22#include <net/pkt_cls.h> 23#include <net/pkt_cls.h>
@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
32 33
33struct cls_bpf_head { 34struct cls_bpf_head {
34 struct list_head plist; 35 struct list_head plist;
35 u32 hgen; 36 struct idr handle_idr;
36 struct rcu_head rcu; 37 struct rcu_head rcu;
37}; 38};
38 39
@@ -49,7 +50,10 @@ struct cls_bpf_prog {
49 struct sock_filter *bpf_ops; 50 struct sock_filter *bpf_ops;
50 const char *bpf_name; 51 const char *bpf_name;
51 struct tcf_proto *tp; 52 struct tcf_proto *tp;
52 struct rcu_head rcu; 53 union {
54 struct work_struct work;
55 struct rcu_head rcu;
56 };
53}; 57};
54 58
55static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { 59static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -99,11 +103,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
99 } else if (at_ingress) { 103 } else if (at_ingress) {
100 /* It is safe to push/pull even if skb_shared() */ 104 /* It is safe to push/pull even if skb_shared() */
101 __skb_push(skb, skb->mac_len); 105 __skb_push(skb, skb->mac_len);
102 bpf_compute_data_end(skb); 106 bpf_compute_data_pointers(skb);
103 filter_res = BPF_PROG_RUN(prog->filter, skb); 107 filter_res = BPF_PROG_RUN(prog->filter, skb);
104 __skb_pull(skb, skb->mac_len); 108 __skb_pull(skb, skb->mac_len);
105 } else { 109 } else {
106 bpf_compute_data_end(skb); 110 bpf_compute_data_pointers(skb);
107 filter_res = BPF_PROG_RUN(prog->filter, skb); 111 filter_res = BPF_PROG_RUN(prog->filter, skb);
108 } 112 }
109 113
@@ -146,7 +150,9 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
146static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, 150static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
147 enum tc_clsbpf_command cmd) 151 enum tc_clsbpf_command cmd)
148{ 152{
149 struct net_device *dev = tp->q->dev_queue->dev; 153 bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
154 struct tcf_block *block = tp->chain->block;
155 bool skip_sw = tc_skip_sw(prog->gen_flags);
150 struct tc_cls_bpf_offload cls_bpf = {}; 156 struct tc_cls_bpf_offload cls_bpf = {};
151 int err; 157 int err;
152 158
@@ -158,17 +164,25 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
158 cls_bpf.exts_integrated = prog->exts_integrated; 164 cls_bpf.exts_integrated = prog->exts_integrated;
159 cls_bpf.gen_flags = prog->gen_flags; 165 cls_bpf.gen_flags = prog->gen_flags;
160 166
161 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &cls_bpf); 167 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
162 if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) 168 if (addorrep) {
163 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; 169 if (err < 0) {
170 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
171 return err;
172 } else if (err > 0) {
173 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
174 }
175 }
176
177 if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
178 return -EINVAL;
164 179
165 return err; 180 return 0;
166} 181}
167 182
168static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, 183static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
169 struct cls_bpf_prog *oldprog) 184 struct cls_bpf_prog *oldprog)
170{ 185{
171 struct net_device *dev = tp->q->dev_queue->dev;
172 struct cls_bpf_prog *obj = prog; 186 struct cls_bpf_prog *obj = prog;
173 enum tc_clsbpf_command cmd; 187 enum tc_clsbpf_command cmd;
174 bool skip_sw; 188 bool skip_sw;
@@ -178,7 +192,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
178 (oldprog && tc_skip_sw(oldprog->gen_flags)); 192 (oldprog && tc_skip_sw(oldprog->gen_flags));
179 193
180 if (oldprog && oldprog->offloaded) { 194 if (oldprog && oldprog->offloaded) {
181 if (tc_should_offload(dev, prog->gen_flags)) { 195 if (!tc_skip_hw(prog->gen_flags)) {
182 cmd = TC_CLSBPF_REPLACE; 196 cmd = TC_CLSBPF_REPLACE;
183 } else if (!tc_skip_sw(prog->gen_flags)) { 197 } else if (!tc_skip_sw(prog->gen_flags)) {
184 obj = oldprog; 198 obj = oldprog;
@@ -187,14 +201,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
187 return -EINVAL; 201 return -EINVAL;
188 } 202 }
189 } else { 203 } else {
190 if (!tc_should_offload(dev, prog->gen_flags)) 204 if (tc_skip_hw(prog->gen_flags))
191 return skip_sw ? -EINVAL : 0; 205 return skip_sw ? -EINVAL : 0;
192 cmd = TC_CLSBPF_ADD; 206 cmd = TC_CLSBPF_ADD;
193 } 207 }
194 208
195 ret = cls_bpf_offload_cmd(tp, obj, cmd); 209 ret = cls_bpf_offload_cmd(tp, obj, cmd);
196 if (ret) 210 if (ret)
197 return skip_sw ? ret : 0; 211 return ret;
198 212
199 obj->offloaded = true; 213 obj->offloaded = true;
200 if (oldprog) 214 if (oldprog)
@@ -238,6 +252,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
238 return -ENOBUFS; 252 return -ENOBUFS;
239 253
240 INIT_LIST_HEAD_RCU(&head->plist); 254 INIT_LIST_HEAD_RCU(&head->plist);
255 idr_init(&head->handle_idr);
241 rcu_assign_pointer(tp->root, head); 256 rcu_assign_pointer(tp->root, head);
242 257
243 return 0; 258 return 0;
@@ -246,6 +261,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
246static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) 261static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
247{ 262{
248 tcf_exts_destroy(&prog->exts); 263 tcf_exts_destroy(&prog->exts);
264 tcf_exts_put_net(&prog->exts);
249 265
250 if (cls_bpf_is_ebpf(prog)) 266 if (cls_bpf_is_ebpf(prog))
251 bpf_prog_put(prog->filter); 267 bpf_prog_put(prog->filter);
@@ -257,17 +273,35 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
257 kfree(prog); 273 kfree(prog);
258} 274}
259 275
276static void cls_bpf_delete_prog_work(struct work_struct *work)
277{
278 struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work);
279
280 rtnl_lock();
281 __cls_bpf_delete_prog(prog);
282 rtnl_unlock();
283}
284
260static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) 285static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
261{ 286{
262 __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu)); 287 struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
288
289 INIT_WORK(&prog->work, cls_bpf_delete_prog_work);
290 tcf_queue_work(&prog->work);
263} 291}
264 292
265static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) 293static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
266{ 294{
295 struct cls_bpf_head *head = rtnl_dereference(tp->root);
296
297 idr_remove_ext(&head->handle_idr, prog->handle);
267 cls_bpf_stop_offload(tp, prog); 298 cls_bpf_stop_offload(tp, prog);
268 list_del_rcu(&prog->link); 299 list_del_rcu(&prog->link);
269 tcf_unbind_filter(tp, &prog->res); 300 tcf_unbind_filter(tp, &prog->res);
270 call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); 301 if (tcf_exts_get_net(&prog->exts))
302 call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
303 else
304 __cls_bpf_delete_prog(prog);
271} 305}
272 306
273static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) 307static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
@@ -287,6 +321,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
287 list_for_each_entry_safe(prog, tmp, &head->plist, link) 321 list_for_each_entry_safe(prog, tmp, &head->plist, link)
288 __cls_bpf_delete(tp, prog); 322 __cls_bpf_delete(tp, prog);
289 323
324 idr_destroy(&head->handle_idr);
290 kfree_rcu(head, rcu); 325 kfree_rcu(head, rcu);
291} 326}
292 327
@@ -343,15 +378,17 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
343} 378}
344 379
345static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, 380static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
346 const struct tcf_proto *tp) 381 u32 gen_flags, const struct tcf_proto *tp)
347{ 382{
348 struct bpf_prog *fp; 383 struct bpf_prog *fp;
349 char *name = NULL; 384 char *name = NULL;
385 bool skip_sw;
350 u32 bpf_fd; 386 u32 bpf_fd;
351 387
352 bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); 388 bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
389 skip_sw = gen_flags & TCA_CLS_FLAGS_SKIP_SW;
353 390
354 fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); 391 fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, skip_sw);
355 if (IS_ERR(fp)) 392 if (IS_ERR(fp))
356 return PTR_ERR(fp); 393 return PTR_ERR(fp);
357 394
@@ -409,7 +446,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
409 prog->gen_flags = gen_flags; 446 prog->gen_flags = gen_flags;
410 447
411 ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : 448 ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
412 cls_bpf_prog_from_efd(tb, prog, tp); 449 cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
413 if (ret < 0) 450 if (ret < 0)
414 return ret; 451 return ret;
415 452
@@ -421,27 +458,6 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
421 return 0; 458 return 0;
422} 459}
423 460
424static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
425 struct cls_bpf_head *head)
426{
427 unsigned int i = 0x80000000;
428 u32 handle;
429
430 do {
431 if (++head->hgen == 0x7FFFFFFF)
432 head->hgen = 1;
433 } while (--i > 0 && cls_bpf_get(tp, head->hgen));
434
435 if (unlikely(i == 0)) {
436 pr_err("Insufficient number of handles\n");
437 handle = 0;
438 } else {
439 handle = head->hgen;
440 }
441
442 return handle;
443}
444
445static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, 461static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
446 struct tcf_proto *tp, unsigned long base, 462 struct tcf_proto *tp, unsigned long base,
447 u32 handle, struct nlattr **tca, 463 u32 handle, struct nlattr **tca,
@@ -451,6 +467,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
451 struct cls_bpf_prog *oldprog = *arg; 467 struct cls_bpf_prog *oldprog = *arg;
452 struct nlattr *tb[TCA_BPF_MAX + 1]; 468 struct nlattr *tb[TCA_BPF_MAX + 1];
453 struct cls_bpf_prog *prog; 469 struct cls_bpf_prog *prog;
470 unsigned long idr_index;
454 int ret; 471 int ret;
455 472
456 if (tca[TCA_OPTIONS] == NULL) 473 if (tca[TCA_OPTIONS] == NULL)
@@ -476,21 +493,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
476 } 493 }
477 } 494 }
478 495
479 if (handle == 0) 496 if (handle == 0) {
480 prog->handle = cls_bpf_grab_new_handle(tp, head); 497 ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
481 else 498 1, 0x7FFFFFFF, GFP_KERNEL);
499 if (ret)
500 goto errout;
501 prog->handle = idr_index;
502 } else {
503 if (!oldprog) {
504 ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
505 handle, handle + 1, GFP_KERNEL);
506 if (ret)
507 goto errout;
508 }
482 prog->handle = handle; 509 prog->handle = handle;
483 if (prog->handle == 0) {
484 ret = -EINVAL;
485 goto errout;
486 } 510 }
487 511
488 ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); 512 ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
489 if (ret < 0) 513 if (ret < 0)
490 goto errout; 514 goto errout_idr;
491 515
492 ret = cls_bpf_offload(tp, prog, oldprog); 516 ret = cls_bpf_offload(tp, prog, oldprog);
493 if (ret) { 517 if (ret) {
518 if (!oldprog)
519 idr_remove_ext(&head->handle_idr, prog->handle);
494 __cls_bpf_delete_prog(prog); 520 __cls_bpf_delete_prog(prog);
495 return ret; 521 return ret;
496 } 522 }
@@ -499,8 +525,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
499 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; 525 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
500 526
501 if (oldprog) { 527 if (oldprog) {
528 idr_replace_ext(&head->handle_idr, prog, handle);
502 list_replace_rcu(&oldprog->link, &prog->link); 529 list_replace_rcu(&oldprog->link, &prog->link);
503 tcf_unbind_filter(tp, &oldprog->res); 530 tcf_unbind_filter(tp, &oldprog->res);
531 tcf_exts_get_net(&oldprog->exts);
504 call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); 532 call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
505 } else { 533 } else {
506 list_add_rcu(&prog->link, &head->plist); 534 list_add_rcu(&prog->link, &head->plist);
@@ -509,6 +537,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
509 *arg = prog; 537 *arg = prog;
510 return 0; 538 return 0;
511 539
540errout_idr:
541 if (!oldprog)
542 idr_remove_ext(&head->handle_idr, prog->handle);
512errout: 543errout:
513 tcf_exts_destroy(&prog->exts); 544 tcf_exts_destroy(&prog->exts);
514 kfree(prog); 545 kfree(prog);
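Both cls_bpf delete paths above are gated on tcf_exts_get_net(): when the exts' netns reference can still be taken, destruction is deferred through call_rcu() (and from there the tc filter workqueue); when the namespace is already dying, the filter is torn down synchronously so nothing outlives it. The gate in isolation, reusing the hypothetical helpers sketched earlier:

static void example_unlink(struct tcf_proto *tp, struct example_filter *f)
{
	list_del_rcu(&f->link);
	tcf_unbind_filter(tp, &f->res);

	if (tcf_exts_get_net(&f->exts))
		call_rcu(&f->rcu, example_delete_rcu);	/* deferred */
	else
		__example_delete(f);			/* netns dying */
}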
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d48452f87975..309d5899265f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,7 +23,10 @@ struct cls_cgroup_head {
23 struct tcf_exts exts; 23 struct tcf_exts exts;
24 struct tcf_ematch_tree ematches; 24 struct tcf_ematch_tree ematches;
25 struct tcf_proto *tp; 25 struct tcf_proto *tp;
26 struct rcu_head rcu; 26 union {
27 struct work_struct work;
28 struct rcu_head rcu;
29 };
27}; 30};
28 31
29static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, 32static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -57,15 +60,32 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
57 [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, 60 [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
58}; 61};
59 62
63static void __cls_cgroup_destroy(struct cls_cgroup_head *head)
64{
65 tcf_exts_destroy(&head->exts);
66 tcf_em_tree_destroy(&head->ematches);
67 tcf_exts_put_net(&head->exts);
68 kfree(head);
69}
70
71static void cls_cgroup_destroy_work(struct work_struct *work)
72{
73 struct cls_cgroup_head *head = container_of(work,
74 struct cls_cgroup_head,
75 work);
76 rtnl_lock();
77 __cls_cgroup_destroy(head);
78 rtnl_unlock();
79}
80
60static void cls_cgroup_destroy_rcu(struct rcu_head *root) 81static void cls_cgroup_destroy_rcu(struct rcu_head *root)
61{ 82{
62 struct cls_cgroup_head *head = container_of(root, 83 struct cls_cgroup_head *head = container_of(root,
63 struct cls_cgroup_head, 84 struct cls_cgroup_head,
64 rcu); 85 rcu);
65 86
66 tcf_exts_destroy(&head->exts); 87 INIT_WORK(&head->work, cls_cgroup_destroy_work);
67 tcf_em_tree_destroy(&head->ematches); 88 tcf_queue_work(&head->work);
68 kfree(head);
69} 89}
70 90
71static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, 91static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
@@ -110,8 +130,10 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
110 goto errout; 130 goto errout;
111 131
112 rcu_assign_pointer(tp->root, new); 132 rcu_assign_pointer(tp->root, new);
113 if (head) 133 if (head) {
134 tcf_exts_get_net(&head->exts);
114 call_rcu(&head->rcu, cls_cgroup_destroy_rcu); 135 call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
136 }
115 return 0; 137 return 0;
116errout: 138errout:
117 tcf_exts_destroy(&new->exts); 139 tcf_exts_destroy(&new->exts);
@@ -124,8 +146,12 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
124 struct cls_cgroup_head *head = rtnl_dereference(tp->root); 146 struct cls_cgroup_head *head = rtnl_dereference(tp->root);
125 147
126 /* Head can still be NULL due to cls_cgroup_init(). */ 148 /* Head can still be NULL due to cls_cgroup_init(). */
127 if (head) 149 if (head) {
128 call_rcu(&head->rcu, cls_cgroup_destroy_rcu); 150 if (tcf_exts_get_net(&head->exts))
151 call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
152 else
153 __cls_cgroup_destroy(head);
154 }
129} 155}
130 156
131static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) 157static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
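
The cls_cgroup hunk above is the template that every classifier below repeats: the rcu_head moves into a union with a work_struct (the object is only ever on one of the two paths at a time, so the storage can be shared), the RCU callback merely queues work, and the work handler takes rtnl_lock() before the real teardown, because tcf_exts_destroy() can sleep and expects the RTNL to be held, neither of which is possible in the softirq context where RCU callbacks run. A minimal sketch of the shape, assuming a hypothetical my_filter type (tcf_queue_work() is the queueing helper this series funnels all such work through):

    #include <linux/rcupdate.h>
    #include <linux/rtnetlink.h>
    #include <linux/workqueue.h>
    #include <net/pkt_cls.h>

    struct my_filter {
            struct tcf_exts exts;
            union {                 /* shared storage: rcu runs first, then work */
                    struct work_struct work;
                    struct rcu_head rcu;
            };
    };

    static void __my_filter_destroy(struct my_filter *f)
    {
            tcf_exts_destroy(&f->exts);     /* may sleep, needs RTNL */
            tcf_exts_put_net(&f->exts);
            kfree(f);
    }

    static void my_filter_destroy_work(struct work_struct *work)
    {
            struct my_filter *f = container_of(work, struct my_filter, work);

            rtnl_lock();
            __my_filter_destroy(f);
            rtnl_unlock();
    }

    static void my_filter_destroy_rcu(struct rcu_head *rcu)
    {
            struct my_filter *f = container_of(rcu, struct my_filter, rcu);

            /* softirq context: cannot sleep or take RTNL here, so defer */
            INIT_WORK(&f->work, my_filter_destroy_work);
            tcf_queue_work(&f->work);
    }
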
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2a3a60ec5b86..25c2a888e1f0 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -57,7 +57,10 @@ struct flow_filter {
57 u32 divisor; 57 u32 divisor;
58 u32 baseclass; 58 u32 baseclass;
59 u32 hashrnd; 59 u32 hashrnd;
60 struct rcu_head rcu; 60 union {
61 struct work_struct work;
62 struct rcu_head rcu;
63 };
61}; 64};
62 65
63static inline u32 addr_fold(void *addr) 66static inline u32 addr_fold(void *addr)
@@ -345,9 +348,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
345 return -1; 348 return -1;
346} 349}
347 350
348static void flow_perturbation(unsigned long arg) 351static void flow_perturbation(struct timer_list *t)
349{ 352{
350 struct flow_filter *f = (struct flow_filter *)arg; 353 struct flow_filter *f = from_timer(f, t, perturb_timer);
351 354
352 get_random_bytes(&f->hashrnd, 4); 355 get_random_bytes(&f->hashrnd, 4);
353 if (f->perturb_period) 356 if (f->perturb_period)
@@ -369,16 +372,32 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
369 [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, 372 [TCA_FLOW_PERTURB] = { .type = NLA_U32 },
370}; 373};
371 374
372static void flow_destroy_filter(struct rcu_head *head) 375static void __flow_destroy_filter(struct flow_filter *f)
373{ 376{
374 struct flow_filter *f = container_of(head, struct flow_filter, rcu);
375
376 del_timer_sync(&f->perturb_timer); 377 del_timer_sync(&f->perturb_timer);
377 tcf_exts_destroy(&f->exts); 378 tcf_exts_destroy(&f->exts);
378 tcf_em_tree_destroy(&f->ematches); 379 tcf_em_tree_destroy(&f->ematches);
380 tcf_exts_put_net(&f->exts);
379 kfree(f); 381 kfree(f);
380} 382}
381 383
384static void flow_destroy_filter_work(struct work_struct *work)
385{
386 struct flow_filter *f = container_of(work, struct flow_filter, work);
387
388 rtnl_lock();
389 __flow_destroy_filter(f);
390 rtnl_unlock();
391}
392
393static void flow_destroy_filter(struct rcu_head *head)
394{
395 struct flow_filter *f = container_of(head, struct flow_filter, rcu);
396
397 INIT_WORK(&f->work, flow_destroy_filter_work);
398 tcf_queue_work(&f->work);
399}
400
382static int flow_change(struct net *net, struct sk_buff *in_skb, 401static int flow_change(struct net *net, struct sk_buff *in_skb,
383 struct tcf_proto *tp, unsigned long base, 402 struct tcf_proto *tp, unsigned long base,
384 u32 handle, struct nlattr **tca, 403 u32 handle, struct nlattr **tca,
@@ -491,8 +510,11 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
491 perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; 510 perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
492 } 511 }
493 512
494 if (TC_H_MAJ(baseclass) == 0) 513 if (TC_H_MAJ(baseclass) == 0) {
495 baseclass = TC_H_MAKE(tp->q->handle, baseclass); 514 struct Qdisc *q = tcf_block_q(tp->chain->block);
515
516 baseclass = TC_H_MAKE(q->handle, baseclass);
517 }
496 if (TC_H_MIN(baseclass) == 0) 518 if (TC_H_MIN(baseclass) == 0)
497 baseclass = TC_H_MAKE(baseclass, 1); 519 baseclass = TC_H_MAKE(baseclass, 1);
498 520
@@ -502,8 +524,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
502 get_random_bytes(&fnew->hashrnd, 4); 524 get_random_bytes(&fnew->hashrnd, 4);
503 } 525 }
504 526
505 setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, 527 timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
506 (unsigned long)fnew);
507 528
508 netif_keep_dst(qdisc_dev(tp->q)); 529 netif_keep_dst(qdisc_dev(tp->q));
509 530
@@ -539,8 +560,10 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
539 560
540 *arg = fnew; 561 *arg = fnew;
541 562
542 if (fold) 563 if (fold) {
564 tcf_exts_get_net(&fold->exts);
543 call_rcu(&fold->rcu, flow_destroy_filter); 565 call_rcu(&fold->rcu, flow_destroy_filter);
566 }
544 return 0; 567 return 0;
545 568
546err2: 569err2:
@@ -557,6 +580,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
557 struct flow_filter *f = arg; 580 struct flow_filter *f = arg;
558 581
559 list_del_rcu(&f->list); 582 list_del_rcu(&f->list);
583 tcf_exts_get_net(&f->exts);
560 call_rcu(&f->rcu, flow_destroy_filter); 584 call_rcu(&f->rcu, flow_destroy_filter);
561 *last = list_empty(&head->filters); 585 *last = list_empty(&head->filters);
562 return 0; 586 return 0;
@@ -581,7 +605,10 @@ static void flow_destroy(struct tcf_proto *tp)
581 605
582 list_for_each_entry_safe(f, next, &head->filters, list) { 606 list_for_each_entry_safe(f, next, &head->filters, list) {
583 list_del_rcu(&f->list); 607 list_del_rcu(&f->list);
584 call_rcu(&f->rcu, flow_destroy_filter); 608 if (tcf_exts_get_net(&f->exts))
609 call_rcu(&f->rcu, flow_destroy_filter);
610 else
611 __flow_destroy_filter(f);
585 } 612 }
586 kfree_rcu(head, rcu); 613 kfree_rcu(head, rcu);
587} 614}
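
The cls_flow hunk also carries part of the tree-wide timer API conversion: setup_deferrable_timer() with an unsigned long cookie becomes timer_setup() with the TIMER_DEFERRABLE flag, and the callback recovers its enclosing object with from_timer() instead of a cast. A sketch of both halves, reusing the hypothetical my_filter from the sketch above with an embedded perturb timer:

    #include <linux/random.h>
    #include <linux/timer.h>

    struct my_filter {
            u32 hashrnd;
            unsigned long perturb_period;   /* in jiffies, 0 = disabled */
            struct timer_list perturb_timer;
    };

    /* new-style callback: handed the timer itself, not an opaque cookie */
    static void my_perturbation(struct timer_list *t)
    {
            struct my_filter *f = from_timer(f, t, perturb_timer);

            get_random_bytes(&f->hashrnd, 4);
            if (f->perturb_period)
                    mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
    }

    static void my_filter_arm(struct my_filter *f)
    {
            /* TIMER_DEFERRABLE: expiry may be delayed while the CPU idles */
            timer_setup(&f->perturb_timer, my_perturbation, TIMER_DEFERRABLE);
            if (f->perturb_period)
                    mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
    }
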
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1a267e77c6de..543a3e875d05 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -87,7 +87,10 @@ struct cls_fl_filter {
87 struct list_head list; 87 struct list_head list;
88 u32 handle; 88 u32 handle;
89 u32 flags; 89 u32 flags;
90 struct rcu_head rcu; 90 union {
91 struct work_struct work;
92 struct rcu_head rcu;
93 };
91 struct net_device *hw_dev; 94 struct net_device *hw_dev;
92}; 95};
93 96
@@ -152,37 +155,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
152 struct cls_fl_filter *f; 155 struct cls_fl_filter *f;
153 struct fl_flow_key skb_key; 156 struct fl_flow_key skb_key;
154 struct fl_flow_key skb_mkey; 157 struct fl_flow_key skb_mkey;
155 struct ip_tunnel_info *info;
156 158
157 if (!atomic_read(&head->ht.nelems)) 159 if (!atomic_read(&head->ht.nelems))
158 return -1; 160 return -1;
159 161
160 fl_clear_masked_range(&skb_key, &head->mask); 162 fl_clear_masked_range(&skb_key, &head->mask);
161 163
162 info = skb_tunnel_info(skb);
163 if (info) {
164 struct ip_tunnel_key *key = &info->key;
165
166 switch (ip_tunnel_info_af(info)) {
167 case AF_INET:
168 skb_key.enc_control.addr_type =
169 FLOW_DISSECTOR_KEY_IPV4_ADDRS;
170 skb_key.enc_ipv4.src = key->u.ipv4.src;
171 skb_key.enc_ipv4.dst = key->u.ipv4.dst;
172 break;
173 case AF_INET6:
174 skb_key.enc_control.addr_type =
175 FLOW_DISSECTOR_KEY_IPV6_ADDRS;
176 skb_key.enc_ipv6.src = key->u.ipv6.src;
177 skb_key.enc_ipv6.dst = key->u.ipv6.dst;
178 break;
179 }
180
181 skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
182 skb_key.enc_tp.src = key->tp_src;
183 skb_key.enc_tp.dst = key->tp_dst;
184 }
185
186 skb_key.indev_ifindex = skb->skb_iif; 164 skb_key.indev_ifindex = skb->skb_iif;
187 /* skb_flow_dissect() does not set n_proto in case an unknown protocol, 165 /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
188 * so do it rather here. 166 * so do it rather here.
@@ -215,27 +193,41 @@ static int fl_init(struct tcf_proto *tp)
215 return 0; 193 return 0;
216} 194}
217 195
196static void __fl_destroy_filter(struct cls_fl_filter *f)
197{
198 tcf_exts_destroy(&f->exts);
199 tcf_exts_put_net(&f->exts);
200 kfree(f);
201}
202
203static void fl_destroy_filter_work(struct work_struct *work)
204{
205 struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
206
207 rtnl_lock();
208 __fl_destroy_filter(f);
209 rtnl_unlock();
210}
211
218static void fl_destroy_filter(struct rcu_head *head) 212static void fl_destroy_filter(struct rcu_head *head)
219{ 213{
220 struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); 214 struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
221 215
222 tcf_exts_destroy(&f->exts); 216 INIT_WORK(&f->work, fl_destroy_filter_work);
223 kfree(f); 217 tcf_queue_work(&f->work);
224} 218}
225 219
226static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) 220static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
227{ 221{
228 struct tc_cls_flower_offload cls_flower = {}; 222 struct tc_cls_flower_offload cls_flower = {};
229 struct net_device *dev = f->hw_dev; 223 struct tcf_block *block = tp->chain->block;
230
231 if (!tc_can_offload(dev))
232 return;
233 224
234 tc_cls_common_offload_init(&cls_flower.common, tp); 225 tc_cls_common_offload_init(&cls_flower.common, tp);
235 cls_flower.command = TC_CLSFLOWER_DESTROY; 226 cls_flower.command = TC_CLSFLOWER_DESTROY;
236 cls_flower.cookie = (unsigned long) f; 227 cls_flower.cookie = (unsigned long) f;
237 228
238 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); 229 tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
230 &cls_flower, false);
239} 231}
240 232
241static int fl_hw_replace_filter(struct tcf_proto *tp, 233static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -243,22 +235,11 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
243 struct fl_flow_key *mask, 235 struct fl_flow_key *mask,
244 struct cls_fl_filter *f) 236 struct cls_fl_filter *f)
245{ 237{
246 struct net_device *dev = tp->q->dev_queue->dev;
247 struct tc_cls_flower_offload cls_flower = {}; 238 struct tc_cls_flower_offload cls_flower = {};
239 struct tcf_block *block = tp->chain->block;
240 bool skip_sw = tc_skip_sw(f->flags);
248 int err; 241 int err;
249 242
250 if (!tc_can_offload(dev)) {
251 if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
252 (f->hw_dev && !tc_can_offload(f->hw_dev))) {
253 f->hw_dev = dev;
254 return tc_skip_sw(f->flags) ? -EINVAL : 0;
255 }
256 dev = f->hw_dev;
257 cls_flower.egress_dev = true;
258 } else {
259 f->hw_dev = dev;
260 }
261
262 tc_cls_common_offload_init(&cls_flower.common, tp); 243 tc_cls_common_offload_init(&cls_flower.common, tp);
263 cls_flower.command = TC_CLSFLOWER_REPLACE; 244 cls_flower.command = TC_CLSFLOWER_REPLACE;
264 cls_flower.cookie = (unsigned long) f; 245 cls_flower.cookie = (unsigned long) f;
@@ -266,32 +247,36 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
266 cls_flower.mask = mask; 247 cls_flower.mask = mask;
267 cls_flower.key = &f->mkey; 248 cls_flower.key = &f->mkey;
268 cls_flower.exts = &f->exts; 249 cls_flower.exts = &f->exts;
250 cls_flower.classid = f->res.classid;
269 251
270 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, 252 err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
271 &cls_flower); 253 &cls_flower, skip_sw);
272 if (!err) 254 if (err < 0) {
255 fl_hw_destroy_filter(tp, f);
256 return err;
257 } else if (err > 0) {
273 f->flags |= TCA_CLS_FLAGS_IN_HW; 258 f->flags |= TCA_CLS_FLAGS_IN_HW;
259 }
260
261 if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
262 return -EINVAL;
274 263
275 if (tc_skip_sw(f->flags))
276 return err;
277 return 0; 264 return 0;
278} 265}
279 266
280static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) 267static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
281{ 268{
282 struct tc_cls_flower_offload cls_flower = {}; 269 struct tc_cls_flower_offload cls_flower = {};
283 struct net_device *dev = f->hw_dev; 270 struct tcf_block *block = tp->chain->block;
284
285 if (!tc_can_offload(dev))
286 return;
287 271
288 tc_cls_common_offload_init(&cls_flower.common, tp); 272 tc_cls_common_offload_init(&cls_flower.common, tp);
289 cls_flower.command = TC_CLSFLOWER_STATS; 273 cls_flower.command = TC_CLSFLOWER_STATS;
290 cls_flower.cookie = (unsigned long) f; 274 cls_flower.cookie = (unsigned long) f;
291 cls_flower.exts = &f->exts; 275 cls_flower.exts = &f->exts;
276 cls_flower.classid = f->res.classid;
292 277
293 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, 278 tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
294 &cls_flower); 279 &cls_flower, false);
295} 280}
296 281
297static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) 282static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
@@ -303,7 +288,10 @@ static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
303 if (!tc_skip_hw(f->flags)) 288 if (!tc_skip_hw(f->flags))
304 fl_hw_destroy_filter(tp, f); 289 fl_hw_destroy_filter(tp, f);
305 tcf_unbind_filter(tp, &f->res); 290 tcf_unbind_filter(tp, &f->res);
306 call_rcu(&f->rcu, fl_destroy_filter); 291 if (tcf_exts_get_net(&f->exts))
292 call_rcu(&f->rcu, fl_destroy_filter);
293 else
294 __fl_destroy_filter(f);
307} 295}
308 296
309static void fl_destroy_sleepable(struct work_struct *work) 297static void fl_destroy_sleepable(struct work_struct *work)
@@ -922,28 +910,28 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
922 910
923 if (!tc_flags_valid(fnew->flags)) { 911 if (!tc_flags_valid(fnew->flags)) {
924 err = -EINVAL; 912 err = -EINVAL;
925 goto errout; 913 goto errout_idr;
926 } 914 }
927 } 915 }
928 916
929 err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); 917 err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
930 if (err) 918 if (err)
931 goto errout; 919 goto errout_idr;
932 920
933 err = fl_check_assign_mask(head, &mask); 921 err = fl_check_assign_mask(head, &mask);
934 if (err) 922 if (err)
935 goto errout; 923 goto errout_idr;
936 924
937 if (!tc_skip_sw(fnew->flags)) { 925 if (!tc_skip_sw(fnew->flags)) {
938 if (!fold && fl_lookup(head, &fnew->mkey)) { 926 if (!fold && fl_lookup(head, &fnew->mkey)) {
939 err = -EEXIST; 927 err = -EEXIST;
940 goto errout; 928 goto errout_idr;
941 } 929 }
942 930
943 err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, 931 err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
944 head->ht_params); 932 head->ht_params);
945 if (err) 933 if (err)
946 goto errout; 934 goto errout_idr;
947 } 935 }
948 936
949 if (!tc_skip_hw(fnew->flags)) { 937 if (!tc_skip_hw(fnew->flags)) {
@@ -952,7 +940,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
952 &mask.key, 940 &mask.key,
953 fnew); 941 fnew);
954 if (err) 942 if (err)
955 goto errout; 943 goto errout_idr;
956 } 944 }
957 945
958 if (!tc_in_hw(fnew->flags)) 946 if (!tc_in_hw(fnew->flags))
@@ -973,6 +961,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
973 idr_replace_ext(&head->handle_idr, fnew, fnew->handle); 961 idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
974 list_replace_rcu(&fold->list, &fnew->list); 962 list_replace_rcu(&fold->list, &fnew->list);
975 tcf_unbind_filter(tp, &fold->res); 963 tcf_unbind_filter(tp, &fold->res);
964 tcf_exts_get_net(&fold->exts);
976 call_rcu(&fold->rcu, fl_destroy_filter); 965 call_rcu(&fold->rcu, fl_destroy_filter);
977 } else { 966 } else {
978 list_add_tail_rcu(&fnew->list, &head->filters); 967 list_add_tail_rcu(&fnew->list, &head->filters);
@@ -981,6 +970,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
981 kfree(tb); 970 kfree(tb);
982 return 0; 971 return 0;
983 972
973errout_idr:
974 if (fnew->handle)
975 idr_remove_ext(&head->handle_idr, fnew->handle);
984errout: 976errout:
985 tcf_exts_destroy(&fnew->exts); 977 tcf_exts_destroy(&fnew->exts);
986 kfree(fnew); 978 kfree(fnew);
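
Hardware offload in cls_flower (and in cls_matchall and cls_u32 further down) now goes through tc_setup_cb_call() on the filter's tcf_block instead of ndo_setup_tc() on a single device, which is what lets the egress_dev special-casing above disappear. The hunks rely on its return convention: a negative value is a hard error, a positive value is the number of callbacks that accepted the filter (so TCA_CLS_FLAGS_IN_HW gets set), and a skip_sw filter that no hardware accepted must be rejected. A condensed sketch of that decision logic, wrapped in a hypothetical helper:

    #include <net/pkt_cls.h>

    /* sketch: mirrors the fl_hw_replace_filter() flow in the hunk above */
    static int my_hw_replace(struct tcf_block *block, struct tcf_proto *tp,
                             struct cls_fl_filter *f,
                             struct tc_cls_flower_offload *cls_flower,
                             bool skip_sw)
    {
            int err;

            err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
                                   cls_flower, skip_sw);
            if (err < 0) {
                    fl_hw_destroy_filter(tp, f);    /* undo any partial offload */
                    return err;
            } else if (err > 0) {
                    f->flags |= TCA_CLS_FLAGS_IN_HW; /* >0 callbacks took it */
            }

            /* a hw-only filter that ended up in no hardware is useless */
            if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
                    return -EINVAL;

            return 0;
    }
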
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 941245ad07fd..20f0de1a960a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -28,6 +28,7 @@
28#include <net/netlink.h> 28#include <net/netlink.h>
29#include <net/act_api.h> 29#include <net/act_api.h>
30#include <net/pkt_cls.h> 30#include <net/pkt_cls.h>
31#include <net/sch_generic.h>
31 32
32#define HTSIZE 256 33#define HTSIZE 256
33 34
@@ -46,7 +47,10 @@ struct fw_filter {
46#endif /* CONFIG_NET_CLS_IND */ 47#endif /* CONFIG_NET_CLS_IND */
47 struct tcf_exts exts; 48 struct tcf_exts exts;
48 struct tcf_proto *tp; 49 struct tcf_proto *tp;
49 struct rcu_head rcu; 50 union {
51 struct work_struct work;
52 struct rcu_head rcu;
53 };
50}; 54};
51 55
52static u32 fw_hash(u32 handle) 56static u32 fw_hash(u32 handle)
@@ -83,9 +87,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
83 } 87 }
84 } 88 }
85 } else { 89 } else {
90 struct Qdisc *q = tcf_block_q(tp->chain->block);
91
86 /* Old method: classify the packet using its skb mark. */ 92 /* Old method: classify the packet using its skb mark. */
87 if (id && (TC_H_MAJ(id) == 0 || 93 if (id && (TC_H_MAJ(id) == 0 ||
88 !(TC_H_MAJ(id ^ tp->q->handle)))) { 94 !(TC_H_MAJ(id ^ q->handle)))) {
89 res->classid = id; 95 res->classid = id;
90 res->class = 0; 96 res->class = 0;
91 return 0; 97 return 0;
@@ -119,12 +125,28 @@ static int fw_init(struct tcf_proto *tp)
119 return 0; 125 return 0;
120} 126}
121 127
128static void __fw_delete_filter(struct fw_filter *f)
129{
130 tcf_exts_destroy(&f->exts);
131 tcf_exts_put_net(&f->exts);
132 kfree(f);
133}
134
135static void fw_delete_filter_work(struct work_struct *work)
136{
137 struct fw_filter *f = container_of(work, struct fw_filter, work);
138
139 rtnl_lock();
140 __fw_delete_filter(f);
141 rtnl_unlock();
142}
143
122static void fw_delete_filter(struct rcu_head *head) 144static void fw_delete_filter(struct rcu_head *head)
123{ 145{
124 struct fw_filter *f = container_of(head, struct fw_filter, rcu); 146 struct fw_filter *f = container_of(head, struct fw_filter, rcu);
125 147
126 tcf_exts_destroy(&f->exts); 148 INIT_WORK(&f->work, fw_delete_filter_work);
127 kfree(f); 149 tcf_queue_work(&f->work);
128} 150}
129 151
130static void fw_destroy(struct tcf_proto *tp) 152static void fw_destroy(struct tcf_proto *tp)
@@ -141,7 +163,10 @@ static void fw_destroy(struct tcf_proto *tp)
141 RCU_INIT_POINTER(head->ht[h], 163 RCU_INIT_POINTER(head->ht[h],
142 rtnl_dereference(f->next)); 164 rtnl_dereference(f->next));
143 tcf_unbind_filter(tp, &f->res); 165 tcf_unbind_filter(tp, &f->res);
144 call_rcu(&f->rcu, fw_delete_filter); 166 if (tcf_exts_get_net(&f->exts))
167 call_rcu(&f->rcu, fw_delete_filter);
168 else
169 __fw_delete_filter(f);
145 } 170 }
146 } 171 }
147 kfree_rcu(head, rcu); 172 kfree_rcu(head, rcu);
@@ -166,6 +191,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
166 if (pfp == f) { 191 if (pfp == f) {
167 RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); 192 RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
168 tcf_unbind_filter(tp, &f->res); 193 tcf_unbind_filter(tp, &f->res);
194 tcf_exts_get_net(&f->exts);
169 call_rcu(&f->rcu, fw_delete_filter); 195 call_rcu(&f->rcu, fw_delete_filter);
170 ret = 0; 196 ret = 0;
171 break; 197 break;
@@ -286,6 +312,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
286 RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); 312 RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
287 rcu_assign_pointer(*fp, fnew); 313 rcu_assign_pointer(*fp, fnew);
288 tcf_unbind_filter(tp, &f->res); 314 tcf_unbind_filter(tp, &f->res);
315 tcf_exts_get_net(&f->exts);
289 call_rcu(&f->rcu, fw_delete_filter); 316 call_rcu(&f->rcu, fw_delete_filter);
290 317
291 *arg = fnew; 318 *arg = fnew;
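
The tcf_exts_get_net()/tcf_exts_put_net() pairs threaded through every classifier here handle a teardown race that the deferred destruction creates: the work item can now run after the filter's network namespace has started dying. Before queueing an asynchronous free, the classifier therefore tries to pin the netns; if that fails, it falls back to destroying the filter synchronously while the namespace is still usable. The guard, sketched with the same hypothetical my_filter as above (the helpers are assumed to be thin maybe_get_net()-style wrappers):

    static void my_filter_release(struct my_filter *f)
    {
            if (tcf_exts_get_net(&f->exts))
                    /* netns pinned: the deferred path may run later */
                    call_rcu(&f->rcu, my_filter_destroy_rcu);
            else
                    /* netns already dying: tear down right now */
                    __my_filter_destroy(f);
    }
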
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 21cc45caf842..66d4e0099158 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -21,7 +21,10 @@ struct cls_mall_head {
21 struct tcf_result res; 21 struct tcf_result res;
22 u32 handle; 22 u32 handle;
23 u32 flags; 23 u32 flags;
24 struct rcu_head rcu; 24 union {
25 struct work_struct work;
26 struct rcu_head rcu;
27 };
25}; 28};
26 29
27static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, 30static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -32,6 +35,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
32 if (tc_skip_sw(head->flags)) 35 if (tc_skip_sw(head->flags))
33 return -1; 36 return -1;
34 37
38 *res = head->res;
35 return tcf_exts_exec(skb, &head->exts, res); 39 return tcf_exts_exec(skb, &head->exts, res);
36} 40}
37 41
@@ -40,21 +44,52 @@ static int mall_init(struct tcf_proto *tp)
40 return 0; 44 return 0;
41} 45}
42 46
47static void __mall_destroy(struct cls_mall_head *head)
48{
49 tcf_exts_destroy(&head->exts);
50 tcf_exts_put_net(&head->exts);
51 kfree(head);
52}
53
54static void mall_destroy_work(struct work_struct *work)
55{
56 struct cls_mall_head *head = container_of(work, struct cls_mall_head,
57 work);
58 rtnl_lock();
59 __mall_destroy(head);
60 rtnl_unlock();
61}
62
43static void mall_destroy_rcu(struct rcu_head *rcu) 63static void mall_destroy_rcu(struct rcu_head *rcu)
44{ 64{
45 struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, 65 struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
46 rcu); 66 rcu);
47 67
48 tcf_exts_destroy(&head->exts); 68 INIT_WORK(&head->work, mall_destroy_work);
49 kfree(head); 69 tcf_queue_work(&head->work);
70}
71
72static void mall_destroy_hw_filter(struct tcf_proto *tp,
73 struct cls_mall_head *head,
74 unsigned long cookie)
75{
76 struct tc_cls_matchall_offload cls_mall = {};
77 struct tcf_block *block = tp->chain->block;
78
79 tc_cls_common_offload_init(&cls_mall.common, tp);
80 cls_mall.command = TC_CLSMATCHALL_DESTROY;
81 cls_mall.cookie = cookie;
82
83 tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
50} 84}
51 85
52static int mall_replace_hw_filter(struct tcf_proto *tp, 86static int mall_replace_hw_filter(struct tcf_proto *tp,
53 struct cls_mall_head *head, 87 struct cls_mall_head *head,
54 unsigned long cookie) 88 unsigned long cookie)
55{ 89{
56 struct net_device *dev = tp->q->dev_queue->dev;
57 struct tc_cls_matchall_offload cls_mall = {}; 90 struct tc_cls_matchall_offload cls_mall = {};
91 struct tcf_block *block = tp->chain->block;
92 bool skip_sw = tc_skip_sw(head->flags);
58 int err; 93 int err;
59 94
60 tc_cls_common_offload_init(&cls_mall.common, tp); 95 tc_cls_common_offload_init(&cls_mall.common, tp);
@@ -62,40 +97,35 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
62 cls_mall.exts = &head->exts; 97 cls_mall.exts = &head->exts;
63 cls_mall.cookie = cookie; 98 cls_mall.cookie = cookie;
64 99
65 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, 100 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
66 &cls_mall); 101 &cls_mall, skip_sw);
67 if (!err) 102 if (err < 0) {
103 mall_destroy_hw_filter(tp, head, cookie);
104 return err;
105 } else if (err > 0) {
68 head->flags |= TCA_CLS_FLAGS_IN_HW; 106 head->flags |= TCA_CLS_FLAGS_IN_HW;
107 }
69 108
70 return err; 109 if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
71} 110 return -EINVAL;
72
73static void mall_destroy_hw_filter(struct tcf_proto *tp,
74 struct cls_mall_head *head,
75 unsigned long cookie)
76{
77 struct net_device *dev = tp->q->dev_queue->dev;
78 struct tc_cls_matchall_offload cls_mall = {};
79
80 tc_cls_common_offload_init(&cls_mall.common, tp);
81 cls_mall.command = TC_CLSMATCHALL_DESTROY;
82 cls_mall.cookie = cookie;
83 111
84 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &cls_mall); 112 return 0;
85} 113}
86 114
87static void mall_destroy(struct tcf_proto *tp) 115static void mall_destroy(struct tcf_proto *tp)
88{ 116{
89 struct cls_mall_head *head = rtnl_dereference(tp->root); 117 struct cls_mall_head *head = rtnl_dereference(tp->root);
90 struct net_device *dev = tp->q->dev_queue->dev;
91 118
92 if (!head) 119 if (!head)
93 return; 120 return;
94 121
95 if (tc_should_offload(dev, head->flags)) 122 if (!tc_skip_hw(head->flags))
96 mall_destroy_hw_filter(tp, head, (unsigned long) head); 123 mall_destroy_hw_filter(tp, head, (unsigned long) head);
97 124
98 call_rcu(&head->rcu, mall_destroy_rcu); 125 if (tcf_exts_get_net(&head->exts))
126 call_rcu(&head->rcu, mall_destroy_rcu);
127 else
128 __mall_destroy(head);
99} 129}
100 130
101static void *mall_get(struct tcf_proto *tp, u32 handle) 131static void *mall_get(struct tcf_proto *tp, u32 handle)
@@ -132,7 +162,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
132 void **arg, bool ovr) 162 void **arg, bool ovr)
133{ 163{
134 struct cls_mall_head *head = rtnl_dereference(tp->root); 164 struct cls_mall_head *head = rtnl_dereference(tp->root);
135 struct net_device *dev = tp->q->dev_queue->dev;
136 struct nlattr *tb[TCA_MATCHALL_MAX + 1]; 165 struct nlattr *tb[TCA_MATCHALL_MAX + 1];
137 struct cls_mall_head *new; 166 struct cls_mall_head *new;
138 u32 flags = 0; 167 u32 flags = 0;
@@ -172,14 +201,10 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
172 if (err) 201 if (err)
173 goto err_set_parms; 202 goto err_set_parms;
174 203
175 if (tc_should_offload(dev, flags)) { 204 if (!tc_skip_hw(new->flags)) {
176 err = mall_replace_hw_filter(tp, new, (unsigned long) new); 205 err = mall_replace_hw_filter(tp, new, (unsigned long) new);
177 if (err) { 206 if (err)
178 if (tc_skip_sw(flags)) 207 goto err_replace_hw_filter;
179 goto err_replace_hw_filter;
180 else
181 err = 0;
182 }
183 } 208 }
184 209
185 if (!tc_in_hw(new->flags)) 210 if (!tc_in_hw(new->flags))
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 9ddde65915d2..ac9a5b8825b9 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -57,7 +57,10 @@ struct route4_filter {
57 u32 handle; 57 u32 handle;
58 struct route4_bucket *bkt; 58 struct route4_bucket *bkt;
59 struct tcf_proto *tp; 59 struct tcf_proto *tp;
60 struct rcu_head rcu; 60 union {
61 struct work_struct work;
62 struct rcu_head rcu;
63 };
61}; 64};
62 65
63#define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) 66#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -254,12 +257,28 @@ static int route4_init(struct tcf_proto *tp)
254 return 0; 257 return 0;
255} 258}
256 259
260static void __route4_delete_filter(struct route4_filter *f)
261{
262 tcf_exts_destroy(&f->exts);
263 tcf_exts_put_net(&f->exts);
264 kfree(f);
265}
266
267static void route4_delete_filter_work(struct work_struct *work)
268{
269 struct route4_filter *f = container_of(work, struct route4_filter, work);
270
271 rtnl_lock();
272 __route4_delete_filter(f);
273 rtnl_unlock();
274}
275
257static void route4_delete_filter(struct rcu_head *head) 276static void route4_delete_filter(struct rcu_head *head)
258{ 277{
259 struct route4_filter *f = container_of(head, struct route4_filter, rcu); 278 struct route4_filter *f = container_of(head, struct route4_filter, rcu);
260 279
261 tcf_exts_destroy(&f->exts); 280 INIT_WORK(&f->work, route4_delete_filter_work);
262 kfree(f); 281 tcf_queue_work(&f->work);
263} 282}
264 283
265static void route4_destroy(struct tcf_proto *tp) 284static void route4_destroy(struct tcf_proto *tp)
@@ -284,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp)
284 next = rtnl_dereference(f->next); 303 next = rtnl_dereference(f->next);
285 RCU_INIT_POINTER(b->ht[h2], next); 304 RCU_INIT_POINTER(b->ht[h2], next);
286 tcf_unbind_filter(tp, &f->res); 305 tcf_unbind_filter(tp, &f->res);
287 call_rcu(&f->rcu, route4_delete_filter); 306 if (tcf_exts_get_net(&f->exts))
307 call_rcu(&f->rcu, route4_delete_filter);
308 else
309 __route4_delete_filter(f);
288 } 310 }
289 } 311 }
290 RCU_INIT_POINTER(head->table[h1], NULL); 312 RCU_INIT_POINTER(head->table[h1], NULL);
@@ -325,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
325 347
326 /* Delete it */ 348 /* Delete it */
327 tcf_unbind_filter(tp, &f->res); 349 tcf_unbind_filter(tp, &f->res);
350 tcf_exts_get_net(&f->exts);
328 call_rcu(&f->rcu, route4_delete_filter); 351 call_rcu(&f->rcu, route4_delete_filter);
329 352
330 /* Strip RTNL protected tree */ 353 /* Strip RTNL protected tree */
@@ -528,6 +551,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
528 *arg = f; 551 *arg = f;
529 if (fold) { 552 if (fold) {
530 tcf_unbind_filter(tp, &fold->res); 553 tcf_unbind_filter(tp, &fold->res);
554 tcf_exts_get_net(&fold->exts);
531 call_rcu(&fold->rcu, route4_delete_filter); 555 call_rcu(&fold->rcu, route4_delete_filter);
532 } 556 }
533 return 0; 557 return 0;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index b1f6ed48bc72..cf325625c99d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -97,7 +97,10 @@ struct rsvp_filter {
97 97
98 u32 handle; 98 u32 handle;
99 struct rsvp_session *sess; 99 struct rsvp_session *sess;
100 struct rcu_head rcu; 100 union {
101 struct work_struct work;
102 struct rcu_head rcu;
103 };
101}; 104};
102 105
103static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) 106static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -282,12 +285,28 @@ static int rsvp_init(struct tcf_proto *tp)
282 return -ENOBUFS; 285 return -ENOBUFS;
283} 286}
284 287
288static void __rsvp_delete_filter(struct rsvp_filter *f)
289{
290 tcf_exts_destroy(&f->exts);
291 tcf_exts_put_net(&f->exts);
292 kfree(f);
293}
294
295static void rsvp_delete_filter_work(struct work_struct *work)
296{
297 struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
298
299 rtnl_lock();
300 __rsvp_delete_filter(f);
301 rtnl_unlock();
302}
303
285static void rsvp_delete_filter_rcu(struct rcu_head *head) 304static void rsvp_delete_filter_rcu(struct rcu_head *head)
286{ 305{
287 struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); 306 struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
288 307
289 tcf_exts_destroy(&f->exts); 308 INIT_WORK(&f->work, rsvp_delete_filter_work);
290 kfree(f); 309 tcf_queue_work(&f->work);
291} 310}
292 311
293static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) 312static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
@@ -297,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
297 * grace period, since converted-to-rcu actions are relying on that 316 * grace period, since converted-to-rcu actions are relying on that
298 * in cleanup() callback 317 * in cleanup() callback
299 */ 318 */
300 call_rcu(&f->rcu, rsvp_delete_filter_rcu); 319 if (tcf_exts_get_net(&f->exts))
320 call_rcu(&f->rcu, rsvp_delete_filter_rcu);
321 else
322 __rsvp_delete_filter(f);
301} 323}
302 324
303static void rsvp_destroy(struct tcf_proto *tp) 325static void rsvp_destroy(struct tcf_proto *tp)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 14a7e08b2fa9..67467ae24c97 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,6 +13,7 @@
13#include <net/act_api.h> 13#include <net/act_api.h>
14#include <net/netlink.h> 14#include <net/netlink.h>
15#include <net/pkt_cls.h> 15#include <net/pkt_cls.h>
16#include <net/sch_generic.h>
16 17
17/* 18/*
18 * Passing parameters to the root seems to be done more awkwardly than really 19 * Passing parameters to the root seems to be done more awkwardly than really
@@ -27,14 +28,20 @@
27struct tcindex_filter_result { 28struct tcindex_filter_result {
28 struct tcf_exts exts; 29 struct tcf_exts exts;
29 struct tcf_result res; 30 struct tcf_result res;
30 struct rcu_head rcu; 31 union {
32 struct work_struct work;
33 struct rcu_head rcu;
34 };
31}; 35};
32 36
33struct tcindex_filter { 37struct tcindex_filter {
34 u16 key; 38 u16 key;
35 struct tcindex_filter_result result; 39 struct tcindex_filter_result result;
36 struct tcindex_filter __rcu *next; 40 struct tcindex_filter __rcu *next;
37 struct rcu_head rcu; 41 union {
42 struct work_struct work;
43 struct rcu_head rcu;
44 };
38}; 45};
39 46
40 47
@@ -90,9 +97,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
90 97
91 f = tcindex_lookup(p, key); 98 f = tcindex_lookup(p, key);
92 if (!f) { 99 if (!f) {
100 struct Qdisc *q = tcf_block_q(tp->chain->block);
101
93 if (!p->fall_through) 102 if (!p->fall_through)
94 return -1; 103 return -1;
95 res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key); 104 res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
96 res->class = 0; 105 res->class = 0;
97 pr_debug("alg 0x%x\n", res->classid); 106 pr_debug("alg 0x%x\n", res->classid);
98 return 0; 107 return 0;
@@ -133,12 +142,46 @@ static int tcindex_init(struct tcf_proto *tp)
133 return 0; 142 return 0;
134} 143}
135 144
145static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
146{
147 tcf_exts_destroy(&r->exts);
148 tcf_exts_put_net(&r->exts);
149}
150
151static void tcindex_destroy_rexts_work(struct work_struct *work)
152{
153 struct tcindex_filter_result *r;
154
155 r = container_of(work, struct tcindex_filter_result, work);
156 rtnl_lock();
157 __tcindex_destroy_rexts(r);
158 rtnl_unlock();
159}
160
136static void tcindex_destroy_rexts(struct rcu_head *head) 161static void tcindex_destroy_rexts(struct rcu_head *head)
137{ 162{
138 struct tcindex_filter_result *r; 163 struct tcindex_filter_result *r;
139 164
140 r = container_of(head, struct tcindex_filter_result, rcu); 165 r = container_of(head, struct tcindex_filter_result, rcu);
141 tcf_exts_destroy(&r->exts); 166 INIT_WORK(&r->work, tcindex_destroy_rexts_work);
167 tcf_queue_work(&r->work);
168}
169
170static void __tcindex_destroy_fexts(struct tcindex_filter *f)
171{
172 tcf_exts_destroy(&f->result.exts);
173 tcf_exts_put_net(&f->result.exts);
174 kfree(f);
175}
176
177static void tcindex_destroy_fexts_work(struct work_struct *work)
178{
179 struct tcindex_filter *f = container_of(work, struct tcindex_filter,
180 work);
181
182 rtnl_lock();
183 __tcindex_destroy_fexts(f);
184 rtnl_unlock();
142} 185}
143 186
144static void tcindex_destroy_fexts(struct rcu_head *head) 187static void tcindex_destroy_fexts(struct rcu_head *head)
@@ -146,8 +189,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
146 struct tcindex_filter *f = container_of(head, struct tcindex_filter, 189 struct tcindex_filter *f = container_of(head, struct tcindex_filter,
147 rcu); 190 rcu);
148 191
149 tcf_exts_destroy(&f->result.exts); 192 INIT_WORK(&f->work, tcindex_destroy_fexts_work);
150 kfree(f); 193 tcf_queue_work(&f->work);
151} 194}
152 195
153static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) 196static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
@@ -182,10 +225,17 @@ found:
182 * grace period, since converted-to-rcu actions are relying on that 225 * grace period, since converted-to-rcu actions are relying on that
183 * in cleanup() callback 226 * in cleanup() callback
184 */ 227 */
185 if (f) 228 if (f) {
186 call_rcu(&f->rcu, tcindex_destroy_fexts); 229 if (tcf_exts_get_net(&f->result.exts))
187 else 230 call_rcu(&f->rcu, tcindex_destroy_fexts);
188 call_rcu(&r->rcu, tcindex_destroy_rexts); 231 else
232 __tcindex_destroy_fexts(f);
233 } else {
234 if (tcf_exts_get_net(&r->exts))
235 call_rcu(&r->rcu, tcindex_destroy_rexts);
236 else
237 __tcindex_destroy_rexts(r);
238 }
189 239
190 *last = false; 240 *last = false;
191 return 0; 241 return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 10b8d851fc6b..ac152b4f4247 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -46,6 +46,7 @@
46#include <net/act_api.h> 46#include <net/act_api.h>
47#include <net/pkt_cls.h> 47#include <net/pkt_cls.h>
48#include <linux/netdevice.h> 48#include <linux/netdevice.h>
49#include <linux/idr.h>
49 50
50struct tc_u_knode { 51struct tc_u_knode {
51 struct tc_u_knode __rcu *next; 52 struct tc_u_knode __rcu *next;
@@ -68,7 +69,10 @@ struct tc_u_knode {
68 u32 __percpu *pcpu_success; 69 u32 __percpu *pcpu_success;
69#endif 70#endif
70 struct tcf_proto *tp; 71 struct tcf_proto *tp;
71 struct rcu_head rcu; 72 union {
73 struct work_struct work;
74 struct rcu_head rcu;
75 };
72 /* The 'sel' field MUST be the last field in structure to allow for 76 /* The 'sel' field MUST be the last field in structure to allow for
73 * tc_u32_keys allocated at end of structure. 77 * tc_u32_keys allocated at end of structure.
74 */ 78 */
@@ -82,6 +86,7 @@ struct tc_u_hnode {
82 struct tc_u_common *tp_c; 86 struct tc_u_common *tp_c;
83 int refcnt; 87 int refcnt;
84 unsigned int divisor; 88 unsigned int divisor;
89 struct idr handle_idr;
85 struct rcu_head rcu; 90 struct rcu_head rcu;
86 /* The 'ht' field MUST be the last field in structure to allow for 91 /* The 'ht' field MUST be the last field in structure to allow for
87 * more entries allocated at end of structure. 92 * more entries allocated at end of structure.
@@ -91,9 +96,9 @@ struct tc_u_hnode {
91 96
92struct tc_u_common { 97struct tc_u_common {
93 struct tc_u_hnode __rcu *hlist; 98 struct tc_u_hnode __rcu *hlist;
94 struct Qdisc *q; 99 struct tcf_block *block;
95 int refcnt; 100 int refcnt;
96 u32 hgenerator; 101 struct idr handle_idr;
97 struct hlist_node hnode; 102 struct hlist_node hnode;
98 struct rcu_head rcu; 103 struct rcu_head rcu;
99}; 104};
@@ -311,19 +316,19 @@ static void *u32_get(struct tcf_proto *tp, u32 handle)
311 return u32_lookup_key(ht, handle); 316 return u32_lookup_key(ht, handle);
312} 317}
313 318
314static u32 gen_new_htid(struct tc_u_common *tp_c) 319static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
315{ 320{
316 int i = 0x800; 321 unsigned long idr_index;
322 int err;
317 323
318 /* hgenerator only used inside rtnl lock it is safe to increment 324 /* This is only used inside the rtnl lock so it is safe to allocate
319 * without read _copy_ update semantics 325 * without read _copy_ update semantics
320 */ 326 */
321 do { 327 err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
322 if (++tp_c->hgenerator == 0x7FF) 328 1, 0x7FF, GFP_KERNEL);
323 tp_c->hgenerator = 1; 329 if (err)
324 } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); 330 return 0;
325 331 return (u32)(idr_index | 0x800) << 20;
326 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
327} 332}
328 333
329static struct hlist_head *tc_u_common_hash; 334static struct hlist_head *tc_u_common_hash;
@@ -333,11 +338,7 @@ static struct hlist_head *tc_u_common_hash;
333 338
334static unsigned int tc_u_hash(const struct tcf_proto *tp) 339static unsigned int tc_u_hash(const struct tcf_proto *tp)
335{ 340{
336 struct net_device *dev = tp->q->dev_queue->dev; 341 return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
337 u32 qhandle = tp->q->handle;
338 int ifindex = dev->ifindex;
339
340 return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT);
341} 342}
342 343
343static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) 344static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
@@ -347,7 +348,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
347 348
348 h = tc_u_hash(tp); 349 h = tc_u_hash(tp);
349 hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { 350 hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
350 if (tc->q == tp->q) 351 if (tc->block == tp->chain->block)
351 return tc; 352 return tc;
352 } 353 }
353 return NULL; 354 return NULL;
@@ -366,8 +367,9 @@ static int u32_init(struct tcf_proto *tp)
366 return -ENOBUFS; 367 return -ENOBUFS;
367 368
368 root_ht->refcnt++; 369 root_ht->refcnt++;
369 root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; 370 root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
370 root_ht->prio = tp->prio; 371 root_ht->prio = tp->prio;
372 idr_init(&root_ht->handle_idr);
371 373
372 if (tp_c == NULL) { 374 if (tp_c == NULL) {
373 tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); 375 tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
@@ -375,8 +377,9 @@ static int u32_init(struct tcf_proto *tp)
375 kfree(root_ht); 377 kfree(root_ht);
376 return -ENOBUFS; 378 return -ENOBUFS;
377 } 379 }
378 tp_c->q = tp->q; 380 tp_c->block = tp->chain->block;
379 INIT_HLIST_NODE(&tp_c->hnode); 381 INIT_HLIST_NODE(&tp_c->hnode);
382 idr_init(&tp_c->handle_idr);
380 383
381 h = tc_u_hash(tp); 384 h = tc_u_hash(tp);
382 hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); 385 hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
@@ -396,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
396 bool free_pf) 399 bool free_pf)
397{ 400{
398 tcf_exts_destroy(&n->exts); 401 tcf_exts_destroy(&n->exts);
402 tcf_exts_put_net(&n->exts);
399 if (n->ht_down) 403 if (n->ht_down)
400 n->ht_down->refcnt--; 404 n->ht_down->refcnt--;
401#ifdef CONFIG_CLS_U32_PERF 405#ifdef CONFIG_CLS_U32_PERF
@@ -418,11 +422,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
418 * this the u32_delete_key_rcu variant does not free the percpu 422 * this the u32_delete_key_rcu variant does not free the percpu
419 * statistics. 423 * statistics.
420 */ 424 */
425static void u32_delete_key_work(struct work_struct *work)
426{
427 struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
428
429 rtnl_lock();
430 u32_destroy_key(key->tp, key, false);
431 rtnl_unlock();
432}
433
421static void u32_delete_key_rcu(struct rcu_head *rcu) 434static void u32_delete_key_rcu(struct rcu_head *rcu)
422{ 435{
423 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); 436 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
424 437
425 u32_destroy_key(key->tp, key, false); 438 INIT_WORK(&key->work, u32_delete_key_work);
439 tcf_queue_work(&key->work);
426} 440}
427 441
428/* u32_delete_key_freepf_rcu is the rcu callback variant 442/* u32_delete_key_freepf_rcu is the rcu callback variant
@@ -432,11 +446,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu)
432 * for the variant that should be used with keys returned from 446 * for the variant that should be used with keys returned from
433 * u32_init_knode() 447 * u32_init_knode()
434 */ 448 */
449static void u32_delete_key_freepf_work(struct work_struct *work)
450{
451 struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
452
453 rtnl_lock();
454 u32_destroy_key(key->tp, key, true);
455 rtnl_unlock();
456}
457
435static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) 458static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
436{ 459{
437 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); 460 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
438 461
439 u32_destroy_key(key->tp, key, true); 462 INIT_WORK(&key->work, u32_delete_key_freepf_work);
463 tcf_queue_work(&key->work);
440} 464}
441 465
442static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) 466static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
@@ -453,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
453 RCU_INIT_POINTER(*kp, key->next); 477 RCU_INIT_POINTER(*kp, key->next);
454 478
455 tcf_unbind_filter(tp, &key->res); 479 tcf_unbind_filter(tp, &key->res);
480 tcf_exts_get_net(&key->exts);
456 call_rcu(&key->rcu, u32_delete_key_freepf_rcu); 481 call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
457 return 0; 482 return 0;
458 } 483 }
@@ -462,71 +487,69 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
462 return 0; 487 return 0;
463} 488}
464 489
465static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) 490static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
466{ 491{
467 struct net_device *dev = tp->q->dev_queue->dev; 492 struct tcf_block *block = tp->chain->block;
468 struct tc_cls_u32_offload cls_u32 = {}; 493 struct tc_cls_u32_offload cls_u32 = {};
469 494
470 if (!tc_should_offload(dev, 0))
471 return;
472
473 tc_cls_common_offload_init(&cls_u32.common, tp); 495 tc_cls_common_offload_init(&cls_u32.common, tp);
474 cls_u32.command = TC_CLSU32_DELETE_KNODE; 496 cls_u32.command = TC_CLSU32_DELETE_HNODE;
475 cls_u32.knode.handle = handle; 497 cls_u32.hnode.divisor = h->divisor;
498 cls_u32.hnode.handle = h->handle;
499 cls_u32.hnode.prio = h->prio;
476 500
477 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); 501 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
478} 502}
479 503
480static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, 504static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
481 u32 flags) 505 u32 flags)
482{ 506{
483 struct net_device *dev = tp->q->dev_queue->dev; 507 struct tcf_block *block = tp->chain->block;
484 struct tc_cls_u32_offload cls_u32 = {}; 508 struct tc_cls_u32_offload cls_u32 = {};
509 bool skip_sw = tc_skip_sw(flags);
510 bool offloaded = false;
485 int err; 511 int err;
486 512
487 if (!tc_should_offload(dev, flags))
488 return tc_skip_sw(flags) ? -EINVAL : 0;
489
490 tc_cls_common_offload_init(&cls_u32.common, tp); 513 tc_cls_common_offload_init(&cls_u32.common, tp);
491 cls_u32.command = TC_CLSU32_NEW_HNODE; 514 cls_u32.command = TC_CLSU32_NEW_HNODE;
492 cls_u32.hnode.divisor = h->divisor; 515 cls_u32.hnode.divisor = h->divisor;
493 cls_u32.hnode.handle = h->handle; 516 cls_u32.hnode.handle = h->handle;
494 cls_u32.hnode.prio = h->prio; 517 cls_u32.hnode.prio = h->prio;
495 518
496 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); 519 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
497 if (tc_skip_sw(flags)) 520 if (err < 0) {
521 u32_clear_hw_hnode(tp, h);
498 return err; 522 return err;
523 } else if (err > 0) {
524 offloaded = true;
525 }
526
527 if (skip_sw && !offloaded)
528 return -EINVAL;
499 529
500 return 0; 530 return 0;
501} 531}
502 532
503static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) 533static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
504{ 534{
505 struct net_device *dev = tp->q->dev_queue->dev; 535 struct tcf_block *block = tp->chain->block;
506 struct tc_cls_u32_offload cls_u32 = {}; 536 struct tc_cls_u32_offload cls_u32 = {};
507 537
508 if (!tc_should_offload(dev, 0))
509 return;
510
511 tc_cls_common_offload_init(&cls_u32.common, tp); 538 tc_cls_common_offload_init(&cls_u32.common, tp);
512 cls_u32.command = TC_CLSU32_DELETE_HNODE; 539 cls_u32.command = TC_CLSU32_DELETE_KNODE;
513 cls_u32.hnode.divisor = h->divisor; 540 cls_u32.knode.handle = handle;
514 cls_u32.hnode.handle = h->handle;
515 cls_u32.hnode.prio = h->prio;
516 541
517 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); 542 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
518} 543}
519 544
520static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, 545static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
521 u32 flags) 546 u32 flags)
522{ 547{
523 struct net_device *dev = tp->q->dev_queue->dev; 548 struct tcf_block *block = tp->chain->block;
524 struct tc_cls_u32_offload cls_u32 = {}; 549 struct tc_cls_u32_offload cls_u32 = {};
550 bool skip_sw = tc_skip_sw(flags);
525 int err; 551 int err;
526 552
527 if (!tc_should_offload(dev, flags))
528 return tc_skip_sw(flags) ? -EINVAL : 0;
529
530 tc_cls_common_offload_init(&cls_u32.common, tp); 553 tc_cls_common_offload_init(&cls_u32.common, tp);
531 cls_u32.command = TC_CLSU32_REPLACE_KNODE; 554 cls_u32.command = TC_CLSU32_REPLACE_KNODE;
532 cls_u32.knode.handle = n->handle; 555 cls_u32.knode.handle = n->handle;
@@ -543,13 +566,16 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
543 if (n->ht_down) 566 if (n->ht_down)
544 cls_u32.knode.link_handle = n->ht_down->handle; 567 cls_u32.knode.link_handle = n->ht_down->handle;
545 568
546 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); 569 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
547 570 if (err < 0) {
548 if (!err) 571 u32_remove_hw_knode(tp, n->handle);
572 return err;
573 } else if (err > 0) {
549 n->flags |= TCA_CLS_FLAGS_IN_HW; 574 n->flags |= TCA_CLS_FLAGS_IN_HW;
575 }
550 576
551 if (tc_skip_sw(flags)) 577 if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
552 return err; 578 return -EINVAL;
553 579
554 return 0; 580 return 0;
555} 581}
@@ -565,7 +591,11 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
565 rtnl_dereference(n->next)); 591 rtnl_dereference(n->next));
566 tcf_unbind_filter(tp, &n->res); 592 tcf_unbind_filter(tp, &n->res);
567 u32_remove_hw_knode(tp, n->handle); 593 u32_remove_hw_knode(tp, n->handle);
568 call_rcu(&n->rcu, u32_delete_key_freepf_rcu); 594 idr_remove_ext(&ht->handle_idr, n->handle);
595 if (tcf_exts_get_net(&n->exts))
596 call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
597 else
598 u32_destroy_key(n->tp, n, true);
569 } 599 }
570 } 600 }
571} 601}
@@ -586,6 +616,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
586 hn = &phn->next, phn = rtnl_dereference(*hn)) { 616 hn = &phn->next, phn = rtnl_dereference(*hn)) {
587 if (phn == ht) { 617 if (phn == ht) {
588 u32_clear_hw_hnode(tp, ht); 618 u32_clear_hw_hnode(tp, ht);
619 idr_destroy(&ht->handle_idr);
620 idr_remove_ext(&tp_c->handle_idr, ht->handle);
589 RCU_INIT_POINTER(*hn, ht->next); 621 RCU_INIT_POINTER(*hn, ht->next);
590 kfree_rcu(ht, rcu); 622 kfree_rcu(ht, rcu);
591 return 0; 623 return 0;
@@ -633,6 +665,7 @@ static void u32_destroy(struct tcf_proto *tp)
633 kfree_rcu(ht, rcu); 665 kfree_rcu(ht, rcu);
634 } 666 }
635 667
668 idr_destroy(&tp_c->handle_idr);
636 kfree(tp_c); 669 kfree(tp_c);
637 } 670 }
638 671
@@ -701,27 +734,21 @@ ret:
701 return ret; 734 return ret;
702} 735}
703 736
704#define NR_U32_NODE (1<<12) 737static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
705static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
706{ 738{
707 struct tc_u_knode *n; 739 unsigned long idr_index;
708 unsigned long i; 740 u32 start = htid | 0x800;
709 unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), 741 u32 max = htid | 0xFFF;
710 GFP_KERNEL); 742 u32 min = htid;
711 if (!bitmap) 743
712 return handle | 0xFFF; 744 if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
713 745 start, max + 1, GFP_KERNEL)) {
714 for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]); 746 if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
715 n; 747 min + 1, max + 1, GFP_KERNEL))
716 n = rtnl_dereference(n->next)) 748 return max;
717 set_bit(TC_U32_NODE(n->handle), bitmap); 749 }
718
719 i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
720 if (i >= NR_U32_NODE)
721 i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
722 750
723 kfree(bitmap); 751 return (u32)idr_index;
724 return handle | (i >= NR_U32_NODE ? 0xFFF : i);
725} 752}
726 753
727static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 754static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -806,6 +833,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
806 if (pins->handle == n->handle) 833 if (pins->handle == n->handle)
807 break; 834 break;
808 835
836 idr_replace_ext(&ht->handle_idr, n, n->handle);
809 RCU_INIT_POINTER(n->next, pins->next); 837 RCU_INIT_POINTER(n->next, pins->next);
810 rcu_assign_pointer(*ins, n); 838 rcu_assign_pointer(*ins, n);
811} 839}
@@ -926,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
926 954
927 u32_replace_knode(tp, tp_c, new); 955 u32_replace_knode(tp, tp_c, new);
928 tcf_unbind_filter(tp, &n->res); 956 tcf_unbind_filter(tp, &n->res);
957 tcf_exts_get_net(&n->exts);
929 call_rcu(&n->rcu, u32_delete_key_rcu); 958 call_rcu(&n->rcu, u32_delete_key_rcu);
930 return 0; 959 return 0;
931 } 960 }
@@ -937,22 +966,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
937 return -EINVAL; 966 return -EINVAL;
938 if (TC_U32_KEY(handle)) 967 if (TC_U32_KEY(handle))
939 return -EINVAL; 968 return -EINVAL;
940 if (handle == 0) {
941 handle = gen_new_htid(tp->data);
942 if (handle == 0)
943 return -ENOMEM;
944 }
945 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); 969 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
946 if (ht == NULL) 970 if (ht == NULL)
947 return -ENOBUFS; 971 return -ENOBUFS;
972 if (handle == 0) {
973 handle = gen_new_htid(tp->data, ht);
974 if (handle == 0) {
975 kfree(ht);
976 return -ENOMEM;
977 }
978 } else {
979 err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
980 handle, handle + 1, GFP_KERNEL);
981 if (err) {
982 kfree(ht);
983 return err;
984 }
985 }
948 ht->tp_c = tp_c; 986 ht->tp_c = tp_c;
949 ht->refcnt = 1; 987 ht->refcnt = 1;
950 ht->divisor = divisor; 988 ht->divisor = divisor;
951 ht->handle = handle; 989 ht->handle = handle;
952 ht->prio = tp->prio; 990 ht->prio = tp->prio;
991 idr_init(&ht->handle_idr);
953 992
954 err = u32_replace_hw_hnode(tp, ht, flags); 993 err = u32_replace_hw_hnode(tp, ht, flags);
955 if (err) { 994 if (err) {
995 idr_remove_ext(&tp_c->handle_idr, handle);
956 kfree(ht); 996 kfree(ht);
957 return err; 997 return err;
958 } 998 }
@@ -986,24 +1026,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
986 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) 1026 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
987 return -EINVAL; 1027 return -EINVAL;
988 handle = htid | TC_U32_NODE(handle); 1028 handle = htid | TC_U32_NODE(handle);
1029 err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
1030 handle, handle + 1,
1031 GFP_KERNEL);
1032 if (err)
1033 return err;
989 } else 1034 } else
990 handle = gen_new_kid(ht, htid); 1035 handle = gen_new_kid(ht, htid);
991 1036
992 if (tb[TCA_U32_SEL] == NULL) 1037 if (tb[TCA_U32_SEL] == NULL) {
993 return -EINVAL; 1038 err = -EINVAL;
1039 goto erridr;
1040 }
994 1041
995 s = nla_data(tb[TCA_U32_SEL]); 1042 s = nla_data(tb[TCA_U32_SEL]);
996 1043
997 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); 1044 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
998 if (n == NULL) 1045 if (n == NULL) {
999 return -ENOBUFS; 1046 err = -ENOBUFS;
1047 goto erridr;
1048 }
1000 1049
1001#ifdef CONFIG_CLS_U32_PERF 1050#ifdef CONFIG_CLS_U32_PERF
1002 size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); 1051 size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
1003 n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); 1052 n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
1004 if (!n->pf) { 1053 if (!n->pf) {
1005 kfree(n); 1054 err = -ENOBUFS;
1006 return -ENOBUFS; 1055 goto errfree;
1007 } 1056 }
1008#endif 1057#endif
1009 1058
@@ -1066,9 +1115,12 @@ errhw:
1066errout: 1115errout:
1067 tcf_exts_destroy(&n->exts); 1116 tcf_exts_destroy(&n->exts);
1068#ifdef CONFIG_CLS_U32_PERF 1117#ifdef CONFIG_CLS_U32_PERF
1118errfree:
1069 free_percpu(n->pf); 1119 free_percpu(n->pf);
1070#endif 1120#endif
1071 kfree(n); 1121 kfree(n);
1122erridr:
1123 idr_remove_ext(&ht->handle_idr, handle);
1072 return err; 1124 return err;
1073} 1125}
1074 1126
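
cls_u32's two hand-rolled handle generators, the hgenerator counter for hash-table ids and the NR_U32_NODE bitmap scan for knode ids, are both replaced by IDRs, so allocation, collision checking and handle reuse live in one structure. The _ext IDR variants used here (later replaced by idr_alloc_u32()) take an unsigned long result pointer and a [start, end) range; under that assumption, the new gen_new_kid() logic reduces to:

    #include <linux/idr.h>

    /* Pick a free knode id inside hash table 'htid': prefer the
     * 0x800..0xFFF window the old bitmap scan used, then fall back
     * to 0x001..0xFFF before giving up and returning the maximum.
     */
    static u32 my_gen_new_kid(struct idr *handle_idr, u32 htid)
    {
            unsigned long idr_index;
            u32 start = htid | 0x800;
            u32 max = htid | 0xFFF;
            u32 min = htid;

            if (idr_alloc_ext(handle_idr, NULL, &idr_index,
                              start, max + 1, GFP_KERNEL)) {
                    if (idr_alloc_ext(handle_idr, NULL, &idr_index,
                                      min + 1, max + 1, GFP_KERNEL))
                            return max;     /* both windows full */
            }
            return (u32)idr_index;
    }
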
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 03b677bc0700..1331a4c2d8ff 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
178 struct tcf_ematch_hdr *em_hdr = nla_data(nla); 178 struct tcf_ematch_hdr *em_hdr = nla_data(nla);
179 int data_len = nla_len(nla) - sizeof(*em_hdr); 179 int data_len = nla_len(nla) - sizeof(*em_hdr);
180 void *data = (void *) em_hdr + sizeof(*em_hdr); 180 void *data = (void *) em_hdr + sizeof(*em_hdr);
181 struct net *net = dev_net(qdisc_dev(tp->q)); 181 struct net *net = tp->chain->block->net;
182 182
183 if (!TCF_EM_REL_VALID(em_hdr->flags)) 183 if (!TCF_EM_REL_VALID(em_hdr->flags))
184 goto errout; 184 goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c6deb74e3d2f..b6c4f536876b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -301,6 +301,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
301{ 301{
302 struct Qdisc *q; 302 struct Qdisc *q;
303 303
304 if (!handle)
305 return NULL;
304 q = qdisc_match_from_root(dev->qdisc, handle); 306 q = qdisc_match_from_root(dev->qdisc, handle);
305 if (q) 307 if (q)
306 goto out; 308 goto out;
@@ -1500,7 +1502,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1500 int s_idx, s_q_idx; 1502 int s_idx, s_q_idx;
1501 struct net_device *dev; 1503 struct net_device *dev;
1502 const struct nlmsghdr *nlh = cb->nlh; 1504 const struct nlmsghdr *nlh = cb->nlh;
1503 struct tcmsg *tcm = nlmsg_data(nlh);
1504 struct nlattr *tca[TCA_MAX + 1]; 1505 struct nlattr *tca[TCA_MAX + 1];
1505 int err; 1506 int err;
1506 1507
@@ -1510,7 +1511,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1510 idx = 0; 1511 idx = 0;
1511 ASSERT_RTNL(); 1512 ASSERT_RTNL();
1512 1513
1513 err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); 1514 err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
1514 if (err < 0) 1515 if (err < 0)
1515 return err; 1516 return err;
1516 1517
@@ -1662,9 +1663,11 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1662 struct tcf_bind_args *a = (void *)arg; 1663 struct tcf_bind_args *a = (void *)arg;
1663 1664
1664 if (tp->ops->bind_class) { 1665 if (tp->ops->bind_class) {
1665 tcf_tree_lock(tp); 1666 struct Qdisc *q = tcf_block_q(tp->chain->block);
1667
1668 sch_tree_lock(q);
1666 tp->ops->bind_class(n, a->classid, a->cl); 1669 tp->ops->bind_class(n, a->classid, a->cl);
1667 tcf_tree_unlock(tp); 1670 sch_tree_unlock(q);
1668 } 1671 }
1669 return 0; 1672 return 0;
1670} 1673}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index c5fcdf1a58a0..2dbd249c0b2f 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -281,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
281 goto err_out; 281 goto err_out;
282 } 282 }
283 283
284 error = tcf_block_get(&flow->block, &flow->filter_list); 284 error = tcf_block_get(&flow->block, &flow->filter_list, sch);
285 if (error) { 285 if (error) {
286 kfree(flow); 286 kfree(flow);
287 goto err_out; 287 goto err_out;
@@ -546,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
546 p->link.q = &noop_qdisc; 546 p->link.q = &noop_qdisc;
547 pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); 547 pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
548 548
549 err = tcf_block_get(&p->link.block, &p->link.filter_list); 549 err = tcf_block_get(&p->link.block, &p->link.filter_list, sch);
550 if (err) 550 if (err)
551 return err; 551 return err;
552 552
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index dcef97fa8047..6361be7881f1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -255,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
255 case TC_ACT_STOLEN: 255 case TC_ACT_STOLEN:
256 case TC_ACT_TRAP: 256 case TC_ACT_TRAP:
257 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 257 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
258 /* fall through */
258 case TC_ACT_SHOT: 259 case TC_ACT_SHOT:
259 return NULL; 260 return NULL;
260 case TC_ACT_RECLASSIFY: 261 case TC_ACT_RECLASSIFY:
@@ -1566,7 +1567,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1566 if (cl == NULL) 1567 if (cl == NULL)
1567 goto failure; 1568 goto failure;
1568 1569
1569 err = tcf_block_get(&cl->block, &cl->filter_list); 1570 err = tcf_block_get(&cl->block, &cl->filter_list, sch);
1570 if (err) { 1571 if (err) {
1571 kfree(cl); 1572 kfree(cl);
1572 return err; 1573 return err;
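Several classifiers in this series gain /* fall through */ markers where TC_ACT_STOLEN and TC_ACT_TRAP intentionally share TC_ACT_SHOT's return path. A small standalone illustration of the annotation, with made-up action names; under -Wimplicit-fallthrough the compiler warns unless the fall-through is documented this way:

#include <stdio.h>

/* With -Wimplicit-fallthrough, a case that runs into the next one
 * triggers a warning unless the fall-through is marked intentional.
 */
enum act { ACT_STOLEN, ACT_TRAP, ACT_SHOT, ACT_OK };

static const char *classify(enum act a)
{
	switch (a) {
	case ACT_STOLEN:
	case ACT_TRAP:
		puts("consumed by an action");
		/* fall through */
	case ACT_SHOT:
		return NULL;		/* dropped in all three cases */
	default:
		return "deliver";
	}
}

int main(void)
{
	return classify(ACT_TRAP) ? 1 : 0;
}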
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
new file mode 100644
index 000000000000..7a72980c1509
--- /dev/null
+++ b/net/sched/sch_cbs.c
@@ -0,0 +1,373 @@
1/*
2 * net/sched/sch_cbs.c Credit Based Shaper
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
10 *
11 */
12
13/* Credit Based Shaper (CBS)
14 * =========================
15 *
16 * This is a simple rate-limiting shaper aimed at TSN applications on
17 * systems with known traffic workloads.
18 *
19 * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
 20 * Section 8.6.8.2, and explained in more detail in Annex L of the
21 * same specification.
22 *
23 * There are four tunables to be considered:
24 *
25 * 'idleslope': Idleslope is the rate of credits that is
26 * accumulated (in kilobits per second) when there is at least
27 * one packet waiting for transmission. Packets are transmitted
 28 * when the current value of credits is equal to or greater than
 29 * zero. When there is no packet to be transmitted, the amount of
30 * credits is set to zero. This is the main tunable of the CBS
31 * algorithm.
32 *
33 * 'sendslope':
34 * Sendslope is the rate of credits that is depleted (it should be a
35 * negative number of kilobits per second) when a transmission is
 36 * occurring. It can be calculated as follows (IEEE 802.1Q-2014 Section
37 * 8.6.8.2 item g):
38 *
39 * sendslope = idleslope - port_transmit_rate
40 *
41 * 'hicredit': Hicredit defines the maximum amount of credits (in
42 * bytes) that can be accumulated. Hicredit depends on the
 43 * characteristics of interfering traffic;
 44 * 'max_interference_size' is the maximum size of any burst of
 45 * traffic that can delay the transmission of a frame that is
 46 * available for transmission for this traffic class (IEEE
47 * 802.1Q-2014 Annex L, Equation L-3):
48 *
49 * hicredit = max_interference_size * (idleslope / port_transmit_rate)
50 *
51 * 'locredit': Locredit is the minimum amount of credits that can
52 * be reached. It is a function of the traffic flowing through
53 * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
54 *
55 * locredit = max_frame_size * (sendslope / port_transmit_rate)
56 */
57
58#include <linux/module.h>
59#include <linux/types.h>
60#include <linux/kernel.h>
61#include <linux/string.h>
62#include <linux/errno.h>
63#include <linux/skbuff.h>
64#include <net/netlink.h>
65#include <net/sch_generic.h>
66#include <net/pkt_sched.h>
67
68#define BYTES_PER_KBIT (1000LL / 8)
69
70struct cbs_sched_data {
71 bool offload;
72 int queue;
73 s64 port_rate; /* in bytes/s */
74 s64 last; /* timestamp in ns */
75 s64 credits; /* in bytes */
76 s32 locredit; /* in bytes */
77 s32 hicredit; /* in bytes */
78 s64 sendslope; /* in bytes/s */
79 s64 idleslope; /* in bytes/s */
80 struct qdisc_watchdog watchdog;
81 int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch);
82 struct sk_buff *(*dequeue)(struct Qdisc *sch);
83};
84
85static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
86{
87 return qdisc_enqueue_tail(skb, sch);
88}
89
90static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
91{
92 struct cbs_sched_data *q = qdisc_priv(sch);
93
94 if (sch->q.qlen == 0 && q->credits > 0) {
 95 /* We need to stop accumulating credits when there are
96 * no enqueued packets and q->credits is positive.
97 */
98 q->credits = 0;
99 q->last = ktime_get_ns();
100 }
101
102 return qdisc_enqueue_tail(skb, sch);
103}
104
105static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
106 struct sk_buff **to_free)
107{
108 struct cbs_sched_data *q = qdisc_priv(sch);
109
110 return q->enqueue(skb, sch);
111}
112
113/* timediff is in ns, slope is in bytes/s */
114static s64 timediff_to_credits(s64 timediff, s64 slope)
115{
116 return div64_s64(timediff * slope, NSEC_PER_SEC);
117}
118
119static s64 delay_from_credits(s64 credits, s64 slope)
120{
121 if (unlikely(slope == 0))
122 return S64_MAX;
123
124 return div64_s64(-credits * NSEC_PER_SEC, slope);
125}
126
127static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
128{
129 if (unlikely(port_rate == 0))
130 return S64_MAX;
131
132 return div64_s64(len * slope, port_rate);
133}
134
135static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
136{
137 struct cbs_sched_data *q = qdisc_priv(sch);
138 s64 now = ktime_get_ns();
139 struct sk_buff *skb;
140 s64 credits;
141 int len;
142
143 if (q->credits < 0) {
144 credits = timediff_to_credits(now - q->last, q->idleslope);
145
146 credits = q->credits + credits;
147 q->credits = min_t(s64, credits, q->hicredit);
148
149 if (q->credits < 0) {
150 s64 delay;
151
152 delay = delay_from_credits(q->credits, q->idleslope);
153 qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
154
155 q->last = now;
156
157 return NULL;
158 }
159 }
160
161 skb = qdisc_dequeue_head(sch);
162 if (!skb)
163 return NULL;
164
165 len = qdisc_pkt_len(skb);
166
167 /* As sendslope is a negative number, this will decrease the
168 * amount of q->credits.
169 */
170 credits = credits_from_len(len, q->sendslope, q->port_rate);
171 credits += q->credits;
172
173 q->credits = max_t(s64, credits, q->locredit);
174 q->last = now;
175
176 return skb;
177}
178
179static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
180{
181 return qdisc_dequeue_head(sch);
182}
183
184static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
185{
186 struct cbs_sched_data *q = qdisc_priv(sch);
187
188 return q->dequeue(sch);
189}
190
191static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
192 [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
193};
194
195static void cbs_disable_offload(struct net_device *dev,
196 struct cbs_sched_data *q)
197{
198 struct tc_cbs_qopt_offload cbs = { };
199 const struct net_device_ops *ops;
200 int err;
201
202 if (!q->offload)
203 return;
204
205 q->enqueue = cbs_enqueue_soft;
206 q->dequeue = cbs_dequeue_soft;
207
208 ops = dev->netdev_ops;
209 if (!ops->ndo_setup_tc)
210 return;
211
212 cbs.queue = q->queue;
213 cbs.enable = 0;
214
215 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
216 if (err < 0)
217 pr_warn("Couldn't disable CBS offload for queue %d\n",
218 cbs.queue);
219}
220
221static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
222 const struct tc_cbs_qopt *opt)
223{
224 const struct net_device_ops *ops = dev->netdev_ops;
225 struct tc_cbs_qopt_offload cbs = { };
226 int err;
227
228 if (!ops->ndo_setup_tc)
229 return -EOPNOTSUPP;
230
231 cbs.queue = q->queue;
232
233 cbs.enable = 1;
234 cbs.hicredit = opt->hicredit;
235 cbs.locredit = opt->locredit;
236 cbs.idleslope = opt->idleslope;
237 cbs.sendslope = opt->sendslope;
238
239 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
240 if (err < 0)
241 return err;
242
243 q->enqueue = cbs_enqueue_offload;
244 q->dequeue = cbs_dequeue_offload;
245
246 return 0;
247}
248
249static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
250{
251 struct cbs_sched_data *q = qdisc_priv(sch);
252 struct net_device *dev = qdisc_dev(sch);
253 struct nlattr *tb[TCA_CBS_MAX + 1];
254 struct tc_cbs_qopt *qopt;
255 int err;
256
257 err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
258 if (err < 0)
259 return err;
260
261 if (!tb[TCA_CBS_PARMS])
262 return -EINVAL;
263
264 qopt = nla_data(tb[TCA_CBS_PARMS]);
265
266 if (!qopt->offload) {
267 struct ethtool_link_ksettings ecmd;
268 s64 link_speed;
269
270 if (!__ethtool_get_link_ksettings(dev, &ecmd))
271 link_speed = ecmd.base.speed;
272 else
273 link_speed = SPEED_1000;
274
275 q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
276
277 cbs_disable_offload(dev, q);
278 } else {
279 err = cbs_enable_offload(dev, q, qopt);
280 if (err < 0)
281 return err;
282 }
283
 284 /* Everything went OK; save the parameters used. */
285 q->hicredit = qopt->hicredit;
286 q->locredit = qopt->locredit;
287 q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
288 q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
289 q->offload = qopt->offload;
290
291 return 0;
292}
293
294static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
295{
296 struct cbs_sched_data *q = qdisc_priv(sch);
297 struct net_device *dev = qdisc_dev(sch);
298
299 if (!opt)
300 return -EINVAL;
301
302 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
303
304 q->enqueue = cbs_enqueue_soft;
305 q->dequeue = cbs_dequeue_soft;
306
307 qdisc_watchdog_init(&q->watchdog, sch);
308
309 return cbs_change(sch, opt);
310}
311
312static void cbs_destroy(struct Qdisc *sch)
313{
314 struct cbs_sched_data *q = qdisc_priv(sch);
315 struct net_device *dev = qdisc_dev(sch);
316
317 qdisc_watchdog_cancel(&q->watchdog);
318
319 cbs_disable_offload(dev, q);
320}
321
322static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
323{
324 struct cbs_sched_data *q = qdisc_priv(sch);
325 struct tc_cbs_qopt opt = { };
326 struct nlattr *nest;
327
328 nest = nla_nest_start(skb, TCA_OPTIONS);
329 if (!nest)
330 goto nla_put_failure;
331
332 opt.hicredit = q->hicredit;
333 opt.locredit = q->locredit;
334 opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
335 opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
336 opt.offload = q->offload;
337
338 if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
339 goto nla_put_failure;
340
341 return nla_nest_end(skb, nest);
342
343nla_put_failure:
344 nla_nest_cancel(skb, nest);
345 return -1;
346}
347
348static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
349 .id = "cbs",
350 .priv_size = sizeof(struct cbs_sched_data),
351 .enqueue = cbs_enqueue,
352 .dequeue = cbs_dequeue,
353 .peek = qdisc_peek_dequeued,
354 .init = cbs_init,
355 .reset = qdisc_reset_queue,
356 .destroy = cbs_destroy,
357 .change = cbs_change,
358 .dump = cbs_dump,
359 .owner = THIS_MODULE,
360};
361
362static int __init cbs_module_init(void)
363{
364 return register_qdisc(&cbs_qdisc_ops);
365}
366
367static void __exit cbs_module_exit(void)
368{
369 unregister_qdisc(&cbs_qdisc_ops);
370}
371module_init(cbs_module_init)
372module_exit(cbs_module_exit)
373MODULE_LICENSE("GPL");
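The tunables described in the sch_cbs.c header follow directly from the quoted IEEE 802.1Q-2014 Annex L equations. A runnable sketch of the arithmetic with illustrative numbers (a 100 Mbit/s port reserving 20 Mbit/s for the shaped class; none of these values come from the patch):

#include <stdio.h>
#include <stdint.h>

/* Worked example of the Annex L formulas from the header above.
 * Rates are in kilobits per second, sizes in bytes; integer
 * divisions truncate toward zero.
 */
int main(void)
{
	int64_t port_transmit_rate = 100000;	/* 100 Mbit/s link */
	int64_t idleslope = 20000;		/* reserve 20 Mbit/s */
	int64_t max_interference_size = 1500;	/* worst-case other frame */
	int64_t max_frame_size = 1500;		/* largest frame we send */

	/* 802.1Q-2014 Section 8.6.8.2 item g) */
	int64_t sendslope = idleslope - port_transmit_rate;

	/* Annex L, equations L-3 and L-2 */
	int64_t hicredit = max_interference_size * idleslope
			   / port_transmit_rate;
	int64_t locredit = max_frame_size * sendslope
			   / port_transmit_rate;

	printf("sendslope = %lld kbit/s\n", (long long)sendslope);	/* -80000 */
	printf("hicredit  = %lld bytes\n", (long long)hicredit);	/* 300 */
	printf("locredit  = %lld bytes\n", (long long)locredit);	/* -1200 */
	return 0;
}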
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 2d0e8d4bdc29..5bbcef3dcd8c 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -321,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
321 case TC_ACT_STOLEN: 321 case TC_ACT_STOLEN:
322 case TC_ACT_TRAP: 322 case TC_ACT_TRAP:
323 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 323 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
324 /* fall through */
324 case TC_ACT_SHOT: 325 case TC_ACT_SHOT:
325 return NULL; 326 return NULL;
326 } 327 }
@@ -412,7 +413,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
412 struct drr_sched *q = qdisc_priv(sch); 413 struct drr_sched *q = qdisc_priv(sch);
413 int err; 414 int err;
414 415
415 err = tcf_block_get(&q->block, &q->filter_list); 416 err = tcf_block_get(&q->block, &q->filter_list, sch);
416 if (err) 417 if (err)
417 return err; 418 return err;
418 err = qdisc_class_hash_init(&q->clhash); 419 err = qdisc_class_hash_init(&q->clhash);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 2836c80c7aa5..fb4fb71c68cf 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
344 if (!opt) 344 if (!opt)
345 goto errout; 345 goto errout;
346 346
347 err = tcf_block_get(&p->block, &p->filter_list); 347 err = tcf_block_get(&p->block, &p->filter_list, sch);
348 if (err) 348 if (err)
349 return err; 349 return err;
350 350
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index de3b57ceca7b..0305d791ea94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
105 case TC_ACT_QUEUED: 105 case TC_ACT_QUEUED:
106 case TC_ACT_TRAP: 106 case TC_ACT_TRAP:
107 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 107 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
108 /* fall through */
108 case TC_ACT_SHOT: 109 case TC_ACT_SHOT:
109 return 0; 110 return 0;
110 } 111 }
@@ -481,7 +482,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
481 return err; 482 return err;
482 } 483 }
483 484
484 err = tcf_block_get(&q->block, &q->filter_list); 485 err = tcf_block_get(&q->block, &q->filter_list, sch);
485 if (err) 486 if (err)
486 return err; 487 return err;
487 488
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 92237e75dbbc..3839cbbdc32b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -288,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev)
288} 288}
289EXPORT_SYMBOL(dev_trans_start); 289EXPORT_SYMBOL(dev_trans_start);
290 290
291static void dev_watchdog(unsigned long arg) 291static void dev_watchdog(struct timer_list *t)
292{ 292{
293 struct net_device *dev = (struct net_device *)arg; 293 struct net_device *dev = from_timer(dev, t, watchdog_timer);
294 294
295 netif_tx_lock(dev); 295 netif_tx_lock(dev);
296 if (!qdisc_tx_is_noop(dev)) { 296 if (!qdisc_tx_is_noop(dev)) {
@@ -603,8 +603,14 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
603 struct Qdisc *sch; 603 struct Qdisc *sch;
604 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; 604 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
605 int err = -ENOBUFS; 605 int err = -ENOBUFS;
606 struct net_device *dev = dev_queue->dev; 606 struct net_device *dev;
607
608 if (!dev_queue) {
609 err = -EINVAL;
610 goto errout;
611 }
607 612
613 dev = dev_queue->dev;
608 p = kzalloc_node(size, GFP_KERNEL, 614 p = kzalloc_node(size, GFP_KERNEL,
609 netdev_queue_numa_node_read(dev_queue)); 615 netdev_queue_numa_node_read(dev_queue));
610 616
@@ -685,13 +691,12 @@ void qdisc_reset(struct Qdisc *qdisc)
685 qdisc->gso_skb = NULL; 691 qdisc->gso_skb = NULL;
686 } 692 }
687 qdisc->q.qlen = 0; 693 qdisc->q.qlen = 0;
694 qdisc->qstats.backlog = 0;
688} 695}
689EXPORT_SYMBOL(qdisc_reset); 696EXPORT_SYMBOL(qdisc_reset);
690 697
691static void qdisc_rcu_free(struct rcu_head *head) 698static void qdisc_free(struct Qdisc *qdisc)
692{ 699{
693 struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
694
695 if (qdisc_is_percpu_stats(qdisc)) { 700 if (qdisc_is_percpu_stats(qdisc)) {
696 free_percpu(qdisc->cpu_bstats); 701 free_percpu(qdisc->cpu_bstats);
697 free_percpu(qdisc->cpu_qstats); 702 free_percpu(qdisc->cpu_qstats);
@@ -724,11 +729,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
724 729
725 kfree_skb_list(qdisc->gso_skb); 730 kfree_skb_list(qdisc->gso_skb);
726 kfree_skb(qdisc->skb_bad_txq); 731 kfree_skb(qdisc->skb_bad_txq);
727 /* 732 qdisc_free(qdisc);
728 * gen_estimator est_timer() might access qdisc->q.lock,
729 * wait a RCU grace period before freeing qdisc.
730 */
731 call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
732} 733}
733EXPORT_SYMBOL(qdisc_destroy); 734EXPORT_SYMBOL(qdisc_destroy);
734 735
@@ -959,7 +960,7 @@ void dev_init_scheduler(struct net_device *dev)
959 if (dev_ingress_queue(dev)) 960 if (dev_ingress_queue(dev))
960 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); 961 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
961 962
962 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); 963 timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
963} 964}
964 965
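The dev_watchdog() change above follows the tree-wide timer conversion: callbacks now receive the timer itself and recover the enclosing object with from_timer(), a container_of() wrapper, instead of casting an unsigned long cookie. A minimal sketch of the pattern around a hypothetical structure:

#include <linux/timer.h>

/* "struct foo" and its users are hypothetical; only the
 * timer_setup()/from_timer() pairing mirrors the hunk above.
 */
struct foo {
	struct timer_list watchdog_timer;
};

static void foo_watchdog(struct timer_list *t)
{
	struct foo *f = from_timer(f, t, watchdog_timer);

	/* ... act on f, as dev_watchdog() acts on dev ... */
}

static void foo_init(struct foo *f)
{
	/* replaces setup_timer(&f->watchdog_timer, cb, (unsigned long)f) */
	timer_setup(&f->watchdog_timer, foo_watchdog, 0);
}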
965static void shutdown_scheduler_queue(struct net_device *dev, 966static void shutdown_scheduler_queue(struct net_device *dev,
@@ -1023,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
1023 } 1024 }
1024} 1025}
1025EXPORT_SYMBOL(psched_ratecfg_precompute); 1026EXPORT_SYMBOL(psched_ratecfg_precompute);
1027
1028static void mini_qdisc_rcu_func(struct rcu_head *head)
1029{
1030}
1031
1032void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1033 struct tcf_proto *tp_head)
1034{
1035 struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
1036 struct mini_Qdisc *miniq;
1037
1038 if (!tp_head) {
1039 RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
1040 return;
1041 }
1042
1043 miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
1044 &miniqp->miniq1 : &miniqp->miniq2;
1045
1046 /* We need to make sure that readers won't see the miniq
 1047 * we are about to modify. So wait until the previous call_rcu_bh callback
1048 * is done.
1049 */
1050 rcu_barrier_bh();
1051 miniq->filter_list = tp_head;
1052 rcu_assign_pointer(*miniqp->p_miniq, miniq);
1053
1054 if (miniq_old)
 1055 /* This is the counterpart of the rcu barrier above. We need to
 1056 * block any potential new user of miniq_old until no readers
 1057 * can still see it.
1058 */
1059 call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
1060}
1061EXPORT_SYMBOL(mini_qdisc_pair_swap);
1062
1063void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
1064 struct mini_Qdisc __rcu **p_miniq)
1065{
1066 miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
1067 miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
1068 miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
1069 miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
1070 miniqp->p_miniq = p_miniq;
1071}
1072EXPORT_SYMBOL(mini_qdisc_pair_init);
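mini_qdisc_pair_swap() above flips between two preallocated buffers under RCU, so the ingress fast path never dereferences a half-updated miniq. A condensed sketch of the same double-buffer flip using the generic RCU primitives (the real code uses the _bh flavor; "struct cfg" is a hypothetical payload):

#include <linux/rcupdate.h>

/* Readers do rcu_read_lock(); p = rcu_dereference(active); and can
 * never observe the buffer currently being rewritten: the writer only
 * touches the buffer that is not published, and waits out a grace
 * period before the other buffer can be reused.
 */
struct cfg {
	int value;
};

static struct cfg buf1, buf2;
static struct cfg __rcu *active;

static void flip(int new_value)
{
	struct cfg *old = rcu_dereference_protected(active, 1);
	struct cfg *new = (!old || old == &buf2) ? &buf1 : &buf2;

	new->value = new_value;		 /* safe: 'new' has no readers */
	rcu_assign_pointer(active, new); /* publish the new buffer */
	synchronize_rcu();		 /* let readers of 'old' drain */
}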
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index daaf214e5201..d04068a97d81 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -958,6 +958,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
958 } 958 }
959 959
960 if (cl != NULL) { 960 if (cl != NULL) {
961 int old_flags;
962
961 if (parentid) { 963 if (parentid) {
962 if (cl->cl_parent && 964 if (cl->cl_parent &&
963 cl->cl_parent->cl_common.classid != parentid) 965 cl->cl_parent->cl_common.classid != parentid)
@@ -978,6 +980,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
978 } 980 }
979 981
980 sch_tree_lock(sch); 982 sch_tree_lock(sch);
983 old_flags = cl->cl_flags;
984
981 if (rsc != NULL) 985 if (rsc != NULL)
982 hfsc_change_rsc(cl, rsc, cur_time); 986 hfsc_change_rsc(cl, rsc, cur_time);
983 if (fsc != NULL) 987 if (fsc != NULL)
@@ -986,10 +990,21 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
986 hfsc_change_usc(cl, usc, cur_time); 990 hfsc_change_usc(cl, usc, cur_time);
987 991
988 if (cl->qdisc->q.qlen != 0) { 992 if (cl->qdisc->q.qlen != 0) {
989 if (cl->cl_flags & HFSC_RSC) 993 int len = qdisc_peek_len(cl->qdisc);
990 update_ed(cl, qdisc_peek_len(cl->qdisc)); 994
991 if (cl->cl_flags & HFSC_FSC) 995 if (cl->cl_flags & HFSC_RSC) {
992 update_vf(cl, 0, cur_time); 996 if (old_flags & HFSC_RSC)
997 update_ed(cl, len);
998 else
999 init_ed(cl, len);
1000 }
1001
1002 if (cl->cl_flags & HFSC_FSC) {
1003 if (old_flags & HFSC_FSC)
1004 update_vf(cl, 0, cur_time);
1005 else
1006 init_vf(cl, len);
1007 }
993 } 1008 }
994 sch_tree_unlock(sch); 1009 sch_tree_unlock(sch);
995 1010
@@ -1018,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1018 if (cl == NULL) 1033 if (cl == NULL)
1019 return -ENOBUFS; 1034 return -ENOBUFS;
1020 1035
1021 err = tcf_block_get(&cl->block, &cl->filter_list); 1036 err = tcf_block_get(&cl->block, &cl->filter_list, sch);
1022 if (err) { 1037 if (err) {
1023 kfree(cl); 1038 kfree(cl);
1024 return err; 1039 return err;
@@ -1129,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1129 case TC_ACT_STOLEN: 1144 case TC_ACT_STOLEN:
1130 case TC_ACT_TRAP: 1145 case TC_ACT_TRAP:
1131 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 1146 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1147 /* fall through */
1132 case TC_ACT_SHOT: 1148 case TC_ACT_SHOT:
1133 return NULL; 1149 return NULL;
1134 } 1150 }
@@ -1390,7 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1390 return err; 1406 return err;
1391 q->eligible = RB_ROOT; 1407 q->eligible = RB_ROOT;
1392 1408
1393 err = tcf_block_get(&q->root.block, &q->root.filter_list); 1409 err = tcf_block_get(&q->root.block, &q->root.filter_list, sch);
1394 if (err) 1410 if (err)
1395 return err; 1411 return err;
1396 1412
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7e148376ba52..fa0380730ff0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -142,6 +142,7 @@ struct htb_class {
142 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ 142 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
143 143
144 unsigned int drops ____cacheline_aligned_in_smp; 144 unsigned int drops ____cacheline_aligned_in_smp;
145 unsigned int overlimits;
145}; 146};
146 147
147struct htb_level { 148struct htb_level {
@@ -243,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
243 case TC_ACT_STOLEN: 244 case TC_ACT_STOLEN:
244 case TC_ACT_TRAP: 245 case TC_ACT_TRAP:
245 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 246 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
247 /* fall through */
246 case TC_ACT_SHOT: 248 case TC_ACT_SHOT:
247 return NULL; 249 return NULL;
248 } 250 }
@@ -533,6 +535,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
533 if (new_mode == cl->cmode) 535 if (new_mode == cl->cmode)
534 return; 536 return;
535 537
538 if (new_mode == HTB_CANT_SEND)
539 cl->overlimits++;
540
536 if (cl->prio_activity) { /* not necessary: speed optimization */ 541 if (cl->prio_activity) { /* not necessary: speed optimization */
537 if (cl->cmode != HTB_CANT_SEND) 542 if (cl->cmode != HTB_CANT_SEND)
538 htb_deactivate_prios(q, cl); 543 htb_deactivate_prios(q, cl);
@@ -1026,7 +1031,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1026 if (!opt) 1031 if (!opt)
1027 return -EINVAL; 1032 return -EINVAL;
1028 1033
1029 err = tcf_block_get(&q->block, &q->filter_list); 1034 err = tcf_block_get(&q->block, &q->filter_list, sch);
1030 if (err) 1035 if (err)
1031 return err; 1036 return err;
1032 1037
@@ -1143,6 +1148,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1143 struct htb_class *cl = (struct htb_class *)arg; 1148 struct htb_class *cl = (struct htb_class *)arg;
1144 struct gnet_stats_queue qs = { 1149 struct gnet_stats_queue qs = {
1145 .drops = cl->drops, 1150 .drops = cl->drops,
1151 .overlimits = cl->overlimits,
1146 }; 1152 };
1147 __u32 qlen = 0; 1153 __u32 qlen = 0;
1148 1154
@@ -1388,7 +1394,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1388 if (!cl) 1394 if (!cl)
1389 goto failure; 1395 goto failure;
1390 1396
1391 err = tcf_block_get(&cl->block, &cl->filter_list); 1397 err = tcf_block_get(&cl->block, &cl->filter_list, sch);
1392 if (err) { 1398 if (err) {
1393 kfree(cl); 1399 kfree(cl);
1394 goto failure; 1400 goto failure;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 44de4ee51ce9..5ecc38f35d47 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -20,6 +20,8 @@
20 20
21struct ingress_sched_data { 21struct ingress_sched_data {
22 struct tcf_block *block; 22 struct tcf_block *block;
23 struct tcf_block_ext_info block_info;
24 struct mini_Qdisc_pair miniqp;
23}; 25};
24 26
25static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) 27static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
@@ -53,13 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
53 return q->block; 55 return q->block;
54} 56}
55 57
58static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
59{
60 struct mini_Qdisc_pair *miniqp = priv;
61
62 mini_qdisc_pair_swap(miniqp, tp_head);
63}
64
56static int ingress_init(struct Qdisc *sch, struct nlattr *opt) 65static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
57{ 66{
58 struct ingress_sched_data *q = qdisc_priv(sch); 67 struct ingress_sched_data *q = qdisc_priv(sch);
59 struct net_device *dev = qdisc_dev(sch); 68 struct net_device *dev = qdisc_dev(sch);
60 int err; 69 int err;
61 70
62 err = tcf_block_get(&q->block, &dev->ingress_cl_list); 71 mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
72
73 q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
74 q->block_info.chain_head_change = clsact_chain_head_change;
75 q->block_info.chain_head_change_priv = &q->miniqp;
76
77 err = tcf_block_get_ext(&q->block, sch, &q->block_info);
63 if (err) 78 if (err)
64 return err; 79 return err;
65 80
@@ -73,7 +88,7 @@ static void ingress_destroy(struct Qdisc *sch)
73{ 88{
74 struct ingress_sched_data *q = qdisc_priv(sch); 89 struct ingress_sched_data *q = qdisc_priv(sch);
75 90
76 tcf_block_put(q->block); 91 tcf_block_put_ext(q->block, sch, &q->block_info);
77 net_dec_ingress_queue(); 92 net_dec_ingress_queue();
78} 93}
79 94
@@ -114,6 +129,10 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
114struct clsact_sched_data { 129struct clsact_sched_data {
115 struct tcf_block *ingress_block; 130 struct tcf_block *ingress_block;
116 struct tcf_block *egress_block; 131 struct tcf_block *egress_block;
132 struct tcf_block_ext_info ingress_block_info;
133 struct tcf_block_ext_info egress_block_info;
134 struct mini_Qdisc_pair miniqp_ingress;
135 struct mini_Qdisc_pair miniqp_egress;
117}; 136};
118 137
119static unsigned long clsact_find(struct Qdisc *sch, u32 classid) 138static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
@@ -153,13 +172,25 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
153 struct net_device *dev = qdisc_dev(sch); 172 struct net_device *dev = qdisc_dev(sch);
154 int err; 173 int err;
155 174
156 err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list); 175 mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
176
177 q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
178 q->ingress_block_info.chain_head_change = clsact_chain_head_change;
179 q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
180
181 err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info);
157 if (err) 182 if (err)
158 return err; 183 return err;
159 184
160 err = tcf_block_get(&q->egress_block, &dev->egress_cl_list); 185 mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
186
187 q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
188 q->egress_block_info.chain_head_change = clsact_chain_head_change;
189 q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
190
191 err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
161 if (err) 192 if (err)
162 return err; 193 goto err_egress_block_get;
163 194
164 net_inc_ingress_queue(); 195 net_inc_ingress_queue();
165 net_inc_egress_queue(); 196 net_inc_egress_queue();
@@ -167,14 +198,18 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
167 sch->flags |= TCQ_F_CPUSTATS; 198 sch->flags |= TCQ_F_CPUSTATS;
168 199
169 return 0; 200 return 0;
201
202err_egress_block_get:
203 tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
204 return err;
170} 205}
171 206
172static void clsact_destroy(struct Qdisc *sch) 207static void clsact_destroy(struct Qdisc *sch)
173{ 208{
174 struct clsact_sched_data *q = qdisc_priv(sch); 209 struct clsact_sched_data *q = qdisc_priv(sch);
175 210
176 tcf_block_put(q->egress_block); 211 tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
177 tcf_block_put(q->ingress_block); 212 tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
178 213
179 net_dec_ingress_queue(); 214 net_dec_ingress_queue();
180 net_dec_egress_queue(); 215 net_dec_egress_queue();
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3a3e507422b..213b586a06a0 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -130,15 +130,7 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
130static struct netdev_queue *mq_select_queue(struct Qdisc *sch, 130static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
131 struct tcmsg *tcm) 131 struct tcmsg *tcm)
132{ 132{
133 unsigned int ntx = TC_H_MIN(tcm->tcm_parent); 133 return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
134 struct netdev_queue *dev_queue = mq_queue_get(sch, ntx);
135
136 if (!dev_queue) {
137 struct net_device *dev = qdisc_dev(sch);
138
139 return netdev_get_tx_queue(dev, 0);
140 }
141 return dev_queue;
142} 134}
143 135
144static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, 136static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6bcdfe6e7b63..b85885a9d8a1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -18,10 +18,16 @@
18#include <net/netlink.h> 18#include <net/netlink.h>
19#include <net/pkt_sched.h> 19#include <net/pkt_sched.h>
20#include <net/sch_generic.h> 20#include <net/sch_generic.h>
21#include <net/pkt_cls.h>
21 22
22struct mqprio_sched { 23struct mqprio_sched {
23 struct Qdisc **qdiscs; 24 struct Qdisc **qdiscs;
25 u16 mode;
26 u16 shaper;
24 int hw_offload; 27 int hw_offload;
28 u32 flags;
29 u64 min_rate[TC_QOPT_MAX_QUEUE];
30 u64 max_rate[TC_QOPT_MAX_QUEUE];
25}; 31};
26 32
27static void mqprio_destroy(struct Qdisc *sch) 33static void mqprio_destroy(struct Qdisc *sch)
@@ -39,9 +45,18 @@ static void mqprio_destroy(struct Qdisc *sch)
39 } 45 }
40 46
41 if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { 47 if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
42 struct tc_mqprio_qopt mqprio = {}; 48 struct tc_mqprio_qopt_offload mqprio = { { 0 } };
43 49
44 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio); 50 switch (priv->mode) {
51 case TC_MQPRIO_MODE_DCB:
52 case TC_MQPRIO_MODE_CHANNEL:
53 dev->netdev_ops->ndo_setup_tc(dev,
54 TC_SETUP_QDISC_MQPRIO,
55 &mqprio);
56 break;
57 default:
58 return;
59 }
45 } else { 60 } else {
46 netdev_set_num_tc(dev, 0); 61 netdev_set_num_tc(dev, 0);
47 } 62 }
@@ -97,6 +112,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
97 return 0; 112 return 0;
98} 113}
99 114
115static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
116 [TCA_MQPRIO_MODE] = { .len = sizeof(u16) },
117 [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) },
118 [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED },
119 [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED },
120};
121
122static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
123 const struct nla_policy *policy, int len)
124{
125 int nested_len = nla_len(nla) - NLA_ALIGN(len);
126
127 if (nested_len >= nla_attr_size(0))
128 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
129 nested_len, policy, NULL);
130
131 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
132 return 0;
133}
134
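parse_attr() above skips the legacy struct tc_mqprio_qopt at the front of TCA_OPTIONS before parsing the new trailing attributes; the offset math leans on 4-byte netlink alignment. A quick standalone check of that arithmetic (NLA_ALIGN re-declared locally, lengths made up):

#include <stdio.h>

/* Netlink attributes are 4-byte aligned (NLA_ALIGNTO is 4 in the
 * uapi headers), so nested attributes start NLA_ALIGN(len) bytes
 * past the start of the payload.
 */
#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))

int main(void)
{
	int opt_len = 120;	/* pretend nla_len(opt) */
	int qopt_len = 82;	/* pretend sizeof(struct tc_mqprio_qopt) */
	int nested_len = opt_len - NLA_ALIGN(qopt_len);

	printf("attributes start at %d, %d bytes remain\n",
	       NLA_ALIGN(qopt_len), nested_len);	/* 84, 36 */
	return 0;
}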
100static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) 135static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
101{ 136{
102 struct net_device *dev = qdisc_dev(sch); 137 struct net_device *dev = qdisc_dev(sch);
@@ -105,6 +140,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
105 struct Qdisc *qdisc; 140 struct Qdisc *qdisc;
106 int i, err = -EOPNOTSUPP; 141 int i, err = -EOPNOTSUPP;
107 struct tc_mqprio_qopt *qopt = NULL; 142 struct tc_mqprio_qopt *qopt = NULL;
143 struct nlattr *tb[TCA_MQPRIO_MAX + 1];
144 struct nlattr *attr;
145 int rem;
146 int len;
108 147
109 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); 148 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
110 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); 149 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
@@ -115,6 +154,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
115 if (!netif_is_multiqueue(dev)) 154 if (!netif_is_multiqueue(dev))
116 return -EOPNOTSUPP; 155 return -EOPNOTSUPP;
117 156
 157 /* make certain we can allocate enough classids to handle all queues */
158 if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
159 return -ENOMEM;
160
118 if (!opt || nla_len(opt) < sizeof(*qopt)) 161 if (!opt || nla_len(opt) < sizeof(*qopt))
119 return -EINVAL; 162 return -EINVAL;
120 163
@@ -122,6 +165,59 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
122 if (mqprio_parse_opt(dev, qopt)) 165 if (mqprio_parse_opt(dev, qopt))
123 return -EINVAL; 166 return -EINVAL;
124 167
168 len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
169 if (len > 0) {
170 err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
171 sizeof(*qopt));
172 if (err < 0)
173 return err;
174
175 if (!qopt->hw)
176 return -EINVAL;
177
178 if (tb[TCA_MQPRIO_MODE]) {
179 priv->flags |= TC_MQPRIO_F_MODE;
180 priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
181 }
182
183 if (tb[TCA_MQPRIO_SHAPER]) {
184 priv->flags |= TC_MQPRIO_F_SHAPER;
185 priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
186 }
187
188 if (tb[TCA_MQPRIO_MIN_RATE64]) {
189 if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
190 return -EINVAL;
191 i = 0;
192 nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
193 rem) {
194 if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
195 return -EINVAL;
196 if (i >= qopt->num_tc)
197 break;
198 priv->min_rate[i] = *(u64 *)nla_data(attr);
199 i++;
200 }
201 priv->flags |= TC_MQPRIO_F_MIN_RATE;
202 }
203
204 if (tb[TCA_MQPRIO_MAX_RATE64]) {
205 if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
206 return -EINVAL;
207 i = 0;
208 nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
209 rem) {
210 if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
211 return -EINVAL;
212 if (i >= qopt->num_tc)
213 break;
214 priv->max_rate[i] = *(u64 *)nla_data(attr);
215 i++;
216 }
217 priv->flags |= TC_MQPRIO_F_MAX_RATE;
218 }
219 }
220
125 /* pre-allocate qdisc, attachment can't fail */ 221 /* pre-allocate qdisc, attachment can't fail */
126 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), 222 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
127 GFP_KERNEL); 223 GFP_KERNEL);
@@ -146,14 +242,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
146 * supplied and verified mapping 242 * supplied and verified mapping
147 */ 243 */
148 if (qopt->hw) { 244 if (qopt->hw) {
149 struct tc_mqprio_qopt mqprio = *qopt; 245 struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
150 246
151 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, 247 switch (priv->mode) {
248 case TC_MQPRIO_MODE_DCB:
249 if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
250 return -EINVAL;
251 break;
252 case TC_MQPRIO_MODE_CHANNEL:
253 mqprio.flags = priv->flags;
254 if (priv->flags & TC_MQPRIO_F_MODE)
255 mqprio.mode = priv->mode;
256 if (priv->flags & TC_MQPRIO_F_SHAPER)
257 mqprio.shaper = priv->shaper;
258 if (priv->flags & TC_MQPRIO_F_MIN_RATE)
259 for (i = 0; i < mqprio.qopt.num_tc; i++)
260 mqprio.min_rate[i] = priv->min_rate[i];
261 if (priv->flags & TC_MQPRIO_F_MAX_RATE)
262 for (i = 0; i < mqprio.qopt.num_tc; i++)
263 mqprio.max_rate[i] = priv->max_rate[i];
264 break;
265 default:
266 return -EINVAL;
267 }
268 err = dev->netdev_ops->ndo_setup_tc(dev,
269 TC_SETUP_QDISC_MQPRIO,
152 &mqprio); 270 &mqprio);
153 if (err) 271 if (err)
154 return err; 272 return err;
155 273
156 priv->hw_offload = mqprio.hw; 274 priv->hw_offload = mqprio.qopt.hw;
157 } else { 275 } else {
158 netdev_set_num_tc(dev, qopt->num_tc); 276 netdev_set_num_tc(dev, qopt->num_tc);
159 for (i = 0; i < qopt->num_tc; i++) 277 for (i = 0; i < qopt->num_tc; i++)
@@ -193,7 +311,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
193 unsigned long cl) 311 unsigned long cl)
194{ 312{
195 struct net_device *dev = qdisc_dev(sch); 313 struct net_device *dev = qdisc_dev(sch);
196 unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); 314 unsigned long ntx = cl - 1;
197 315
198 if (ntx >= dev->num_tx_queues) 316 if (ntx >= dev->num_tx_queues)
199 return NULL; 317 return NULL;
@@ -223,11 +341,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
223 return 0; 341 return 0;
224} 342}
225 343
344static int dump_rates(struct mqprio_sched *priv,
345 struct tc_mqprio_qopt *opt, struct sk_buff *skb)
346{
347 struct nlattr *nest;
348 int i;
349
350 if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
351 nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64);
352 if (!nest)
353 goto nla_put_failure;
354
355 for (i = 0; i < opt->num_tc; i++) {
356 if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
357 sizeof(priv->min_rate[i]),
358 &priv->min_rate[i]))
359 goto nla_put_failure;
360 }
361 nla_nest_end(skb, nest);
362 }
363
364 if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
365 nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64);
366 if (!nest)
367 goto nla_put_failure;
368
369 for (i = 0; i < opt->num_tc; i++) {
370 if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
371 sizeof(priv->max_rate[i]),
372 &priv->max_rate[i]))
373 goto nla_put_failure;
374 }
375 nla_nest_end(skb, nest);
376 }
377 return 0;
378
379nla_put_failure:
380 nla_nest_cancel(skb, nest);
381 return -1;
382}
383
226static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) 384static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
227{ 385{
228 struct net_device *dev = qdisc_dev(sch); 386 struct net_device *dev = qdisc_dev(sch);
229 struct mqprio_sched *priv = qdisc_priv(sch); 387 struct mqprio_sched *priv = qdisc_priv(sch);
230 unsigned char *b = skb_tail_pointer(skb); 388 struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
231 struct tc_mqprio_qopt opt = { 0 }; 389 struct tc_mqprio_qopt opt = { 0 };
232 struct Qdisc *qdisc; 390 struct Qdisc *qdisc;
233 unsigned int i; 391 unsigned int i;
@@ -258,12 +416,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
258 opt.offset[i] = dev->tc_to_txq[i].offset; 416 opt.offset[i] = dev->tc_to_txq[i].offset;
259 } 417 }
260 418
261 if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) 419 if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
420 goto nla_put_failure;
421
422 if ((priv->flags & TC_MQPRIO_F_MODE) &&
423 nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
262 goto nla_put_failure; 424 goto nla_put_failure;
263 425
264 return skb->len; 426 if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
427 nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
428 goto nla_put_failure;
429
430 if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
431 priv->flags & TC_MQPRIO_F_MAX_RATE) &&
432 (dump_rates(priv, &opt, skb) != 0))
433 goto nla_put_failure;
434
435 return nla_nest_end(skb, nla);
265nla_put_failure: 436nla_put_failure:
266 nlmsg_trim(skb, b); 437 nlmsg_trim(skb, nla);
267 return -1; 438 return -1;
268} 439}
269 440
@@ -282,38 +453,35 @@ static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
282 struct net_device *dev = qdisc_dev(sch); 453 struct net_device *dev = qdisc_dev(sch);
283 unsigned int ntx = TC_H_MIN(classid); 454 unsigned int ntx = TC_H_MIN(classid);
284 455
285 if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) 456 /* There are essentially two regions here that have valid classid
286 return 0; 457 * values. The first region will have a classid value of 1 through
287 return ntx; 458 * num_tx_queues. All of these are backed by actual Qdiscs.
459 */
460 if (ntx < TC_H_MIN_PRIORITY)
461 return (ntx <= dev->num_tx_queues) ? ntx : 0;
462
463 /* The second region represents the hardware traffic classes. These
464 * are represented by classid values of TC_H_MIN_PRIORITY through
465 * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
466 */
467 return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
288} 468}
289 469
290static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, 470static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
291 struct sk_buff *skb, struct tcmsg *tcm) 471 struct sk_buff *skb, struct tcmsg *tcm)
292{ 472{
293 struct net_device *dev = qdisc_dev(sch); 473 if (cl < TC_H_MIN_PRIORITY) {
474 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
475 struct net_device *dev = qdisc_dev(sch);
476 int tc = netdev_txq_to_tc(dev, cl - 1);
294 477
295 if (cl <= netdev_get_num_tc(dev)) { 478 tcm->tcm_parent = (tc < 0) ? 0 :
479 TC_H_MAKE(TC_H_MAJ(sch->handle),
480 TC_H_MIN(tc + TC_H_MIN_PRIORITY));
481 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
482 } else {
296 tcm->tcm_parent = TC_H_ROOT; 483 tcm->tcm_parent = TC_H_ROOT;
297 tcm->tcm_info = 0; 484 tcm->tcm_info = 0;
298 } else {
299 int i;
300 struct netdev_queue *dev_queue;
301
302 dev_queue = mqprio_queue_get(sch, cl);
303 tcm->tcm_parent = 0;
304 for (i = 0; i < netdev_get_num_tc(dev); i++) {
305 struct netdev_tc_txq tc = dev->tc_to_txq[i];
306 int q_idx = cl - netdev_get_num_tc(dev);
307
308 if (q_idx > tc.offset &&
309 q_idx <= tc.offset + tc.count) {
310 tcm->tcm_parent =
311 TC_H_MAKE(TC_H_MAJ(sch->handle),
312 TC_H_MIN(i + 1));
313 break;
314 }
315 }
316 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
317 } 485 }
318 tcm->tcm_handle |= TC_H_MIN(cl); 486 tcm->tcm_handle |= TC_H_MIN(cl);
319 return 0; 487 return 0;
@@ -324,15 +492,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
324 __releases(d->lock) 492 __releases(d->lock)
325 __acquires(d->lock) 493 __acquires(d->lock)
326{ 494{
327 struct net_device *dev = qdisc_dev(sch); 495 if (cl >= TC_H_MIN_PRIORITY) {
328
329 if (cl <= netdev_get_num_tc(dev)) {
330 int i; 496 int i;
331 __u32 qlen = 0; 497 __u32 qlen = 0;
332 struct Qdisc *qdisc; 498 struct Qdisc *qdisc;
333 struct gnet_stats_queue qstats = {0}; 499 struct gnet_stats_queue qstats = {0};
334 struct gnet_stats_basic_packed bstats = {0}; 500 struct gnet_stats_basic_packed bstats = {0};
335 struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; 501 struct net_device *dev = qdisc_dev(sch);
502 struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
336 503
 337 /* Drop lock here; it will be reclaimed before touching 504 /* Drop lock here; it will be reclaimed before touching
 338 * statistics. This is required because the d->lock we 505 * statistics. This is required because the d->lock we
@@ -385,17 +552,36 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
385 552
386 /* Walk hierarchy with a virtual class per tc */ 553 /* Walk hierarchy with a virtual class per tc */
387 arg->count = arg->skip; 554 arg->count = arg->skip;
388 for (ntx = arg->skip; 555 for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
389 ntx < dev->num_tx_queues + netdev_get_num_tc(dev); 556 if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
390 ntx++) { 557 arg->stop = 1;
558 return;
559 }
560 arg->count++;
561 }
562
563 /* Pad the values and skip over unused traffic classes */
564 if (ntx < TC_MAX_QUEUE) {
565 arg->count = TC_MAX_QUEUE;
566 ntx = TC_MAX_QUEUE;
567 }
568
569 /* Reset offset, sort out remaining per-queue qdiscs */
570 for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
391 if (arg->fn(sch, ntx + 1, arg) < 0) { 571 if (arg->fn(sch, ntx + 1, arg) < 0) {
392 arg->stop = 1; 572 arg->stop = 1;
393 break; 573 return;
394 } 574 }
395 arg->count++; 575 arg->count++;
396 } 576 }
397} 577}
398 578
579static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch,
580 struct tcmsg *tcm)
581{
582 return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
583}
584
399static const struct Qdisc_class_ops mqprio_class_ops = { 585static const struct Qdisc_class_ops mqprio_class_ops = {
400 .graft = mqprio_graft, 586 .graft = mqprio_graft,
401 .leaf = mqprio_leaf, 587 .leaf = mqprio_leaf,
@@ -403,6 +589,7 @@ static const struct Qdisc_class_ops mqprio_class_ops = {
403 .walk = mqprio_walk, 589 .walk = mqprio_walk,
404 .dump = mqprio_dump_class, 590 .dump = mqprio_dump_class,
405 .dump_stats = mqprio_dump_class_stats, 591 .dump_stats = mqprio_dump_class_stats,
592 .select_queue = mqprio_select_queue,
406}; 593};
407 594
408static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { 595static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
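The rewritten mqprio_find() splits the classid space into a per-queue region starting at 1 and a traffic-class region starting at TC_H_MIN_PRIORITY. A standalone demo of the mapping, assuming TC_H_MIN_PRIORITY is 0xFFE0 as in the uapi header this series extends:

#include <stdio.h>

/* Demo of the two classid regions described above: 1..num_tx_queues
 * back real per-queue qdiscs; TC_H_MIN_PRIORITY..+num_tc-1 name the
 * hardware traffic classes. Anything else maps to 0 (not found).
 */
#define TC_H_MIN_PRIORITY 0xFFE0U

static unsigned long find_class(unsigned int ntx,
				unsigned int num_tx_queues,
				unsigned int num_tc)
{
	if (ntx < TC_H_MIN_PRIORITY)
		return ntx <= num_tx_queues ? ntx : 0;
	return (ntx - TC_H_MIN_PRIORITY) < num_tc ? ntx : 0;
}

int main(void)
{
	/* 8 queues, 4 traffic classes */
	printf("%lu\n", find_class(3, 8, 4));		/* 3: queue 3 */
	printf("%lu\n", find_class(9, 8, 4));		/* 0: no such queue */
	printf("%lu\n", find_class(0xFFE2, 8, 4));	/* 65506: TC 2 */
	printf("%lu\n", find_class(0xFFE5, 8, 4));	/* 0: only 4 TCs */
	return 0;
}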
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index ff4fc3e0facd..012216386c0b 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
54 case TC_ACT_QUEUED: 54 case TC_ACT_QUEUED:
55 case TC_ACT_TRAP: 55 case TC_ACT_TRAP:
56 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 56 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
57 /* fall through */
57 case TC_ACT_SHOT: 58 case TC_ACT_SHOT:
58 return NULL; 59 return NULL;
59 } 60 }
@@ -245,7 +246,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
245 if (opt == NULL) 246 if (opt == NULL)
246 return -EINVAL; 247 return -EINVAL;
247 248
248 err = tcf_block_get(&q->block, &q->filter_list); 249 err = tcf_block_get(&q->block, &q->filter_list, sch);
249 if (err) 250 if (err)
250 return err; 251 return err;
251 252
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b1266e75ca43..dd70924cbcdf 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,8 +77,8 @@ struct netem_sched_data {
77 77
78 struct qdisc_watchdog watchdog; 78 struct qdisc_watchdog watchdog;
79 79
80 psched_tdiff_t latency; 80 s64 latency;
81 psched_tdiff_t jitter; 81 s64 jitter;
82 82
83 u32 loss; 83 u32 loss;
84 u32 ecn; 84 u32 ecn;
@@ -135,6 +135,13 @@ struct netem_sched_data {
135 u32 a5; /* p23 used only in 4-states */ 135 u32 a5; /* p23 used only in 4-states */
136 } clg; 136 } clg;
137 137
138 struct tc_netem_slot slot_config;
139 struct slotstate {
140 u64 slot_next;
141 s32 packets_left;
142 s32 bytes_left;
143 } slot;
144
138}; 145};
139 146
140/* Time stamp put into socket buffer control block 147/* Time stamp put into socket buffer control block
@@ -145,16 +152,9 @@ struct netem_sched_data {
145 * we save skb->tstamp value in skb->cb[] before destroying it. 152 * we save skb->tstamp value in skb->cb[] before destroying it.
146 */ 153 */
147struct netem_skb_cb { 154struct netem_skb_cb {
148 psched_time_t time_to_send; 155 u64 time_to_send;
149 ktime_t tstamp_save;
150}; 156};
151 157
152
153static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
154{
155 return rb_entry(rb, struct sk_buff, rbnode);
156}
157
158static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 158static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
159{ 159{
160 /* we assume we can use skb next/prev/tstamp as storage for rb_node */ 160 /* we assume we can use skb next/prev/tstamp as storage for rb_node */
@@ -312,11 +312,11 @@ static bool loss_event(struct netem_sched_data *q)
312 * std deviation sigma. Uses table lookup to approximate the desired 312 * std deviation sigma. Uses table lookup to approximate the desired
313 * distribution, and a uniformly-distributed pseudo-random source. 313 * distribution, and a uniformly-distributed pseudo-random source.
314 */ 314 */
315static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, 315static s64 tabledist(s64 mu, s32 sigma,
316 struct crndstate *state, 316 struct crndstate *state,
317 const struct disttable *dist) 317 const struct disttable *dist)
318{ 318{
319 psched_tdiff_t x; 319 s64 x;
320 long t; 320 long t;
321 u32 rnd; 321 u32 rnd;
322 322
@@ -327,7 +327,7 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
327 327
328 /* default uniform distribution */ 328 /* default uniform distribution */
329 if (dist == NULL) 329 if (dist == NULL)
330 return (rnd % (2*sigma)) - sigma + mu; 330 return (rnd % (2 * sigma)) - sigma + mu;
331 331
332 t = dist->table[rnd % dist->size]; 332 t = dist->table[rnd % dist->size];
333 x = (sigma % NETEM_DIST_SCALE) * t; 333 x = (sigma % NETEM_DIST_SCALE) * t;
@@ -339,10 +339,8 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
339 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 339 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
340} 340}
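When no distribution table is loaded, tabledist() above falls back to a uniform delay. A standalone check of that fallback with hand-picked random values in place of the qdisc's correlated RNG:

#include <stdio.h>
#include <stdint.h>

/* The no-table fallback in tabledist(): results are uniform over
 * [mu - sigma, mu + sigma). rnd stands in for get_crandom().
 */
static int64_t uniform_delay(int64_t mu, int32_t sigma, uint32_t rnd)
{
	if (sigma == 0)
		return mu;
	return (int64_t)(rnd % (uint32_t)(2 * sigma)) - sigma + mu;
}

int main(void)
{
	int64_t mu = 100 * 1000000LL;	/* 100 ms, in ns */
	int32_t sigma = 10 * 1000000;	/* 10 ms */

	printf("%lld\n", (long long)uniform_delay(mu, sigma, 0));
	printf("%lld\n", (long long)uniform_delay(mu, sigma,
						  2u * sigma - 1));
	return 0;	/* prints mu - sigma and mu + sigma - 1 */
}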
341 341
342static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) 342static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
343{ 343{
344 u64 ticks;
345
346 len += q->packet_overhead; 344 len += q->packet_overhead;
347 345
348 if (q->cell_size) { 346 if (q->cell_size) {
@@ -353,21 +351,19 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
353 len = cells * (q->cell_size + q->cell_overhead); 351 len = cells * (q->cell_size + q->cell_overhead);
354 } 352 }
355 353
356 ticks = (u64)len * NSEC_PER_SEC; 354 return div64_u64(len * NSEC_PER_SEC, q->rate);
357
358 do_div(ticks, q->rate);
359 return PSCHED_NS2TICKS(ticks);
360} 355}
361 356
362static void tfifo_reset(struct Qdisc *sch) 357static void tfifo_reset(struct Qdisc *sch)
363{ 358{
364 struct netem_sched_data *q = qdisc_priv(sch); 359 struct netem_sched_data *q = qdisc_priv(sch);
365 struct rb_node *p; 360 struct rb_node *p = rb_first(&q->t_root);
366 361
367 while ((p = rb_first(&q->t_root))) { 362 while (p) {
368 struct sk_buff *skb = netem_rb_to_skb(p); 363 struct sk_buff *skb = rb_to_skb(p);
369 364
370 rb_erase(p, &q->t_root); 365 p = rb_next(p);
366 rb_erase(&skb->rbnode, &q->t_root);
371 rtnl_kfree_skbs(skb, skb); 367 rtnl_kfree_skbs(skb, skb);
372 } 368 }
373} 369}
@@ -375,14 +371,14 @@ static void tfifo_reset(struct Qdisc *sch)
375static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 371static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
376{ 372{
377 struct netem_sched_data *q = qdisc_priv(sch); 373 struct netem_sched_data *q = qdisc_priv(sch);
378 psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; 374 u64 tnext = netem_skb_cb(nskb)->time_to_send;
379 struct rb_node **p = &q->t_root.rb_node, *parent = NULL; 375 struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
380 376
381 while (*p) { 377 while (*p) {
382 struct sk_buff *skb; 378 struct sk_buff *skb;
383 379
384 parent = *p; 380 parent = *p;
385 skb = netem_rb_to_skb(parent); 381 skb = rb_to_skb(parent);
386 if (tnext >= netem_skb_cb(skb)->time_to_send) 382 if (tnext >= netem_skb_cb(skb)->time_to_send)
387 p = &parent->rb_right; 383 p = &parent->rb_right;
388 else 384 else
@@ -521,13 +517,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
521 if (q->gap == 0 || /* not doing reordering */ 517 if (q->gap == 0 || /* not doing reordering */
522 q->counter < q->gap - 1 || /* inside last reordering gap */ 518 q->counter < q->gap - 1 || /* inside last reordering gap */
523 q->reorder < get_crandom(&q->reorder_cor)) { 519 q->reorder < get_crandom(&q->reorder_cor)) {
524 psched_time_t now; 520 u64 now;
525 psched_tdiff_t delay; 521 s64 delay;
526 522
527 delay = tabledist(q->latency, q->jitter, 523 delay = tabledist(q->latency, q->jitter,
528 &q->delay_cor, q->delay_dist); 524 &q->delay_cor, q->delay_dist);
529 525
530 now = psched_get_time(); 526 now = ktime_get_ns();
531 527
532 if (q->rate) { 528 if (q->rate) {
533 struct netem_skb_cb *last = NULL; 529 struct netem_skb_cb *last = NULL;
@@ -538,7 +534,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
538 struct sk_buff *t_skb; 534 struct sk_buff *t_skb;
539 struct netem_skb_cb *t_last; 535 struct netem_skb_cb *t_last;
540 536
541 t_skb = netem_rb_to_skb(rb_last(&q->t_root)); 537 t_skb = skb_rb_last(&q->t_root);
542 t_last = netem_skb_cb(t_skb); 538 t_last = netem_skb_cb(t_skb);
543 if (!last || 539 if (!last ||
544 t_last->time_to_send > last->time_to_send) { 540 t_last->time_to_send > last->time_to_send) {
@@ -553,15 +549,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
553 * from delay. 549 * from delay.
554 */ 550 */
555 delay -= last->time_to_send - now; 551 delay -= last->time_to_send - now;
556 delay = max_t(psched_tdiff_t, 0, delay); 552 delay = max_t(s64, 0, delay);
557 now = last->time_to_send; 553 now = last->time_to_send;
558 } 554 }
559 555
560 delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); 556 delay += packet_time_ns(qdisc_pkt_len(skb), q);
561 } 557 }
562 558
563 cb->time_to_send = now + delay; 559 cb->time_to_send = now + delay;
564 cb->tstamp_save = skb->tstamp;
565 ++q->counter; 560 ++q->counter;
566 tfifo_enqueue(skb, sch); 561 tfifo_enqueue(skb, sch);
567 } else { 562 } else {
@@ -569,7 +564,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
569 * Do re-ordering by putting one out of N packets at the front 564 * Do re-ordering by putting one out of N packets at the front
570 * of the queue. 565 * of the queue.
571 */ 566 */
572 cb->time_to_send = psched_get_time(); 567 cb->time_to_send = ktime_get_ns();
573 q->counter = 0; 568 q->counter = 0;
574 569
575 netem_enqueue_skb_head(&sch->q, skb); 570 netem_enqueue_skb_head(&sch->q, skb);
@@ -600,6 +595,20 @@ finish_segs:
600 return NET_XMIT_SUCCESS; 595 return NET_XMIT_SUCCESS;
601} 596}
602 597
 598/* Delay the next round by scheduling a new future slot with
 599 * the configured byte and packet budgets.
600 */
601
602static void get_slot_next(struct netem_sched_data *q, u64 now)
603{
604 q->slot.slot_next = now + q->slot_config.min_delay +
605 (prandom_u32() *
606 (q->slot_config.max_delay -
607 q->slot_config.min_delay) >> 32);
608 q->slot.packets_left = q->slot_config.max_packets;
609 q->slot.bytes_left = q->slot_config.max_bytes;
610}
611
603static struct sk_buff *netem_dequeue(struct Qdisc *sch) 612static struct sk_buff *netem_dequeue(struct Qdisc *sch)
604{ 613{
605 struct netem_sched_data *q = qdisc_priv(sch); 614 struct netem_sched_data *q = qdisc_priv(sch);
@@ -616,20 +625,26 @@ deliver:
616 } 625 }
617 p = rb_first(&q->t_root); 626 p = rb_first(&q->t_root);
618 if (p) { 627 if (p) {
619 psched_time_t time_to_send; 628 u64 time_to_send;
629 u64 now = ktime_get_ns();
620 630
621 skb = netem_rb_to_skb(p); 631 skb = rb_to_skb(p);
622 632
 623 /* is there more time remaining? */ 633 /* is there more time remaining? */
624 time_to_send = netem_skb_cb(skb)->time_to_send; 634 time_to_send = netem_skb_cb(skb)->time_to_send;
625 if (time_to_send <= psched_get_time()) { 635 if (q->slot.slot_next && q->slot.slot_next < time_to_send)
626 rb_erase(p, &q->t_root); 636 get_slot_next(q, now);
627 637
638 if (time_to_send <= now && q->slot.slot_next <= now) {
639 rb_erase(p, &q->t_root);
628 sch->q.qlen--; 640 sch->q.qlen--;
629 qdisc_qstats_backlog_dec(sch, skb); 641 qdisc_qstats_backlog_dec(sch, skb);
630 skb->next = NULL; 642 skb->next = NULL;
631 skb->prev = NULL; 643 skb->prev = NULL;
632 skb->tstamp = netem_skb_cb(skb)->tstamp_save; 644 /* skb->dev shares skb->rbnode area,
 645 * so we need to restore its value.
646 */
647 skb->dev = qdisc_dev(sch);
633 648
634#ifdef CONFIG_NET_CLS_ACT 649#ifdef CONFIG_NET_CLS_ACT
635 /* 650 /*
@@ -640,6 +655,14 @@ deliver:
640 skb->tstamp = 0; 655 skb->tstamp = 0;
641#endif 656#endif
642 657
658 if (q->slot.slot_next) {
659 q->slot.packets_left--;
660 q->slot.bytes_left -= qdisc_pkt_len(skb);
661 if (q->slot.packets_left <= 0 ||
662 q->slot.bytes_left <= 0)
663 get_slot_next(q, now);
664 }
665
643 if (q->qdisc) { 666 if (q->qdisc) {
644 unsigned int pkt_len = qdisc_pkt_len(skb); 667 unsigned int pkt_len = qdisc_pkt_len(skb);
645 struct sk_buff *to_free = NULL; 668 struct sk_buff *to_free = NULL;
@@ -663,7 +686,10 @@ deliver:
663 if (skb) 686 if (skb)
664 goto deliver; 687 goto deliver;
665 } 688 }
666 qdisc_watchdog_schedule(&q->watchdog, time_to_send); 689
690 qdisc_watchdog_schedule_ns(&q->watchdog,
691 max(time_to_send,
692 q->slot.slot_next));
667 } 693 }
668 694
669 if (q->qdisc) { 695 if (q->qdisc) {
@@ -694,6 +720,7 @@ static void dist_free(struct disttable *d)
694 * Distribution data is a variable size payload containing 720 * Distribution data is a variable size payload containing
695 * signed 16 bit values. 721 * signed 16 bit values.
696 */ 722 */
723
697static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 724static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
698{ 725{
699 struct netem_sched_data *q = qdisc_priv(sch); 726 struct netem_sched_data *q = qdisc_priv(sch);
@@ -724,6 +751,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
724 return 0; 751 return 0;
725} 752}
726 753
754static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
755{
756 const struct tc_netem_slot *c = nla_data(attr);
757
758 q->slot_config = *c;
759 if (q->slot_config.max_packets == 0)
760 q->slot_config.max_packets = INT_MAX;
761 if (q->slot_config.max_bytes == 0)
762 q->slot_config.max_bytes = INT_MAX;
763 q->slot.packets_left = q->slot_config.max_packets;
764 q->slot.bytes_left = q->slot_config.max_bytes;
765 if (q->slot_config.min_delay | q->slot_config.max_delay)
766 q->slot.slot_next = ktime_get_ns();
767 else
768 q->slot.slot_next = 0;
769}
770
727static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) 771static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
728{ 772{
729 const struct tc_netem_corr *c = nla_data(attr); 773 const struct tc_netem_corr *c = nla_data(attr);
@@ -825,6 +869,9 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
825 [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, 869 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
826 [TCA_NETEM_ECN] = { .type = NLA_U32 }, 870 [TCA_NETEM_ECN] = { .type = NLA_U32 },
827 [TCA_NETEM_RATE64] = { .type = NLA_U64 }, 871 [TCA_NETEM_RATE64] = { .type = NLA_U64 },
872 [TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
873 [TCA_NETEM_JITTER64] = { .type = NLA_S64 },
874 [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
828}; 875};
829 876
830static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 877static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -892,8 +939,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
892 939
893 sch->limit = qopt->limit; 940 sch->limit = qopt->limit;
894 941
895 q->latency = qopt->latency; 942 q->latency = PSCHED_TICKS2NS(qopt->latency);
896 q->jitter = qopt->jitter; 943 q->jitter = PSCHED_TICKS2NS(qopt->jitter);
897 q->limit = qopt->limit; 944 q->limit = qopt->limit;
898 q->gap = qopt->gap; 945 q->gap = qopt->gap;
899 q->counter = 0; 946 q->counter = 0;
@@ -922,9 +969,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
922 q->rate = max_t(u64, q->rate, 969 q->rate = max_t(u64, q->rate,
923 nla_get_u64(tb[TCA_NETEM_RATE64])); 970 nla_get_u64(tb[TCA_NETEM_RATE64]));
924 971
972 if (tb[TCA_NETEM_LATENCY64])
973 q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
974
975 if (tb[TCA_NETEM_JITTER64])
976 q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
977
925 if (tb[TCA_NETEM_ECN]) 978 if (tb[TCA_NETEM_ECN])
926 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); 979 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
927 980
981 if (tb[TCA_NETEM_SLOT])
982 get_slot(q, tb[TCA_NETEM_SLOT]);
983
928 return ret; 984 return ret;
929} 985}
930 986
@@ -1014,9 +1070,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1014 struct tc_netem_reorder reorder; 1070 struct tc_netem_reorder reorder;
1015 struct tc_netem_corrupt corrupt; 1071 struct tc_netem_corrupt corrupt;
1016 struct tc_netem_rate rate; 1072 struct tc_netem_rate rate;
1073 struct tc_netem_slot slot;
1017 1074
1018 qopt.latency = q->latency; 1075 qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
1019 qopt.jitter = q->jitter; 1076 UINT_MAX);
1077 qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
1078 UINT_MAX);
1020 qopt.limit = q->limit; 1079 qopt.limit = q->limit;
1021 qopt.loss = q->loss; 1080 qopt.loss = q->loss;
1022 qopt.gap = q->gap; 1081 qopt.gap = q->gap;
@@ -1024,6 +1083,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1024 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) 1083 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
1025 goto nla_put_failure; 1084 goto nla_put_failure;
1026 1085
1086 if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
1087 goto nla_put_failure;
1088
1089 if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
1090 goto nla_put_failure;
1091
1027 cor.delay_corr = q->delay_cor.rho; 1092 cor.delay_corr = q->delay_cor.rho;
1028 cor.loss_corr = q->loss_cor.rho; 1093 cor.loss_corr = q->loss_cor.rho;
1029 cor.dup_corr = q->dup_cor.rho; 1094 cor.dup_corr = q->dup_cor.rho;
@@ -1060,6 +1125,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1060 if (dump_loss_model(q, skb) != 0) 1125 if (dump_loss_model(q, skb) != 0)
1061 goto nla_put_failure; 1126 goto nla_put_failure;
1062 1127
1128 if (q->slot_config.min_delay | q->slot_config.max_delay) {
1129 slot = q->slot_config;
1130 if (slot.max_packets == INT_MAX)
1131 slot.max_packets = 0;
1132 if (slot.max_bytes == INT_MAX)
1133 slot.max_bytes = 0;
1134 if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1135 goto nla_put_failure;
1136 }
1137
1063 return nla_nest_end(skb, nla); 1138 return nla_nest_end(skb, nla);
1064 1139
1065nla_put_failure: 1140nla_put_failure:
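
netem's internal latency and jitter move from 32-bit psched ticks to 64-bit nanoseconds in this patch; full-precision values travel in the new TCA_NETEM_LATENCY64/TCA_NETEM_JITTER64 attributes, while the legacy struct tc_netem_qopt fields are filled by converting back to ticks and saturating at UINT_MAX for old userspace. A sketch of that saturation, modelling PSCHED_NS2TICKS as a right shift by 6 (one tick = 64 ns is an assumption here, not a guaranteed constant):

#include <stdint.h>
#include <limits.h>
#include <stdio.h>

#define PSCHED_SHIFT 6  /* assumed: one tick = 64 ns, as a stand-in */

/* Saturate a 64-bit ns value into the legacy 32-bit tick field. */
static uint32_t ns_to_legacy_ticks(int64_t ns)
{
    int64_t ticks = ns >> PSCHED_SHIFT;

    return ticks > UINT_MAX ? UINT_MAX : (uint32_t)ticks;
}

int main(void)
{
    printf("%u\n", ns_to_legacy_ticks(1000000));          /* 1 ms fits */
    printf("%u\n", ns_to_legacy_ticks((int64_t)1 << 40)); /* clamps */
    return 0;
}
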
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 6c2791d6102d..776c694c77c7 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -74,6 +74,7 @@ struct pie_sched_data {
74 struct pie_vars vars; 74 struct pie_vars vars;
75 struct pie_stats stats; 75 struct pie_stats stats;
76 struct timer_list adapt_timer; 76 struct timer_list adapt_timer;
77 struct Qdisc *sch;
77}; 78};
78 79
79static void pie_params_init(struct pie_params *params) 80static void pie_params_init(struct pie_params *params)
@@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch)
422 pie_vars_init(&q->vars); 423 pie_vars_init(&q->vars);
423} 424}
424 425
425static void pie_timer(unsigned long arg) 426static void pie_timer(struct timer_list *t)
426{ 427{
427 struct Qdisc *sch = (struct Qdisc *)arg; 428 struct pie_sched_data *q = from_timer(q, t, adapt_timer);
428 struct pie_sched_data *q = qdisc_priv(sch); 429 struct Qdisc *sch = q->sch;
429 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 430 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
430 431
431 spin_lock(root_lock); 432 spin_lock(root_lock);
@@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt)
446 pie_vars_init(&q->vars); 447 pie_vars_init(&q->vars);
447 sch->limit = q->params.limit; 448 sch->limit = q->params.limit;
448 449
449 setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); 450 q->sch = sch;
451 timer_setup(&q->adapt_timer, pie_timer, 0);
450 452
451 if (opt) { 453 if (opt) {
452 int err = pie_change(sch, opt); 454 int err = pie_change(sch, opt);
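
sch_pie (like sch_red and sch_sfq below) is converted to the timer_list-based callback API: the callback now receives the struct timer_list pointer and recovers its private state with from_timer(), a container_of() wrapper, which is why a q->sch back-pointer has to be stored explicitly. A self-contained userspace sketch of the recovery step (the types here are stand-ins, not the kernel's):

#include <stddef.h>
#include <stdio.h>

/* Minimal stand-ins; the real types live in the kernel. */
struct timer_list {
    void (*function)(struct timer_list *);
};

struct pie_sched_data {
    int qlen;                       /* placeholder private state */
    struct timer_list adapt_timer;  /* embedded timer */
};

/* container_of() is what the kernel's from_timer() expands to. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static void pie_timer(struct timer_list *t)
{
    struct pie_sched_data *q =
        container_of(t, struct pie_sched_data, adapt_timer);

    printf("adapt tick, qlen=%d\n", q->qlen);
}

int main(void)
{
    struct pie_sched_data q = { .qlen = 42 };

    q.adapt_timer.function = pie_timer;
    q.adapt_timer.function(&q.adapt_timer);  /* simulate expiry */
    return 0;
}
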
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2dd6c68ae91e..2c79559a0d31 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
50 case TC_ACT_QUEUED: 50 case TC_ACT_QUEUED:
51 case TC_ACT_TRAP: 51 case TC_ACT_TRAP:
52 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 52 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
53 /* fall through */
53 case TC_ACT_SHOT: 54 case TC_ACT_SHOT:
54 return NULL; 55 return NULL;
55 } 56 }
@@ -212,7 +213,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
212 if (!opt) 213 if (!opt)
213 return -EINVAL; 214 return -EINVAL;
214 215
215 err = tcf_block_get(&q->block, &q->filter_list); 216 err = tcf_block_get(&q->block, &q->filter_list, sch);
216 if (err) 217 if (err)
217 return err; 218 return err;
218 219
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6ddfd4991108..6962b37a3ad3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -709,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
709 case TC_ACT_STOLEN: 709 case TC_ACT_STOLEN:
710 case TC_ACT_TRAP: 710 case TC_ACT_TRAP:
711 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 711 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
712 /* fall through */
712 case TC_ACT_SHOT: 713 case TC_ACT_SHOT:
713 return NULL; 714 return NULL;
714 } 715 }
@@ -1419,7 +1420,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1419 int i, j, err; 1420 int i, j, err;
1420 u32 max_cl_shift, maxbudg_shift, max_classes; 1421 u32 max_cl_shift, maxbudg_shift, max_classes;
1421 1422
1422 err = tcf_block_get(&q->block, &q->filter_list); 1423 err = tcf_block_get(&q->block, &q->filter_list, sch);
1423 if (err) 1424 if (err)
1424 return err; 1425 return err;
1425 1426
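
The new /* fall through */ comments in prio, qfq, sfb and sfq mark that the QUEUED/STOLEN/TRAP cases intentionally share the SHOT case's return after setting *qerr; the annotation keeps gcc's -Wimplicit-fallthrough quiet. A small standalone illustration of the pattern:

#include <stdio.h>

enum act { ACT_QUEUED, ACT_STOLEN, ACT_TRAP, ACT_SHOT, ACT_OK };

/* Returns 0 when the packet is consumed; sets *stolen for the
 * cases that took ownership before falling into the SHOT path.
 */
static int classify(enum act verdict, int *stolen)
{
    switch (verdict) {
    case ACT_QUEUED:
    case ACT_STOLEN:
    case ACT_TRAP:
        *stolen = 1;
        /* fall through */
    case ACT_SHOT:
        return 0;
    default:
        return 1;
    }
}

int main(void)
{
    int stolen = 0;

    printf("%d %d\n", classify(ACT_TRAP, &stolen), stolen); /* 0 1 */
    stolen = 0;
    printf("%d %d\n", classify(ACT_SHOT, &stolen), stolen); /* 0 0 */
    return 0;
}
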
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 93b9d70a9b28..7f8ea9e297c3 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -19,6 +19,7 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/skbuff.h> 20#include <linux/skbuff.h>
21#include <net/pkt_sched.h> 21#include <net/pkt_sched.h>
22#include <net/pkt_cls.h>
22#include <net/inet_ecn.h> 23#include <net/inet_ecn.h>
23#include <net/red.h> 24#include <net/red.h>
24 25
@@ -40,6 +41,7 @@ struct red_sched_data {
40 u32 limit; /* HARD maximal queue length */ 41 u32 limit; /* HARD maximal queue length */
41 unsigned char flags; 42 unsigned char flags;
42 struct timer_list adapt_timer; 43 struct timer_list adapt_timer;
44 struct Qdisc *sch;
43 struct red_parms parms; 45 struct red_parms parms;
44 struct red_vars vars; 46 struct red_vars vars;
45 struct red_stats stats; 47 struct red_stats stats;
@@ -147,11 +149,37 @@ static void red_reset(struct Qdisc *sch)
147 red_restart(&q->vars); 149 red_restart(&q->vars);
148} 150}
149 151
152static int red_offload(struct Qdisc *sch, bool enable)
153{
154 struct red_sched_data *q = qdisc_priv(sch);
155 struct net_device *dev = qdisc_dev(sch);
156 struct tc_red_qopt_offload opt = {
157 .handle = sch->handle,
158 .parent = sch->parent,
159 };
160
161 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
162 return -EOPNOTSUPP;
163
164 if (enable) {
165 opt.command = TC_RED_REPLACE;
166 opt.set.min = q->parms.qth_min >> q->parms.Wlog;
167 opt.set.max = q->parms.qth_max >> q->parms.Wlog;
168 opt.set.probability = q->parms.max_P;
169 opt.set.is_ecn = red_use_ecn(q);
170 } else {
171 opt.command = TC_RED_DESTROY;
172 }
173
174 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
175}
176
150static void red_destroy(struct Qdisc *sch) 177static void red_destroy(struct Qdisc *sch)
151{ 178{
152 struct red_sched_data *q = qdisc_priv(sch); 179 struct red_sched_data *q = qdisc_priv(sch);
153 180
154 del_timer_sync(&q->adapt_timer); 181 del_timer_sync(&q->adapt_timer);
182 red_offload(sch, false);
155 qdisc_destroy(q->qdisc); 183 qdisc_destroy(q->qdisc);
156} 184}
157 185
@@ -218,13 +246,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
218 red_start_of_idle_period(&q->vars); 246 red_start_of_idle_period(&q->vars);
219 247
220 sch_tree_unlock(sch); 248 sch_tree_unlock(sch);
249 red_offload(sch, true);
221 return 0; 250 return 0;
222} 251}
223 252
224static inline void red_adaptative_timer(unsigned long arg) 253static inline void red_adaptative_timer(struct timer_list *t)
225{ 254{
226 struct Qdisc *sch = (struct Qdisc *)arg; 255 struct red_sched_data *q = from_timer(q, t, adapt_timer);
227 struct red_sched_data *q = qdisc_priv(sch); 256 struct Qdisc *sch = q->sch;
228 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 257 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
229 258
230 spin_lock(root_lock); 259 spin_lock(root_lock);
@@ -238,10 +267,40 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
238 struct red_sched_data *q = qdisc_priv(sch); 267 struct red_sched_data *q = qdisc_priv(sch);
239 268
240 q->qdisc = &noop_qdisc; 269 q->qdisc = &noop_qdisc;
241 setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch); 270 q->sch = sch;
271 timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
242 return red_change(sch, opt); 272 return red_change(sch, opt);
243} 273}
244 274
275static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
276{
277 struct net_device *dev = qdisc_dev(sch);
278 struct tc_red_qopt_offload hw_stats = {
279 .command = TC_RED_STATS,
280 .handle = sch->handle,
281 .parent = sch->parent,
282 {
283 .stats.bstats = &sch->bstats,
284 .stats.qstats = &sch->qstats,
285 },
286 };
287 int err;
288
289 opt->flags &= ~TC_RED_OFFLOADED;
290 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
291 return 0;
292
293 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
294 &hw_stats);
295 if (err == -EOPNOTSUPP)
296 return 0;
297
298 if (!err)
299 opt->flags |= TC_RED_OFFLOADED;
300
301 return err;
302}
303
245static int red_dump(struct Qdisc *sch, struct sk_buff *skb) 304static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
246{ 305{
247 struct red_sched_data *q = qdisc_priv(sch); 306 struct red_sched_data *q = qdisc_priv(sch);
@@ -255,8 +314,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
255 .Plog = q->parms.Plog, 314 .Plog = q->parms.Plog,
256 .Scell_log = q->parms.Scell_log, 315 .Scell_log = q->parms.Scell_log,
257 }; 316 };
317 int err;
258 318
259 sch->qstats.backlog = q->qdisc->qstats.backlog; 319 sch->qstats.backlog = q->qdisc->qstats.backlog;
320 err = red_dump_offload(sch, &opt);
321 if (err)
322 goto nla_put_failure;
323
260 opts = nla_nest_start(skb, TCA_OPTIONS); 324 opts = nla_nest_start(skb, TCA_OPTIONS);
261 if (opts == NULL) 325 if (opts == NULL)
262 goto nla_put_failure; 326 goto nla_put_failure;
@@ -273,6 +337,7 @@ nla_put_failure:
273static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 337static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
274{ 338{
275 struct red_sched_data *q = qdisc_priv(sch); 339 struct red_sched_data *q = qdisc_priv(sch);
340 struct net_device *dev = qdisc_dev(sch);
276 struct tc_red_xstats st = { 341 struct tc_red_xstats st = {
277 .early = q->stats.prob_drop + q->stats.forced_drop, 342 .early = q->stats.prob_drop + q->stats.forced_drop,
278 .pdrop = q->stats.pdrop, 343 .pdrop = q->stats.pdrop,
@@ -280,6 +345,26 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
280 .marked = q->stats.prob_mark + q->stats.forced_mark, 345 .marked = q->stats.prob_mark + q->stats.forced_mark,
281 }; 346 };
282 347
348 if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
349 struct red_stats hw_stats = {0};
350 struct tc_red_qopt_offload hw_stats_request = {
351 .command = TC_RED_XSTATS,
352 .handle = sch->handle,
353 .parent = sch->parent,
354 {
355 .xstats = &hw_stats,
356 },
357 };
358 if (!dev->netdev_ops->ndo_setup_tc(dev,
359 TC_SETUP_QDISC_RED,
360 &hw_stats_request)) {
361 st.early += hw_stats.prob_drop + hw_stats.forced_drop;
362 st.pdrop += hw_stats.pdrop;
363 st.other += hw_stats.other;
364 st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
365 }
366 }
367
283 return gnet_stats_copy_app(d, &st, sizeof(st)); 368 return gnet_stats_copy_app(d, &st, sizeof(st));
284} 369}
285 370
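
sch_red gains hardware offload here: replace/destroy, dump and xstats each build a tc_red_qopt_offload and hand it to the driver's ndo_setup_tc() with TC_SETUP_QDISC_RED, treating -EOPNOTSUPP as "stay in software". A hedged mock of the driver-side dispatch; the struct fields mirror only what the diff above shows, not the full kernel header:

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

enum tc_red_command { TC_RED_REPLACE, TC_RED_DESTROY,
                      TC_RED_STATS, TC_RED_XSTATS };

/* Illustrative stand-in for struct tc_red_qopt_offload. */
struct tc_red_qopt_offload {
    enum tc_red_command command;
    uint32_t handle;
    uint32_t parent;
    struct {
        uint32_t min;
        uint32_t max;
        uint32_t probability;
        int is_ecn;
    } set;
};

/* A driver's ndo_setup_tc() handler would dispatch like this. */
static int mock_setup_tc_red(struct tc_red_qopt_offload *opt)
{
    switch (opt->command) {
    case TC_RED_REPLACE:
        printf("program RED: min=%u max=%u ecn=%d\n",
               opt->set.min, opt->set.max, opt->set.is_ecn);
        return 0;
    case TC_RED_DESTROY:
        printf("remove RED offload\n");
        return 0;
    default:
        return -EOPNOTSUPP; /* qdisc falls back to software stats */
    }
}

int main(void)
{
    struct tc_red_qopt_offload opt = {
        .command = TC_RED_REPLACE,
        .set = { .min = 1500, .max = 15000, .is_ecn = 1 },
    };

    return mock_setup_tc_red(&opt);
}
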
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index cc39e170b4aa..0678debdd856 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -268,6 +268,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
268 case TC_ACT_QUEUED: 268 case TC_ACT_QUEUED:
269 case TC_ACT_TRAP: 269 case TC_ACT_TRAP:
270 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 270 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
271 /* fall through */
271 case TC_ACT_SHOT: 272 case TC_ACT_SHOT:
272 return false; 273 return false;
273 } 274 }
@@ -553,7 +554,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
553 struct sfb_sched_data *q = qdisc_priv(sch); 554 struct sfb_sched_data *q = qdisc_priv(sch);
554 int err; 555 int err;
555 556
556 err = tcf_block_get(&q->block, &q->filter_list); 557 err = tcf_block_get(&q->block, &q->filter_list, sch);
557 if (err) 558 if (err)
558 return err; 559 return err;
559 560
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 74ea863b8240..890f4a4564e7 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -145,6 +145,7 @@ struct sfq_sched_data {
145 int perturb_period; 145 int perturb_period;
146 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ 146 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
147 struct timer_list perturb_timer; 147 struct timer_list perturb_timer;
148 struct Qdisc *sch;
148}; 149};
149 150
150/* 151/*
@@ -189,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
189 case TC_ACT_QUEUED: 190 case TC_ACT_QUEUED:
190 case TC_ACT_TRAP: 191 case TC_ACT_TRAP:
191 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 192 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
193 /* fall through */
192 case TC_ACT_SHOT: 194 case TC_ACT_SHOT:
193 return 0; 195 return 0;
194 } 196 }
@@ -604,10 +606,10 @@ drop:
604 qdisc_tree_reduce_backlog(sch, dropped, drop_len); 606 qdisc_tree_reduce_backlog(sch, dropped, drop_len);
605} 607}
606 608
607static void sfq_perturbation(unsigned long arg) 609static void sfq_perturbation(struct timer_list *t)
608{ 610{
609 struct Qdisc *sch = (struct Qdisc *)arg; 611 struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
610 struct sfq_sched_data *q = qdisc_priv(sch); 612 struct Qdisc *sch = q->sch;
611 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 613 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
612 614
613 spin_lock(root_lock); 615 spin_lock(root_lock);
@@ -722,10 +724,9 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
722 int i; 724 int i;
723 int err; 725 int err;
724 726
725 setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, 727 timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
726 (unsigned long)sch);
727 728
728 err = tcf_block_get(&q->block, &q->filter_list); 729 err = tcf_block_get(&q->block, &q->filter_list, sch);
729 if (err) 730 if (err)
730 return err; 731 return err;
731 732
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 70f1b570bab9..1ca84a288443 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for SCTP support code. 3# Makefile for SCTP support code.
3# 4#
@@ -12,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
12 inqueue.o outqueue.o ulpqueue.o \ 13 inqueue.o outqueue.o ulpqueue.o \
13 tsnmap.o bind_addr.o socket.o primitive.o \ 14 tsnmap.o bind_addr.o socket.o primitive.o \
14 output.o input.o debug.o stream.o auth.o \ 15 output.o input.o debug.o stream.o auth.o \
15 offload.o 16 offload.o stream_sched.o stream_sched_prio.o \
17 stream_sched_rr.o
16 18
17sctp_probe-y := probe.o 19sctp_probe-y := probe.o
18 20
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index dfb9651e818b..69394f4d6091 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -149,8 +149,7 @@ static struct sctp_association *sctp_association_init(
149 149
150 /* Initializes the timers */ 150 /* Initializes the timers */
151 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) 151 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
152 setup_timer(&asoc->timers[i], sctp_timer_events[i], 152 timer_setup(&asoc->timers[i], sctp_timer_events[i], 0);
153 (unsigned long)asoc);
154 153
155 /* Pull default initialization values from the sock options. 154 /* Pull default initialization values from the sock options.
156 * Note: This assumes that the values have already been 155 * Note: This assumes that the values have already been
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3afac275ee82..7b261afc47b9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
311 311
312 if (chunk->sent_count) { 312 if (chunk->sent_count) {
313 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; 313 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
314 streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; 314 streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
315 } else { 315 } else {
316 chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; 316 chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
317 streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; 317 streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
318 } 318 }
319 return 1; 319 return 1;
320 } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && 320 } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
@@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
323 &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; 323 &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
324 324
325 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 325 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
326 streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 326 streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
327 return 1; 327 return 1;
328 } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && 328 } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
329 chunk->msg->expires_at && 329 chunk->msg->expires_at &&
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 92a07141fd07..621b5ca3fd1c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
421{ 421{
422 struct dst_entry *dst; 422 struct dst_entry *dst;
423 423
424 if (!t) 424 if (sock_owned_by_user(sk) || !t)
425 return; 425 return;
426 dst = sctp_transport_dst_check(t); 426 dst = sctp_transport_dst_check(t);
427 if (dst) 427 if (dst)
@@ -794,7 +794,7 @@ hit:
794struct sctp_hash_cmp_arg { 794struct sctp_hash_cmp_arg {
795 const union sctp_addr *paddr; 795 const union sctp_addr *paddr;
796 const struct net *net; 796 const struct net *net;
797 u16 lport; 797 __be16 lport;
798}; 798};
799 799
800static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, 800static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -820,37 +820,37 @@ out:
820 return err; 820 return err;
821} 821}
822 822
823static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) 823static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
824{ 824{
825 const struct sctp_transport *t = data; 825 const struct sctp_transport *t = data;
826 const union sctp_addr *paddr = &t->ipaddr; 826 const union sctp_addr *paddr = &t->ipaddr;
827 const struct net *net = sock_net(t->asoc->base.sk); 827 const struct net *net = sock_net(t->asoc->base.sk);
828 u16 lport = htons(t->asoc->base.bind_addr.port); 828 __be16 lport = htons(t->asoc->base.bind_addr.port);
829 u32 addr; 829 __u32 addr;
830 830
831 if (paddr->sa.sa_family == AF_INET6) 831 if (paddr->sa.sa_family == AF_INET6)
832 addr = jhash(&paddr->v6.sin6_addr, 16, seed); 832 addr = jhash(&paddr->v6.sin6_addr, 16, seed);
833 else 833 else
834 addr = paddr->v4.sin_addr.s_addr; 834 addr = (__force __u32)paddr->v4.sin_addr.s_addr;
835 835
836 return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | 836 return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
837 (__force __u32)lport, net_hash_mix(net), seed); 837 (__force __u32)lport, net_hash_mix(net), seed);
838} 838}
839 839
840static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed) 840static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed)
841{ 841{
842 const struct sctp_hash_cmp_arg *x = data; 842 const struct sctp_hash_cmp_arg *x = data;
843 const union sctp_addr *paddr = x->paddr; 843 const union sctp_addr *paddr = x->paddr;
844 const struct net *net = x->net; 844 const struct net *net = x->net;
845 u16 lport = x->lport; 845 __be16 lport = x->lport;
846 u32 addr; 846 __u32 addr;
847 847
848 if (paddr->sa.sa_family == AF_INET6) 848 if (paddr->sa.sa_family == AF_INET6)
849 addr = jhash(&paddr->v6.sin6_addr, 16, seed); 849 addr = jhash(&paddr->v6.sin6_addr, 16, seed);
850 else 850 else
851 addr = paddr->v4.sin_addr.s_addr; 851 addr = (__force __u32)paddr->v4.sin_addr.s_addr;
852 852
853 return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | 853 return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
854 (__force __u32)lport, net_hash_mix(net), seed); 854 (__force __u32)lport, net_hash_mix(net), seed);
855} 855}
856 856
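
The sctp hashing changes above are sparse-annotation fixes: ports and addresses are kept in network byte order as __be16/__be32 throughout, and the (__force __u32) casts reinterpret the bits only at the jhash boundary instead of byteswapping. A userspace sketch of the same discipline, mixing two network-order ports into one hash word:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>  /* htons() */

/* The values stay big-endian end to end; the cast to a plain
 * integer happens only at the hashing boundary, mirroring what
 * (__force __u32) does in the kernel: reinterpret, don't swap.
 */
static uint32_t hash_port_pair(uint16_t be_sport, uint16_t be_dport)
{
    return ((uint32_t)be_sport << 16) | (uint32_t)be_dport;
}

int main(void)
{
    uint16_t sport = htons(5000), dport = htons(80);

    printf("0x%08x\n", hash_port_pair(sport, dport));
    return 0;
}
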
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 51c488769590..3b18085e3b10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -738,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
738/* Was this packet marked by Explicit Congestion Notification? */ 738/* Was this packet marked by Explicit Congestion Notification? */
739static int sctp_v6_is_ce(const struct sk_buff *skb) 739static int sctp_v6_is_ce(const struct sk_buff *skb)
740{ 740{
741 return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20); 741 return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20);
742} 742}
743 743
744/* Dump the v6 addr to the seq file. */ 744/* Dump the v6 addr to the seq file. */
@@ -807,9 +807,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
807 addr->v6.sin6_flowinfo = 0; 807 addr->v6.sin6_flowinfo = 0;
808 addr->v6.sin6_port = sh->source; 808 addr->v6.sin6_port = sh->source;
809 addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; 809 addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
810 if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) { 810 if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
811 addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb); 811 addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
812 } 812 else
813 addr->v6.sin6_scope_id = 0;
813 } 814 }
814 815
815 *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr); 816 *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
@@ -882,8 +883,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
882 net = sock_net(&opt->inet.sk); 883 net = sock_net(&opt->inet.sk);
883 rcu_read_lock(); 884 rcu_read_lock();
884 dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); 885 dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
885 if (!dev || 886 if (!dev || !(opt->inet.freebind ||
886 !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { 887 net->ipv6.sysctl.ip_nonlocal_bind ||
888 ipv6_chk_addr(net, &addr->v6.sin6_addr,
889 dev, 0))) {
887 rcu_read_unlock(); 890 rcu_read_unlock();
888 return 0; 891 return 0;
889 } 892 }
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 2966ff400755..4db012aa25f7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -50,6 +50,7 @@
50 50
51#include <net/sctp/sctp.h> 51#include <net/sctp/sctp.h>
52#include <net/sctp/sm.h> 52#include <net/sctp/sm.h>
53#include <net/sctp/stream_sched.h>
53 54
54/* Declare internal functions here. */ 55/* Declare internal functions here. */
55static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn); 56static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
@@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
72 73
73/* Add data to the front of the queue. */ 74/* Add data to the front of the queue. */
74static inline void sctp_outq_head_data(struct sctp_outq *q, 75static inline void sctp_outq_head_data(struct sctp_outq *q,
75 struct sctp_chunk *ch) 76 struct sctp_chunk *ch)
76{ 77{
78 struct sctp_stream_out_ext *oute;
79 __u16 stream;
80
77 list_add(&ch->list, &q->out_chunk_list); 81 list_add(&ch->list, &q->out_chunk_list);
78 q->out_qlen += ch->skb->len; 82 q->out_qlen += ch->skb->len;
83
84 stream = sctp_chunk_stream_no(ch);
85 oute = q->asoc->stream.out[stream].ext;
86 list_add(&ch->stream_list, &oute->outq);
79} 87}
80 88
81/* Take data from the front of the queue. */ 89/* Take data from the front of the queue. */
82static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q) 90static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
83{ 91{
84 struct sctp_chunk *ch = NULL; 92 return q->sched->dequeue(q);
85
86 if (!list_empty(&q->out_chunk_list)) {
87 struct list_head *entry = q->out_chunk_list.next;
88
89 ch = list_entry(entry, struct sctp_chunk, list);
90 list_del_init(entry);
91 q->out_qlen -= ch->skb->len;
92 }
93 return ch;
94} 93}
94
95/* Add data chunk to the end of the queue. */ 95/* Add data chunk to the end of the queue. */
96static inline void sctp_outq_tail_data(struct sctp_outq *q, 96static inline void sctp_outq_tail_data(struct sctp_outq *q,
97 struct sctp_chunk *ch) 97 struct sctp_chunk *ch)
98{ 98{
99 struct sctp_stream_out_ext *oute;
100 __u16 stream;
101
99 list_add_tail(&ch->list, &q->out_chunk_list); 102 list_add_tail(&ch->list, &q->out_chunk_list);
100 q->out_qlen += ch->skb->len; 103 q->out_qlen += ch->skb->len;
104
105 stream = sctp_chunk_stream_no(ch);
106 oute = q->asoc->stream.out[stream].ext;
107 list_add_tail(&ch->stream_list, &oute->outq);
101} 108}
102 109
103/* 110/*
@@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
207 INIT_LIST_HEAD(&q->retransmit); 214 INIT_LIST_HEAD(&q->retransmit);
208 INIT_LIST_HEAD(&q->sacked); 215 INIT_LIST_HEAD(&q->sacked);
209 INIT_LIST_HEAD(&q->abandoned); 216 INIT_LIST_HEAD(&q->abandoned);
217 sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
210} 218}
211 219
212/* Free the outqueue structure and any related pending chunks. 220/* Free the outqueue structure and any related pending chunks.
@@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q)
258 266
259 /* Throw away any leftover data chunks. */ 267 /* Throw away any leftover data chunks. */
260 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { 268 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
269 sctp_sched_dequeue_done(q, chunk);
261 270
262 /* Mark as send failure. */ 271 /* Mark as send failure. */
263 sctp_chunk_fail(chunk, q->error); 272 sctp_chunk_fail(chunk, q->error);
@@ -366,7 +375,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
366 streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; 375 streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
367 asoc->sent_cnt_removable--; 376 asoc->sent_cnt_removable--;
368 asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; 377 asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
369 streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; 378 streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
370 379
371 if (!chk->tsn_gap_acked) { 380 if (!chk->tsn_gap_acked) {
372 if (chk->transport) 381 if (chk->transport)
@@ -391,20 +400,21 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
391 struct sctp_outq *q = &asoc->outqueue; 400 struct sctp_outq *q = &asoc->outqueue;
392 struct sctp_chunk *chk, *temp; 401 struct sctp_chunk *chk, *temp;
393 402
403 q->sched->unsched_all(&asoc->stream);
404
394 list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { 405 list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
395 if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || 406 if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
396 chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) 407 chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
397 continue; 408 continue;
398 409
399 list_del_init(&chk->list); 410 sctp_sched_dequeue_common(q, chk);
400 q->out_qlen -= chk->skb->len;
401 asoc->sent_cnt_removable--; 411 asoc->sent_cnt_removable--;
402 asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; 412 asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
403 if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { 413 if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
404 struct sctp_stream_out *streamout = 414 struct sctp_stream_out *streamout =
405 &asoc->stream.out[chk->sinfo.sinfo_stream]; 415 &asoc->stream.out[chk->sinfo.sinfo_stream];
406 416
407 streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; 417 streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
408 } 418 }
409 419
410 msg_len -= SCTP_DATA_SNDSIZE(chk) + 420 msg_len -= SCTP_DATA_SNDSIZE(chk) +
@@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
415 break; 425 break;
416 } 426 }
417 427
428 q->sched->sched_all(&asoc->stream);
429
418 return msg_len; 430 return msg_len;
419} 431}
420 432
@@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
1033 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { 1045 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
1034 __u32 sid = ntohs(chunk->subh.data_hdr->stream); 1046 __u32 sid = ntohs(chunk->subh.data_hdr->stream);
1035 1047
1036 /* RFC 2960 6.5 Every DATA chunk MUST carry a valid
1037 * stream identifier.
1038 */
1039 if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) {
1040
1041 /* Mark as failed send. */
1042 sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
1043 if (asoc->peer.prsctp_capable &&
1044 SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
1045 asoc->sent_cnt_removable--;
1046 sctp_chunk_free(chunk);
1047 continue;
1048 }
1049
1050 /* Has this chunk expired? */ 1048 /* Has this chunk expired? */
1051 if (sctp_chunk_abandoned(chunk)) { 1049 if (sctp_chunk_abandoned(chunk)) {
1050 sctp_sched_dequeue_done(q, chunk);
1052 sctp_chunk_fail(chunk, 0); 1051 sctp_chunk_fail(chunk, 0);
1053 sctp_chunk_free(chunk); 1052 sctp_chunk_free(chunk);
1054 continue; 1053 continue;
@@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
1070 new_transport = asoc->peer.active_path; 1069 new_transport = asoc->peer.active_path;
1071 if (new_transport->state == SCTP_UNCONFIRMED) { 1070 if (new_transport->state == SCTP_UNCONFIRMED) {
1072 WARN_ONCE(1, "Attempt to send packet on unconfirmed path."); 1071 WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
1072 sctp_sched_dequeue_done(q, chunk);
1073 sctp_chunk_fail(chunk, 0); 1073 sctp_chunk_fail(chunk, 0);
1074 sctp_chunk_free(chunk); 1074 sctp_chunk_free(chunk);
1075 continue; 1075 continue;
@@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
1133 else 1133 else
1134 asoc->stats.oodchunks++; 1134 asoc->stats.oodchunks++;
1135 1135
1136 /* Only now it's safe to consider this
1137 * chunk as sent, sched-wise.
1138 */
1139 sctp_sched_dequeue_done(q, chunk);
1140
1136 break; 1141 break;
1137 1142
1138 default: 1143 default:
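
The outqueue now funnels data chunks through a pluggable per-stream scheduler: sctp_outq_dequeue_data() delegates to q->sched->dequeue(), the enqueue paths mirror chunks onto per-stream ext lists, and sctp_sched_dequeue_done() tells the scheduler a chunk really left the queue. A simplified sketch of the ops-table indirection (the structures here are illustrative, not SCTP's):

#include <stdio.h>

struct outq;

/* Pluggable scheduler interface, in the spirit of sctp_sched_ops. */
struct sched_ops {
    void (*enqueue)(struct outq *q, int chunk);
    int  (*dequeue)(struct outq *q);
};

struct outq {
    const struct sched_ops *sched;
    int fifo[8];
    int head, tail;
};

/* First-come-first-served, the default the patch installs. */
static void fcfs_enqueue(struct outq *q, int chunk)
{
    q->fifo[q->tail++ % 8] = chunk;
}

static int fcfs_dequeue(struct outq *q)
{
    return q->head == q->tail ? -1 : q->fifo[q->head++ % 8];
}

static const struct sched_ops fcfs_ops = {
    .enqueue = fcfs_enqueue,
    .dequeue = fcfs_dequeue,
};

int main(void)
{
    struct outq q = { .sched = &fcfs_ops };

    q.sched->enqueue(&q, 1);
    q.sched->enqueue(&q, 2);
    printf("%d %d\n", q.sched->dequeue(&q), q.sched->dequeue(&q)); /* 1 2 */
    return 0;
}
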
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index fcd80feb293f..f5172c21349b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -622,9 +622,9 @@ static void sctp_v4_ecn_capable(struct sock *sk)
622 INET_ECN_xmit(sk); 622 INET_ECN_xmit(sk);
623} 623}
624 624
625static void sctp_addr_wq_timeout_handler(unsigned long arg) 625static void sctp_addr_wq_timeout_handler(struct timer_list *t)
626{ 626{
627 struct net *net = (struct net *)arg; 627 struct net *net = from_timer(net, t, sctp.addr_wq_timer);
628 struct sctp_sockaddr_entry *addrw, *temp; 628 struct sctp_sockaddr_entry *addrw, *temp;
629 struct sctp_sock *sp; 629 struct sctp_sock *sp;
630 630
@@ -1304,8 +1304,7 @@ static int __net_init sctp_defaults_init(struct net *net)
1304 INIT_LIST_HEAD(&net->sctp.auto_asconf_splist); 1304 INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
1305 spin_lock_init(&net->sctp.addr_wq_lock); 1305 spin_lock_init(&net->sctp.addr_wq_lock);
1306 net->sctp.addr_wq_timer.expires = 0; 1306 net->sctp.addr_wq_timer.expires = 0;
1307 setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 1307 timer_setup(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
1308 (unsigned long)net);
1309 1308
1310 return 0; 1309 return 0;
1311 1310
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 22ed01a76b19..a72a7d925d46 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -463,6 +463,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
463 .r = r, 463 .r = r,
464 .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN), 464 .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
465 }; 465 };
466 int pos = cb->args[2];
466 467
467 /* eps hashtable dumps 468 /* eps hashtable dumps
468 * args: 469 * args:
@@ -493,7 +494,8 @@ skip:
493 goto done; 494 goto done;
494 495
495 sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, 496 sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
496 net, (int *)&cb->args[2], &commp); 497 net, &pos, &commp);
498 cb->args[2] = pos;
497 499
498done: 500done:
499 cb->args[1] = cb->args[4]; 501 cb->args[1] = cb->args[4];
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index ca8f196b6c6c..9bf575f2e8ed 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2854,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
2854 addr_param_len = af->to_addr_param(addr, &addr_param); 2854 addr_param_len = af->to_addr_param(addr, &addr_param);
2855 param.param_hdr.type = flags; 2855 param.param_hdr.type = flags;
2856 param.param_hdr.length = htons(paramlen + addr_param_len); 2856 param.param_hdr.length = htons(paramlen + addr_param_len);
2857 param.crr_id = i; 2857 param.crr_id = htonl(i);
2858 2858
2859 sctp_addto_chunk(retval, paramlen, &param); 2859 sctp_addto_chunk(retval, paramlen, &param);
2860 sctp_addto_chunk(retval, addr_param_len, &addr_param); 2860 sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -2867,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
2867 addr_param_len = af->to_addr_param(addr, &addr_param); 2867 addr_param_len = af->to_addr_param(addr, &addr_param);
2868 param.param_hdr.type = SCTP_PARAM_DEL_IP; 2868 param.param_hdr.type = SCTP_PARAM_DEL_IP;
2869 param.param_hdr.length = htons(paramlen + addr_param_len); 2869 param.param_hdr.length = htons(paramlen + addr_param_len);
2870 param.crr_id = i; 2870 param.crr_id = htonl(i);
2871 2871
2872 sctp_addto_chunk(retval, paramlen, &param); 2872 sctp_addto_chunk(retval, paramlen, &param);
2873 sctp_addto_chunk(retval, addr_param_len, &addr_param); 2873 sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -3591,11 +3591,11 @@ static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc,
3591 */ 3591 */
3592struct sctp_chunk *sctp_make_strreset_req( 3592struct sctp_chunk *sctp_make_strreset_req(
3593 const struct sctp_association *asoc, 3593 const struct sctp_association *asoc,
3594 __u16 stream_num, __u16 *stream_list, 3594 __u16 stream_num, __be16 *stream_list,
3595 bool out, bool in) 3595 bool out, bool in)
3596{ 3596{
3597 __u16 stream_len = stream_num * sizeof(__u16);
3597 struct sctp_strreset_outreq outreq; 3598 struct sctp_strreset_outreq outreq;
3598 __u16 stream_len = stream_num * 2;
3599 struct sctp_strreset_inreq inreq; 3599 struct sctp_strreset_inreq inreq;
3600 struct sctp_chunk *retval; 3600 struct sctp_chunk *retval;
3601 __u16 outlen, inlen; 3601 __u16 outlen, inlen;
@@ -3788,7 +3788,8 @@ bool sctp_verify_reconf(const struct sctp_association *asoc,
3788{ 3788{
3789 struct sctp_reconf_chunk *hdr; 3789 struct sctp_reconf_chunk *hdr;
3790 union sctp_params param; 3790 union sctp_params param;
3791 __u16 last = 0, cnt = 0; 3791 __be16 last = 0;
3792 __u16 cnt = 0;
3792 3793
3793 hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; 3794 hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
3794 sctp_walk_params(param, hdr, params) { 3795 sctp_walk_params(param, hdr, params) {
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e6a2974e020e..df94d77401e7 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -50,6 +50,7 @@
50#include <net/sock.h> 50#include <net/sock.h>
51#include <net/sctp/sctp.h> 51#include <net/sctp/sctp.h>
52#include <net/sctp/sm.h> 52#include <net/sctp/sm.h>
53#include <net/sctp/stream_sched.h>
53 54
54static int sctp_cmd_interpreter(enum sctp_event event_type, 55static int sctp_cmd_interpreter(enum sctp_event event_type,
55 union sctp_subtype subtype, 56 union sctp_subtype subtype,
@@ -242,9 +243,10 @@ nomem:
242/* When the T3-RTX timer expires, it calls this function to create the 243/* When the T3-RTX timer expires, it calls this function to create the
243 * relevant state machine event. 244 * relevant state machine event.
244 */ 245 */
245void sctp_generate_t3_rtx_event(unsigned long peer) 246void sctp_generate_t3_rtx_event(struct timer_list *t)
246{ 247{
247 struct sctp_transport *transport = (struct sctp_transport *) peer; 248 struct sctp_transport *transport =
249 from_timer(transport, t, T3_rtx_timer);
248 struct sctp_association *asoc = transport->asoc; 250 struct sctp_association *asoc = transport->asoc;
249 struct sock *sk = asoc->base.sk; 251 struct sock *sk = asoc->base.sk;
250 struct net *net = sock_net(sk); 252 struct net *net = sock_net(sk);
@@ -318,50 +320,63 @@ out_unlock:
318 sctp_association_put(asoc); 320 sctp_association_put(asoc);
319} 321}
320 322
321static void sctp_generate_t1_cookie_event(unsigned long data) 323static void sctp_generate_t1_cookie_event(struct timer_list *t)
322{ 324{
323 struct sctp_association *asoc = (struct sctp_association *) data; 325 struct sctp_association *asoc =
326 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
327
324 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE); 328 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE);
325} 329}
326 330
327static void sctp_generate_t1_init_event(unsigned long data) 331static void sctp_generate_t1_init_event(struct timer_list *t)
328{ 332{
329 struct sctp_association *asoc = (struct sctp_association *) data; 333 struct sctp_association *asoc =
334 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_INIT]);
335
330 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT); 336 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT);
331} 337}
332 338
333static void sctp_generate_t2_shutdown_event(unsigned long data) 339static void sctp_generate_t2_shutdown_event(struct timer_list *t)
334{ 340{
335 struct sctp_association *asoc = (struct sctp_association *) data; 341 struct sctp_association *asoc =
342 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN]);
343
336 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN); 344 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN);
337} 345}
338 346
339static void sctp_generate_t4_rto_event(unsigned long data) 347static void sctp_generate_t4_rto_event(struct timer_list *t)
340{ 348{
341 struct sctp_association *asoc = (struct sctp_association *) data; 349 struct sctp_association *asoc =
350 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T4_RTO]);
351
342 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO); 352 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO);
343} 353}
344 354
345static void sctp_generate_t5_shutdown_guard_event(unsigned long data) 355static void sctp_generate_t5_shutdown_guard_event(struct timer_list *t)
346{ 356{
347 struct sctp_association *asoc = (struct sctp_association *)data; 357 struct sctp_association *asoc =
358 from_timer(asoc, t,
359 timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]);
360
348 sctp_generate_timeout_event(asoc, 361 sctp_generate_timeout_event(asoc,
349 SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD); 362 SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD);
350 363
351} /* sctp_generate_t5_shutdown_guard_event() */ 364} /* sctp_generate_t5_shutdown_guard_event() */
352 365
353static void sctp_generate_autoclose_event(unsigned long data) 366static void sctp_generate_autoclose_event(struct timer_list *t)
354{ 367{
355 struct sctp_association *asoc = (struct sctp_association *) data; 368 struct sctp_association *asoc =
369 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]);
370
356 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE); 371 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE);
357} 372}
358 373
359/* Generate a heart beat event. If the sock is busy, reschedule. Make 374/* Generate a heart beat event. If the sock is busy, reschedule. Make
360 * sure that the transport is still valid. 375 * sure that the transport is still valid.
361 */ 376 */
362void sctp_generate_heartbeat_event(unsigned long data) 377void sctp_generate_heartbeat_event(struct timer_list *t)
363{ 378{
364 struct sctp_transport *transport = (struct sctp_transport *) data; 379 struct sctp_transport *transport = from_timer(transport, t, hb_timer);
365 struct sctp_association *asoc = transport->asoc; 380 struct sctp_association *asoc = transport->asoc;
366 struct sock *sk = asoc->base.sk; 381 struct sock *sk = asoc->base.sk;
367 struct net *net = sock_net(sk); 382 struct net *net = sock_net(sk);
@@ -404,9 +419,10 @@ out_unlock:
404/* Handle the timeout of the ICMP protocol unreachable timer. Trigger 419/* Handle the timeout of the ICMP protocol unreachable timer. Trigger
405 * the correct state machine transition that will close the association. 420 * the correct state machine transition that will close the association.
406 */ 421 */
407void sctp_generate_proto_unreach_event(unsigned long data) 422void sctp_generate_proto_unreach_event(struct timer_list *t)
408{ 423{
409 struct sctp_transport *transport = (struct sctp_transport *)data; 424 struct sctp_transport *transport =
425 from_timer(transport, t, proto_unreach_timer);
410 struct sctp_association *asoc = transport->asoc; 426 struct sctp_association *asoc = transport->asoc;
411 struct sock *sk = asoc->base.sk; 427 struct sock *sk = asoc->base.sk;
412 struct net *net = sock_net(sk); 428 struct net *net = sock_net(sk);
@@ -438,9 +454,10 @@ out_unlock:
438} 454}
439 455
440 /* Handle the timeout of the RE-CONFIG timer. */ 456 /* Handle the timeout of the RE-CONFIG timer. */
441void sctp_generate_reconf_event(unsigned long data) 457void sctp_generate_reconf_event(struct timer_list *t)
442{ 458{
443 struct sctp_transport *transport = (struct sctp_transport *)data; 459 struct sctp_transport *transport =
460 from_timer(transport, t, reconf_timer);
444 struct sctp_association *asoc = transport->asoc; 461 struct sctp_association *asoc = transport->asoc;
445 struct sock *sk = asoc->base.sk; 462 struct sock *sk = asoc->base.sk;
446 struct net *net = sock_net(sk); 463 struct net *net = sock_net(sk);
@@ -470,24 +487,27 @@ out_unlock:
470} 487}
471 488
472/* Inject a SACK Timeout event into the state machine. */ 489/* Inject a SACK Timeout event into the state machine. */
473static void sctp_generate_sack_event(unsigned long data) 490static void sctp_generate_sack_event(struct timer_list *t)
474{ 491{
475 struct sctp_association *asoc = (struct sctp_association *)data; 492 struct sctp_association *asoc =
493 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_SACK]);
494
476 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK); 495 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
477} 496}
478 497
479sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { 498sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
480 NULL, 499 [SCTP_EVENT_TIMEOUT_NONE] = NULL,
481 sctp_generate_t1_cookie_event, 500 [SCTP_EVENT_TIMEOUT_T1_COOKIE] = sctp_generate_t1_cookie_event,
482 sctp_generate_t1_init_event, 501 [SCTP_EVENT_TIMEOUT_T1_INIT] = sctp_generate_t1_init_event,
483 sctp_generate_t2_shutdown_event, 502 [SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = sctp_generate_t2_shutdown_event,
484 NULL, 503 [SCTP_EVENT_TIMEOUT_T3_RTX] = NULL,
485 sctp_generate_t4_rto_event, 504 [SCTP_EVENT_TIMEOUT_T4_RTO] = sctp_generate_t4_rto_event,
486 sctp_generate_t5_shutdown_guard_event, 505 [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] =
487 NULL, 506 sctp_generate_t5_shutdown_guard_event,
488 NULL, 507 [SCTP_EVENT_TIMEOUT_HEARTBEAT] = NULL,
489 sctp_generate_sack_event, 508 [SCTP_EVENT_TIMEOUT_RECONF] = NULL,
490 sctp_generate_autoclose_event, 509 [SCTP_EVENT_TIMEOUT_SACK] = sctp_generate_sack_event,
510 [SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sctp_generate_autoclose_event,
491}; 511};
492 512
493 513
@@ -1089,6 +1109,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
1089 1109
1090 list_for_each_entry(chunk, &msg->chunks, frag_list) 1110 list_for_each_entry(chunk, &msg->chunks, frag_list)
1091 sctp_outq_tail(&asoc->outqueue, chunk, gfp); 1111 sctp_outq_tail(&asoc->outqueue, chunk, gfp);
1112
1113 asoc->outqueue.sched->enqueue(&asoc->outqueue, msg);
1092} 1114}
1093 1115
1094 1116
@@ -1607,12 +1629,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
1607 break; 1629 break;
1608 1630
1609 case SCTP_CMD_INIT_FAILED: 1631 case SCTP_CMD_INIT_FAILED:
1610 sctp_cmd_init_failed(commands, asoc, cmd->obj.err); 1632 sctp_cmd_init_failed(commands, asoc, cmd->obj.u32);
1611 break; 1633 break;
1612 1634
1613 case SCTP_CMD_ASSOC_FAILED: 1635 case SCTP_CMD_ASSOC_FAILED:
1614 sctp_cmd_assoc_failed(commands, asoc, event_type, 1636 sctp_cmd_assoc_failed(commands, asoc, event_type,
1615 subtype, chunk, cmd->obj.err); 1637 subtype, chunk, cmd->obj.u32);
1616 break; 1638 break;
1617 1639
1618 case SCTP_CMD_INIT_COUNTER_INC: 1640 case SCTP_CMD_INIT_COUNTER_INC:
@@ -1680,8 +1702,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
1680 case SCTP_CMD_PROCESS_CTSN: 1702 case SCTP_CMD_PROCESS_CTSN:
1681 /* Dummy up a SACK for processing. */ 1703 /* Dummy up a SACK for processing. */
1682 sackh.cum_tsn_ack = cmd->obj.be32; 1704 sackh.cum_tsn_ack = cmd->obj.be32;
1683 sackh.a_rwnd = asoc->peer.rwnd + 1705 sackh.a_rwnd = htonl(asoc->peer.rwnd +
1684 asoc->outqueue.outstanding_bytes; 1706 asoc->outqueue.outstanding_bytes);
1685 sackh.num_gap_ack_blocks = 0; 1707 sackh.num_gap_ack_blocks = 0;
1686 sackh.num_dup_tsns = 0; 1708 sackh.num_dup_tsns = 0;
1687 chunk->subh.sack_hdr = &sackh; 1709 chunk->subh.sack_hdr = &sackh;
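
The sctp_timer_events[] table above is rewritten with designated initializers, binding each handler to its SCTP_EVENT_TIMEOUT_* index so the array can no longer drift out of sync with the enum; unnamed slots default to NULL. A compact standalone example of the idiom:

#include <stdio.h>

enum timeout { T_NONE, T_T1_INIT, T_SACK, T_MAX };

static void t1_init_event(void) { printf("t1 init\n"); }
static void sack_event(void)    { printf("sack\n");    }

/* Designated initializers tie each slot to its enum value, so
 * reordering or extending the enum cannot silently misalign the
 * table; slots without an initializer default to NULL.
 */
static void (*const events[T_MAX])(void) = {
    [T_T1_INIT] = t1_init_event,
    [T_SACK]    = sack_event,
};

int main(void)
{
    for (int i = 0; i < T_MAX; i++)
        if (events[i])
            events[i]();
    return 0;
}
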
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d4730ada7f32..3204a9b29407 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -79,12 +79,13 @@
79#include <net/sock.h> 79#include <net/sock.h>
80#include <net/sctp/sctp.h> 80#include <net/sctp/sctp.h>
81#include <net/sctp/sm.h> 81#include <net/sctp/sm.h>
82#include <net/sctp/stream_sched.h>
82 83
83/* Forward declarations for internal helper functions. */ 84/* Forward declarations for internal helper functions. */
84static int sctp_writeable(struct sock *sk); 85static int sctp_writeable(struct sock *sk);
85static void sctp_wfree(struct sk_buff *skb); 86static void sctp_wfree(struct sk_buff *skb);
86static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, 87static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
87 size_t msg_len); 88 size_t msg_len, struct sock **orig_sk);
88static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); 89static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
89static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); 90static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
90static int sctp_wait_for_accept(struct sock *sk, long timeo); 91static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -170,6 +171,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
170 sk_mem_charge(sk, chunk->skb->truesize); 171 sk_mem_charge(sk, chunk->skb->truesize);
171} 172}
172 173
174static void sctp_clear_owner_w(struct sctp_chunk *chunk)
175{
176 skb_orphan(chunk->skb);
177}
178
179static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
180 void (*cb)(struct sctp_chunk *))
181
182{
183 struct sctp_outq *q = &asoc->outqueue;
184 struct sctp_transport *t;
185 struct sctp_chunk *chunk;
186
187 list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
188 list_for_each_entry(chunk, &t->transmitted, transmitted_list)
189 cb(chunk);
190
191 list_for_each_entry(chunk, &q->retransmit, list)
192 cb(chunk);
193
194 list_for_each_entry(chunk, &q->sacked, list)
195 cb(chunk);
196
197 list_for_each_entry(chunk, &q->abandoned, list)
198 cb(chunk);
199
200 list_for_each_entry(chunk, &q->out_chunk_list, list)
201 cb(chunk);
202}
203
173/* Verify that this is a valid address. */ 204/* Verify that this is a valid address. */
174static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, 205static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
175 int len) 206 int len)
@@ -1927,14 +1958,28 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
1927 goto out_free; 1958 goto out_free;
1928 } 1959 }
1929 1960
1961 /* Allocate sctp_stream_out_ext if not already done */
1962 if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
1963 err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
1964 if (err)
1965 goto out_free;
1966 }
1967
1930 if (sctp_wspace(asoc) < msg_len) 1968 if (sctp_wspace(asoc) < msg_len)
1931 sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); 1969 sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
1932 1970
1933 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1971 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1934 if (!sctp_wspace(asoc)) { 1972 if (!sctp_wspace(asoc)) {
1935 err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); 1973 /* sk can be changed by peel off when waiting for buf. */
1936 if (err) 1974 err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
1975 if (err) {
1976 if (err == -ESRCH) {
1977 /* asoc is already dead. */
1978 new_asoc = NULL;
1979 err = -EPIPE;
1980 }
1937 goto out_free; 1981 goto out_free;
1982 }
1938 } 1983 }
1939 1984
1940 /* If an address is passed with the sendto/sendmsg call, it is used 1985 /* If an address is passed with the sendto/sendmsg call, it is used
@@ -3095,9 +3140,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
3095 */ 3140 */
3096static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen) 3141static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
3097{ 3142{
3143 struct sctp_sock *sp = sctp_sk(sk);
3098 struct sctp_assoc_value params; 3144 struct sctp_assoc_value params;
3099 struct sctp_association *asoc; 3145 struct sctp_association *asoc;
3100 struct sctp_sock *sp = sctp_sk(sk);
3101 int val; 3146 int val;
3102 3147
3103 if (optlen == sizeof(int)) { 3148 if (optlen == sizeof(int)) {
@@ -3113,26 +3158,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
3113 if (copy_from_user(&params, optval, optlen)) 3158 if (copy_from_user(&params, optval, optlen))
3114 return -EFAULT; 3159 return -EFAULT;
3115 val = params.assoc_value; 3160 val = params.assoc_value;
3116 } else 3161 } else {
3117 return -EINVAL; 3162 return -EINVAL;
3163 }
3118 3164
3119 if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))) 3165 if (val) {
3120 return -EINVAL; 3166 int min_len, max_len;
3121 3167
3122 asoc = sctp_id2assoc(sk, params.assoc_id); 3168 min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
3123 if (!asoc && params.assoc_id && sctp_style(sk, UDP)) 3169 min_len -= sizeof(struct sctphdr) +
3124 return -EINVAL; 3170 sizeof(struct sctp_data_chunk);
3171
3172 max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
3173
3174 if (val < min_len || val > max_len)
3175 return -EINVAL;
3176 }
3125 3177
3178 asoc = sctp_id2assoc(sk, params.assoc_id);
3126 if (asoc) { 3179 if (asoc) {
3127 if (val == 0) { 3180 if (val == 0) {
3128 val = asoc->pathmtu; 3181 val = asoc->pathmtu - sp->pf->af->net_header_len;
3129 val -= sp->pf->af->net_header_len;
3130 val -= sizeof(struct sctphdr) + 3182 val -= sizeof(struct sctphdr) +
3131 sizeof(struct sctp_data_chunk); 3183 sizeof(struct sctp_data_chunk);
3132 } 3184 }
3133 asoc->user_frag = val; 3185 asoc->user_frag = val;
3134 asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); 3186 asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
3135 } else { 3187 } else {
3188 if (params.assoc_id && sctp_style(sk, UDP))
3189 return -EINVAL;
3136 sp->user_frag = val; 3190 sp->user_frag = val;
3137 } 3191 }
3138 3192
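The new SCTP_MAXSEG bounds can be made concrete. A hedged worked example, assuming IPv4 (20-byte network header), the 12-byte SCTP common header, the 16-byte DATA chunk header, and SCTP_DEFAULT_MINSEGMENT == 512 as defined elsewhere in this tree:

	/* Smallest user_frag accepted for an IPv4 association: */
	int min_len = 512 - 20 - (12 + 16);	/* = 464 bytes per DATA chunk */
	/* Largest: SCTP_MAX_CHUNK_LEN - 16, i.e. one maximal DATA chunk.
	 * val == 0 keeps meaning "derive from asoc->pathmtu", computed with
	 * the same subtraction in the asoc branch below the check. */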
@@ -3907,6 +3961,64 @@ out:
3907 return retval; 3961 return retval;
3908} 3962}
3909 3963
3964static int sctp_setsockopt_scheduler(struct sock *sk,
3965 char __user *optval,
3966 unsigned int optlen)
3967{
3968 struct sctp_association *asoc;
3969 struct sctp_assoc_value params;
3970 int retval = -EINVAL;
3971
3972 if (optlen < sizeof(params))
3973 goto out;
3974
3975 optlen = sizeof(params);
3976 if (copy_from_user(&params, optval, optlen)) {
3977 retval = -EFAULT;
3978 goto out;
3979 }
3980
3981 if (params.assoc_value > SCTP_SS_MAX)
3982 goto out;
3983
3984 asoc = sctp_id2assoc(sk, params.assoc_id);
3985 if (!asoc)
3986 goto out;
3987
3988 retval = sctp_sched_set_sched(asoc, params.assoc_value);
3989
3990out:
3991 return retval;
3992}
3993
3994static int sctp_setsockopt_scheduler_value(struct sock *sk,
3995 char __user *optval,
3996 unsigned int optlen)
3997{
3998 struct sctp_association *asoc;
3999 struct sctp_stream_value params;
4000 int retval = -EINVAL;
4001
4002 if (optlen < sizeof(params))
4003 goto out;
4004
4005 optlen = sizeof(params);
4006 if (copy_from_user(&params, optval, optlen)) {
4007 retval = -EFAULT;
4008 goto out;
4009 }
4010
4011 asoc = sctp_id2assoc(sk, params.assoc_id);
4012 if (!asoc)
4013 goto out;
4014
4015 retval = sctp_sched_set_value(asoc, params.stream_id,
4016 params.stream_value, GFP_KERNEL);
4017
4018out:
4019 return retval;
4020}
4021
3910/* API 6.2 setsockopt(), getsockopt() 4022/* API 6.2 setsockopt(), getsockopt()
3911 * 4023 *
3912 * Applications use setsockopt() and getsockopt() to set or retrieve 4024 * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4088,6 +4200,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
4088 case SCTP_ADD_STREAMS: 4200 case SCTP_ADD_STREAMS:
4089 retval = sctp_setsockopt_add_streams(sk, optval, optlen); 4201 retval = sctp_setsockopt_add_streams(sk, optval, optlen);
4090 break; 4202 break;
4203 case SCTP_STREAM_SCHEDULER:
4204 retval = sctp_setsockopt_scheduler(sk, optval, optlen);
4205 break;
4206 case SCTP_STREAM_SCHEDULER_VALUE:
4207 retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
4208 break;
4091 default: 4209 default:
4092 retval = -ENOPROTOOPT; 4210 retval = -ENOPROTOOPT;
4093 break; 4211 break;
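With the two new cases wired into sctp_setsockopt(), selecting a scheduler is a plain socket option. A hypothetical userspace sketch; SCTP_STREAM_SCHEDULER and enum sctp_sched_type (SCTP_SS_FCFS, SCTP_SS_PRIO, SCTP_SS_RR) are assumed to come from the uapi header added by this series, and note that sctp_setsockopt_scheduler() above requires an existing association:

	struct sctp_assoc_value av = {
		.assoc_id    = 0,		/* one-to-one style socket */
		.assoc_value = SCTP_SS_RR,	/* round-robin between streams */
	};

	if (setsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER, &av, sizeof(av)))
		perror("SCTP_STREAM_SCHEDULER");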
@@ -4906,14 +5024,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
4906 struct socket *sock; 5024 struct socket *sock;
4907 int err = 0; 5025 int err = 0;
4908 5026
4909 if (!asoc) 5027 /* Do not peel off from one netns to another one. */
5028 if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
4910 return -EINVAL; 5029 return -EINVAL;
4911 5030
4912 /* If there is a thread waiting on more sndbuf space for 5031 if (!asoc)
4913 * sending on this asoc, it cannot be peeled. 5032 return -EINVAL;
4914 */
4915 if (waitqueue_active(&asoc->wait))
4916 return -EBUSY;
4917 5033
4918 /* An association cannot be branched off from an already peeled-off 5034 /* An association cannot be branched off from an already peeled-off
4919 * socket, nor is this supported for tcp style sockets. 5035 * socket, nor is this supported for tcp style sockets.
@@ -6645,7 +6761,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
6645 char __user *optval, 6761 char __user *optval,
6646 int __user *optlen) 6762 int __user *optlen)
6647{ 6763{
6648 struct sctp_stream_out *streamout; 6764 struct sctp_stream_out_ext *streamoute;
6649 struct sctp_association *asoc; 6765 struct sctp_association *asoc;
6650 struct sctp_prstatus params; 6766 struct sctp_prstatus params;
6651 int retval = -EINVAL; 6767 int retval = -EINVAL;
@@ -6668,21 +6784,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
6668 if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) 6784 if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
6669 goto out; 6785 goto out;
6670 6786
6671 streamout = &asoc->stream.out[params.sprstat_sid]; 6787 streamoute = asoc->stream.out[params.sprstat_sid].ext;
6788 if (!streamoute) {
6789 /* Not allocated yet, means all stats are 0 */
6790 params.sprstat_abandoned_unsent = 0;
6791 params.sprstat_abandoned_sent = 0;
6792 retval = 0;
6793 goto out;
6794 }
6795
6672 if (policy == SCTP_PR_SCTP_NONE) { 6796 if (policy == SCTP_PR_SCTP_NONE) {
6673 params.sprstat_abandoned_unsent = 0; 6797 params.sprstat_abandoned_unsent = 0;
6674 params.sprstat_abandoned_sent = 0; 6798 params.sprstat_abandoned_sent = 0;
6675 for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { 6799 for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
6676 params.sprstat_abandoned_unsent += 6800 params.sprstat_abandoned_unsent +=
6677 streamout->abandoned_unsent[policy]; 6801 streamoute->abandoned_unsent[policy];
6678 params.sprstat_abandoned_sent += 6802 params.sprstat_abandoned_sent +=
6679 streamout->abandoned_sent[policy]; 6803 streamoute->abandoned_sent[policy];
6680 } 6804 }
6681 } else { 6805 } else {
6682 params.sprstat_abandoned_unsent = 6806 params.sprstat_abandoned_unsent =
6683 streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; 6807 streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)];
6684 params.sprstat_abandoned_sent = 6808 params.sprstat_abandoned_sent =
6685 streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; 6809 streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)];
6686 } 6810 }
6687 6811
6688 if (put_user(len, optlen) || copy_to_user(optval, &params, len)) { 6812 if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
@@ -6778,6 +6902,85 @@ out:
6778 return retval; 6902 return retval;
6779} 6903}
6780 6904
6905static int sctp_getsockopt_scheduler(struct sock *sk, int len,
6906 char __user *optval,
6907 int __user *optlen)
6908{
6909 struct sctp_assoc_value params;
6910 struct sctp_association *asoc;
6911 int retval = -EFAULT;
6912
6913 if (len < sizeof(params)) {
6914 retval = -EINVAL;
6915 goto out;
6916 }
6917
6918 len = sizeof(params);
6919 if (copy_from_user(&params, optval, len))
6920 goto out;
6921
6922 asoc = sctp_id2assoc(sk, params.assoc_id);
6923 if (!asoc) {
6924 retval = -EINVAL;
6925 goto out;
6926 }
6927
6928 params.assoc_value = sctp_sched_get_sched(asoc);
6929
6930 if (put_user(len, optlen))
6931 goto out;
6932
6933 if (copy_to_user(optval, &params, len))
6934 goto out;
6935
6936 retval = 0;
6937
6938out:
6939 return retval;
6940}
6941
6942static int sctp_getsockopt_scheduler_value(struct sock *sk, int len,
6943 char __user *optval,
6944 int __user *optlen)
6945{
6946 struct sctp_stream_value params;
6947 struct sctp_association *asoc;
6948 int retval = -EFAULT;
6949
6950 if (len < sizeof(params)) {
6951 retval = -EINVAL;
6952 goto out;
6953 }
6954
6955 len = sizeof(params);
6956 if (copy_from_user(&params, optval, len))
6957 goto out;
6958
6959 asoc = sctp_id2assoc(sk, params.assoc_id);
6960 if (!asoc) {
6961 retval = -EINVAL;
6962 goto out;
6963 }
6964
6965 retval = sctp_sched_get_value(asoc, params.stream_id,
6966 &params.stream_value);
6967 if (retval)
6968 goto out;
6969
6970 if (put_user(len, optlen)) {
6971 retval = -EFAULT;
6972 goto out;
6973 }
6974
6975 if (copy_to_user(optval, &params, len)) {
6976 retval = -EFAULT;
6977 goto out;
6978 }
6979
6980out:
6981 return retval;
6982}
6983
6781static int sctp_getsockopt(struct sock *sk, int level, int optname, 6984static int sctp_getsockopt(struct sock *sk, int level, int optname,
6782 char __user *optval, int __user *optlen) 6985 char __user *optval, int __user *optlen)
6783{ 6986{
@@ -6960,6 +7163,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
6960 retval = sctp_getsockopt_enable_strreset(sk, len, optval, 7163 retval = sctp_getsockopt_enable_strreset(sk, len, optval,
6961 optlen); 7164 optlen);
6962 break; 7165 break;
7166 case SCTP_STREAM_SCHEDULER:
7167 retval = sctp_getsockopt_scheduler(sk, len, optval,
7168 optlen);
7169 break;
7170 case SCTP_STREAM_SCHEDULER_VALUE:
7171 retval = sctp_getsockopt_scheduler_value(sk, len, optval,
7172 optlen);
7173 break;
6963 default: 7174 default:
6964 retval = -ENOPROTOOPT; 7175 retval = -ENOPROTOOPT;
6965 break; 7176 break;
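The read side mirrors it. A sketch of querying a stream's scheduler value (meaningful under SCTP_SS_PRIO); struct sctp_stream_value and its field names follow the uapi additions assumed above:

	struct sctp_stream_value sv = { .assoc_id = 0, .stream_id = 1 };
	socklen_t len = sizeof(sv);

	if (!getsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE, &sv, &len))
		printf("stream 1 scheduler value: %u\n", sv.stream_value);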
@@ -7788,7 +7999,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
7788 7999
7789/* Helper function to wait for space in the sndbuf. */ 8000/* Helper function to wait for space in the sndbuf. */
7790static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, 8001static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
7791 size_t msg_len) 8002 size_t msg_len, struct sock **orig_sk)
7792{ 8003{
7793 struct sock *sk = asoc->base.sk; 8004 struct sock *sk = asoc->base.sk;
7794 int err = 0; 8005 int err = 0;
@@ -7805,10 +8016,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
7805 for (;;) { 8016 for (;;) {
7806 prepare_to_wait_exclusive(&asoc->wait, &wait, 8017 prepare_to_wait_exclusive(&asoc->wait, &wait,
7807 TASK_INTERRUPTIBLE); 8018 TASK_INTERRUPTIBLE);
8019 if (asoc->base.dead)
8020 goto do_dead;
7808 if (!*timeo_p) 8021 if (!*timeo_p)
7809 goto do_nonblock; 8022 goto do_nonblock;
7810 if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING || 8023 if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
7811 asoc->base.dead)
7812 goto do_error; 8024 goto do_error;
7813 if (signal_pending(current)) 8025 if (signal_pending(current))
7814 goto do_interrupted; 8026 goto do_interrupted;
@@ -7821,11 +8033,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
7821 release_sock(sk); 8033 release_sock(sk);
7822 current_timeo = schedule_timeout(current_timeo); 8034 current_timeo = schedule_timeout(current_timeo);
7823 lock_sock(sk); 8035 lock_sock(sk);
8036 if (sk != asoc->base.sk) {
8037 release_sock(sk);
8038 sk = asoc->base.sk;
8039 lock_sock(sk);
8040 }
7824 8041
7825 *timeo_p = current_timeo; 8042 *timeo_p = current_timeo;
7826 } 8043 }
7827 8044
7828out: 8045out:
8046 *orig_sk = sk;
7829 finish_wait(&asoc->wait, &wait); 8047 finish_wait(&asoc->wait, &wait);
7830 8048
7831 /* Release the association's refcnt. */ 8049 /* Release the association's refcnt. */
@@ -7833,6 +8051,10 @@ out:
7833 8051
7834 return err; 8052 return err;
7835 8053
8054do_dead:
8055 err = -ESRCH;
8056 goto out;
8057
7836do_error: 8058do_error:
7837 err = -EPIPE; 8059 err = -EPIPE;
7838 goto out; 8060 goto out;
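The sctp_wait_for_sndbuf() changes close a race with peel-off: sctp_do_peeloff() can migrate the association to a new socket while a sender sleeps here, so the waiter must notice that asoc->base.sk changed, swap locks, and hand the new socket back through *orig_sk; a dead association surfaces as -ESRCH internally and as -EPIPE to the sendmsg() caller. The core of the pattern, distilled from the hunk above with added commentary:

	release_sock(sk);
	current_timeo = schedule_timeout(current_timeo);
	lock_sock(sk);
	if (sk != asoc->base.sk) {	/* asoc was peeled off while we slept */
		release_sock(sk);	/* drop the stale socket's lock */
		sk = asoc->base.sk;	/* follow the association... */
		lock_sock(sk);		/* ...and hold the right lock again */
	}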
@@ -8208,7 +8430,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
8208 * paths won't try to lock it and then oldsk. 8430 * paths won't try to lock it and then oldsk.
8209 */ 8431 */
8210 lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); 8432 lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
8433 sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w);
8211 sctp_assoc_migrate(assoc, newsk); 8434 sctp_assoc_migrate(assoc, newsk);
8435 sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w);
8212 8436
8213 /* If the association on the newsk is already closed before accept() 8437 /* If the association on the newsk is already closed before accept()
8214 * is called, set RCV_SHUTDOWN flag. 8438 * is called, set RCV_SHUTDOWN flag.
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 63ea15503714..a11db21dc8a0 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -32,44 +32,181 @@
32 * Xin Long <lucien.xin@gmail.com> 32 * Xin Long <lucien.xin@gmail.com>
33 */ 33 */
34 34
35#include <linux/list.h>
35#include <net/sctp/sctp.h> 36#include <net/sctp/sctp.h>
36#include <net/sctp/sm.h> 37#include <net/sctp/sm.h>
38#include <net/sctp/stream_sched.h>
39
40/* Migrates chunks from stream queues to new stream queues if needed,
41 * but not across associations. Also, frees chunks queued to streams
42 * that won't exist anymore (sid >= the new outcnt).
43 */
44static void sctp_stream_outq_migrate(struct sctp_stream *stream,
45 struct sctp_stream *new, __u16 outcnt)
46{
47 struct sctp_association *asoc;
48 struct sctp_chunk *ch, *temp;
49 struct sctp_outq *outq;
50 int i;
51
52 asoc = container_of(stream, struct sctp_association, stream);
53 outq = &asoc->outqueue;
54
55 list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) {
56 __u16 sid = sctp_chunk_stream_no(ch);
57
58 if (sid < outcnt)
59 continue;
60
61 sctp_sched_dequeue_common(outq, ch);
62 /* No need to call dequeue_done here because
63		 * the chunks are not scheduled yet.
64 */
65
66 /* Mark as failed send. */
67 sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM);
68 if (asoc->peer.prsctp_capable &&
69 SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
70 asoc->sent_cnt_removable--;
71
72 sctp_chunk_free(ch);
73 }
74
75 if (new) {
76 /* Here we actually move the old ext stuff into the new
77 * buffer, because we want to keep it. Then
78 * sctp_stream_update will swap ->out pointers.
79 */
80 for (i = 0; i < outcnt; i++) {
81 kfree(new->out[i].ext);
82 new->out[i].ext = stream->out[i].ext;
83 stream->out[i].ext = NULL;
84 }
85 }
86
87 for (i = outcnt; i < stream->outcnt; i++)
88 kfree(stream->out[i].ext);
89}
90
91static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
92 gfp_t gfp)
93{
94 struct sctp_stream_out *out;
95
96 out = kmalloc_array(outcnt, sizeof(*out), gfp);
97 if (!out)
98 return -ENOMEM;
99
100 if (stream->out) {
101 memcpy(out, stream->out, min(outcnt, stream->outcnt) *
102 sizeof(*out));
103 kfree(stream->out);
104 }
105
106 if (outcnt > stream->outcnt)
107 memset(out + stream->outcnt, 0,
108 (outcnt - stream->outcnt) * sizeof(*out));
109
110 stream->out = out;
111
112 return 0;
113}
114
115static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
116 gfp_t gfp)
117{
118 struct sctp_stream_in *in;
119
120 in = kmalloc_array(incnt, sizeof(*stream->in), gfp);
121
122 if (!in)
123 return -ENOMEM;
124
125 if (stream->in) {
126 memcpy(in, stream->in, min(incnt, stream->incnt) *
127 sizeof(*in));
128 kfree(stream->in);
129 }
130
131 if (incnt > stream->incnt)
132 memset(in + stream->incnt, 0,
133 (incnt - stream->incnt) * sizeof(*in));
134
135 stream->in = in;
136
137 return 0;
138}
37 139
38int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, 140int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
39 gfp_t gfp) 141 gfp_t gfp)
40{ 142{
41 int i; 143 struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
144 int i, ret = 0;
145
146 gfp |= __GFP_NOWARN;
42 147
43 /* Initial stream->out size may be very big, so free it and alloc 148 /* Initial stream->out size may be very big, so free it and alloc
44 * a new one with new outcnt to save memory. 149 * a new one with new outcnt to save memory if needed.
45 */ 150 */
46 kfree(stream->out); 151 if (outcnt == stream->outcnt)
152 goto in;
47 153
48 stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); 154 /* Filter out chunks queued on streams that won't exist anymore */
49 if (!stream->out) 155 sched->unsched_all(stream);
50 return -ENOMEM; 156 sctp_stream_outq_migrate(stream, NULL, outcnt);
157 sched->sched_all(stream);
158
159 i = sctp_stream_alloc_out(stream, outcnt, gfp);
160 if (i)
161 return i;
51 162
52 stream->outcnt = outcnt; 163 stream->outcnt = outcnt;
53 for (i = 0; i < stream->outcnt; i++) 164 for (i = 0; i < stream->outcnt; i++)
54 stream->out[i].state = SCTP_STREAM_OPEN; 165 stream->out[i].state = SCTP_STREAM_OPEN;
55 166
167 sched->init(stream);
168
169in:
56 if (!incnt) 170 if (!incnt)
57 return 0; 171 goto out;
58 172
59 stream->in = kcalloc(incnt, sizeof(*stream->in), gfp); 173 i = sctp_stream_alloc_in(stream, incnt, gfp);
60 if (!stream->in) { 174 if (i) {
61 kfree(stream->out); 175 ret = -ENOMEM;
62 stream->out = NULL; 176 goto free;
63 return -ENOMEM;
64 } 177 }
65 178
66 stream->incnt = incnt; 179 stream->incnt = incnt;
180 goto out;
67 181
68 return 0; 182free:
183 sched->free(stream);
184 kfree(stream->out);
185 stream->out = NULL;
186out:
187 return ret;
188}
189
190int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
191{
192 struct sctp_stream_out_ext *soute;
193
194 soute = kzalloc(sizeof(*soute), GFP_KERNEL);
195 if (!soute)
196 return -ENOMEM;
197 stream->out[sid].ext = soute;
198
199 return sctp_sched_init_sid(stream, sid, GFP_KERNEL);
69} 200}
70 201
71void sctp_stream_free(struct sctp_stream *stream) 202void sctp_stream_free(struct sctp_stream *stream)
72{ 203{
204 struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
205 int i;
206
207 sched->free(stream);
208 for (i = 0; i < stream->outcnt; i++)
209 kfree(stream->out[i].ext);
73 kfree(stream->out); 210 kfree(stream->out);
74 kfree(stream->in); 211 kfree(stream->in);
75} 212}
@@ -87,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream)
87 224
88void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) 225void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
89{ 226{
227 struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
228
229 sched->unsched_all(stream);
230 sctp_stream_outq_migrate(stream, new, new->outcnt);
90 sctp_stream_free(stream); 231 sctp_stream_free(stream);
91 232
92 stream->out = new->out; 233 stream->out = new->out;
@@ -94,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
94 stream->outcnt = new->outcnt; 235 stream->outcnt = new->outcnt;
95 stream->incnt = new->incnt; 236 stream->incnt = new->incnt;
96 237
238 sched->sched_all(stream);
239
97 new->out = NULL; 240 new->out = NULL;
98 new->in = NULL; 241 new->in = NULL;
99} 242}
@@ -118,6 +261,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
118 __u16 i, str_nums, *str_list; 261 __u16 i, str_nums, *str_list;
119 struct sctp_chunk *chunk; 262 struct sctp_chunk *chunk;
120 int retval = -EINVAL; 263 int retval = -EINVAL;
264 __be16 *nstr_list;
121 bool out, in; 265 bool out, in;
122 266
123 if (!asoc->peer.reconf_capable || 267 if (!asoc->peer.reconf_capable ||
@@ -138,23 +282,44 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
138 282
139 str_nums = params->srs_number_streams; 283 str_nums = params->srs_number_streams;
140 str_list = params->srs_stream_list; 284 str_list = params->srs_stream_list;
141 if (out && str_nums) 285 if (str_nums) {
142 for (i = 0; i < str_nums; i++) 286 int param_len = 0;
143 if (str_list[i] >= stream->outcnt)
144 goto out;
145 287
146 if (in && str_nums) 288 if (out) {
147 for (i = 0; i < str_nums; i++) 289 for (i = 0; i < str_nums; i++)
148 if (str_list[i] >= stream->incnt) 290 if (str_list[i] >= stream->outcnt)
149 goto out; 291 goto out;
150 292
151 for (i = 0; i < str_nums; i++) 293 param_len = str_nums * sizeof(__u16) +
152 str_list[i] = htons(str_list[i]); 294 sizeof(struct sctp_strreset_outreq);
295 }
296
297 if (in) {
298 for (i = 0; i < str_nums; i++)
299 if (str_list[i] >= stream->incnt)
300 goto out;
153 301
154 chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in); 302 param_len += str_nums * sizeof(__u16) +
303 sizeof(struct sctp_strreset_inreq);
304 }
305
306 if (param_len > SCTP_MAX_CHUNK_LEN -
307 sizeof(struct sctp_reconf_chunk))
308 goto out;
309 }
310
311 nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL);
312 if (!nstr_list) {
313 retval = -ENOMEM;
314 goto out;
315 }
155 316
156 for (i = 0; i < str_nums; i++) 317 for (i = 0; i < str_nums; i++)
157 str_list[i] = ntohs(str_list[i]); 318 nstr_list[i] = htons(str_list[i]);
319
320 chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in);
321
322 kfree(nstr_list);
158 323
159 if (!chunk) { 324 if (!chunk) {
160 retval = -ENOMEM; 325 retval = -ENOMEM;
@@ -244,7 +409,7 @@ int sctp_send_add_streams(struct sctp_association *asoc,
244{ 409{
245 struct sctp_stream *stream = &asoc->stream; 410 struct sctp_stream *stream = &asoc->stream;
246 struct sctp_chunk *chunk = NULL; 411 struct sctp_chunk *chunk = NULL;
247 int retval = -ENOMEM; 412 int retval;
248 __u32 outcnt, incnt; 413 __u32 outcnt, incnt;
249 __u16 out, in; 414 __u16 out, in;
250 415
@@ -270,20 +435,16 @@ int sctp_send_add_streams(struct sctp_association *asoc,
270 } 435 }
271 436
272 if (out) { 437 if (out) {
273 struct sctp_stream_out *streamout; 438 retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL);
274 439 if (retval)
275 streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
276 GFP_KERNEL);
277 if (!streamout)
278 goto out; 440 goto out;
279
280 memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
281 stream->out = streamout;
282 } 441 }
283 442
284 chunk = sctp_make_strreset_addstrm(asoc, out, in); 443 chunk = sctp_make_strreset_addstrm(asoc, out, in);
285 if (!chunk) 444 if (!chunk) {
445 retval = -ENOMEM;
286 goto out; 446 goto out;
447 }
287 448
288 asoc->strreset_chunk = chunk; 449 asoc->strreset_chunk = chunk;
289 sctp_chunk_hold(asoc->strreset_chunk); 450 sctp_chunk_hold(asoc->strreset_chunk);
@@ -305,7 +466,7 @@ out:
305} 466}
306 467
307static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param( 468static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
308 struct sctp_association *asoc, __u32 resp_seq, 469 struct sctp_association *asoc, __be32 resp_seq,
309 __be16 type) 470 __be16 type)
310{ 471{
311 struct sctp_chunk *chunk = asoc->strreset_chunk; 472 struct sctp_chunk *chunk = asoc->strreset_chunk;
@@ -345,8 +506,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
345{ 506{
346 struct sctp_strreset_outreq *outreq = param.v; 507 struct sctp_strreset_outreq *outreq = param.v;
347 struct sctp_stream *stream = &asoc->stream; 508 struct sctp_stream *stream = &asoc->stream;
348 __u16 i, nums, flags = 0, *str_p = NULL;
349 __u32 result = SCTP_STRRESET_DENIED; 509 __u32 result = SCTP_STRRESET_DENIED;
510 __u16 i, nums, flags = 0;
511 __be16 *str_p = NULL;
350 __u32 request_seq; 512 __u32 request_seq;
351 513
352 request_seq = ntohl(outreq->request_seq); 514 request_seq = ntohl(outreq->request_seq);
@@ -439,8 +601,9 @@ struct sctp_chunk *sctp_process_strreset_inreq(
439 struct sctp_stream *stream = &asoc->stream; 601 struct sctp_stream *stream = &asoc->stream;
440 __u32 result = SCTP_STRRESET_DENIED; 602 __u32 result = SCTP_STRRESET_DENIED;
441 struct sctp_chunk *chunk = NULL; 603 struct sctp_chunk *chunk = NULL;
442 __u16 i, nums, *str_p;
443 __u32 request_seq; 604 __u32 request_seq;
605 __u16 i, nums;
606 __be16 *str_p;
444 607
445 request_seq = ntohl(inreq->request_seq); 608 request_seq = ntohl(inreq->request_seq);
446 if (TSN_lt(asoc->strreset_inseq, request_seq) || 609 if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -601,7 +764,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
601 struct sctp_strreset_addstrm *addstrm = param.v; 764 struct sctp_strreset_addstrm *addstrm = param.v;
602 struct sctp_stream *stream = &asoc->stream; 765 struct sctp_stream *stream = &asoc->stream;
603 __u32 result = SCTP_STRRESET_DENIED; 766 __u32 result = SCTP_STRRESET_DENIED;
604 struct sctp_stream_in *streamin;
605 __u32 request_seq, incnt; 767 __u32 request_seq, incnt;
606 __u16 in, i; 768 __u16 in, i;
607 769
@@ -648,13 +810,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
648 if (!in || incnt > SCTP_MAX_STREAM) 810 if (!in || incnt > SCTP_MAX_STREAM)
649 goto out; 811 goto out;
650 812
651 streamin = krealloc(stream->in, incnt * sizeof(*streamin), 813 if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
652 GFP_ATOMIC);
653 if (!streamin)
654 goto out; 814 goto out;
655 815
656 memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
657 stream->in = streamin;
658 stream->incnt = incnt; 816 stream->incnt = incnt;
659 817
660 result = SCTP_STRRESET_PERFORMED; 818 result = SCTP_STRRESET_PERFORMED;
@@ -676,10 +834,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
676 struct sctp_strreset_addstrm *addstrm = param.v; 834 struct sctp_strreset_addstrm *addstrm = param.v;
677 struct sctp_stream *stream = &asoc->stream; 835 struct sctp_stream *stream = &asoc->stream;
678 __u32 result = SCTP_STRRESET_DENIED; 836 __u32 result = SCTP_STRRESET_DENIED;
679 struct sctp_stream_out *streamout;
680 struct sctp_chunk *chunk = NULL; 837 struct sctp_chunk *chunk = NULL;
681 __u32 request_seq, outcnt; 838 __u32 request_seq, outcnt;
682 __u16 out, i; 839 __u16 out, i;
840 int ret;
683 841
684 request_seq = ntohl(addstrm->request_seq); 842 request_seq = ntohl(addstrm->request_seq);
685 if (TSN_lt(asoc->strreset_inseq, request_seq) || 843 if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -708,14 +866,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
708 if (!out || outcnt > SCTP_MAX_STREAM) 866 if (!out || outcnt > SCTP_MAX_STREAM)
709 goto out; 867 goto out;
710 868
711 streamout = krealloc(stream->out, outcnt * sizeof(*streamout), 869 ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC);
712 GFP_ATOMIC); 870 if (ret)
713 if (!streamout)
714 goto out; 871 goto out;
715 872
716 memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
717 stream->out = streamout;
718
719 chunk = sctp_make_strreset_addstrm(asoc, out, 0); 873 chunk = sctp_make_strreset_addstrm(asoc, out, 0);
720 if (!chunk) 874 if (!chunk)
721 goto out; 875 goto out;
@@ -769,7 +923,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
769 923
770 if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) { 924 if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
771 struct sctp_strreset_outreq *outreq; 925 struct sctp_strreset_outreq *outreq;
772 __u16 *str_p; 926 __be16 *str_p;
773 927
774 outreq = (struct sctp_strreset_outreq *)req; 928 outreq = (struct sctp_strreset_outreq *)req;
775 str_p = outreq->list_of_streams; 929 str_p = outreq->list_of_streams;
@@ -794,7 +948,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
794 nums, str_p, GFP_ATOMIC); 948 nums, str_p, GFP_ATOMIC);
795 } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) { 949 } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
796 struct sctp_strreset_inreq *inreq; 950 struct sctp_strreset_inreq *inreq;
797 __u16 *str_p; 951 __be16 *str_p;
798 952
799 /* if the result is performed, it's impossible for inreq */ 953 /* if the result is performed, it's impossible for inreq */
800 if (result == SCTP_STRRESET_PERFORMED) 954 if (result == SCTP_STRRESET_PERFORMED)
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
new file mode 100644
index 000000000000..0b83ec51e43b
--- /dev/null
+++ b/net/sctp/stream_sched.c
@@ -0,0 +1,275 @@
1/* SCTP kernel implementation
2 * (C) Copyright Red Hat Inc. 2017
3 *
4 * This file is part of the SCTP kernel implementation
5 *
6 * These functions manipulate sctp stream queue/scheduling.
7 *
8 * This SCTP implementation is free software;
9 * you can redistribute it and/or modify it under the terms of
10 * the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This SCTP implementation is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * ************************
17 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 * See the GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with GNU CC; see the file COPYING. If not, see
22 * <http://www.gnu.org/licenses/>.
23 *
24 * Please send any bug reports or fixes you make to the
25 * email address(es):
26 * lksctp developers <linux-sctp@vger.kernel.org>
27 *
28 * Written or modified by:
29 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
30 */
31
32#include <linux/list.h>
33#include <net/sctp/sctp.h>
34#include <net/sctp/sm.h>
35#include <net/sctp/stream_sched.h>
36
37/* First Come First Serve (a.k.a. FIFO)
38 * RFC DRAFT ndata Section 3.1
39 */
40static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid,
41 __u16 value, gfp_t gfp)
42{
43 return 0;
44}
45
46static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid,
47 __u16 *value)
48{
49 *value = 0;
50 return 0;
51}
52
53static int sctp_sched_fcfs_init(struct sctp_stream *stream)
54{
55 return 0;
56}
57
58static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
59 gfp_t gfp)
60{
61 return 0;
62}
63
64static void sctp_sched_fcfs_free(struct sctp_stream *stream)
65{
66}
67
68static void sctp_sched_fcfs_enqueue(struct sctp_outq *q,
69 struct sctp_datamsg *msg)
70{
71}
72
73static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q)
74{
75 struct sctp_stream *stream = &q->asoc->stream;
76 struct sctp_chunk *ch = NULL;
77 struct list_head *entry;
78
79 if (list_empty(&q->out_chunk_list))
80 goto out;
81
82 if (stream->out_curr) {
83 ch = list_entry(stream->out_curr->ext->outq.next,
84 struct sctp_chunk, stream_list);
85 } else {
86 entry = q->out_chunk_list.next;
87 ch = list_entry(entry, struct sctp_chunk, list);
88 }
89
90 sctp_sched_dequeue_common(q, ch);
91
92out:
93 return ch;
94}
95
96static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q,
97 struct sctp_chunk *chunk)
98{
99}
100
101static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream)
102{
103}
104
105static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream)
106{
107}
108
109static struct sctp_sched_ops sctp_sched_fcfs = {
110 .set = sctp_sched_fcfs_set,
111 .get = sctp_sched_fcfs_get,
112 .init = sctp_sched_fcfs_init,
113 .init_sid = sctp_sched_fcfs_init_sid,
114 .free = sctp_sched_fcfs_free,
115 .enqueue = sctp_sched_fcfs_enqueue,
116 .dequeue = sctp_sched_fcfs_dequeue,
117 .dequeue_done = sctp_sched_fcfs_dequeue_done,
118 .sched_all = sctp_sched_fcfs_sched_all,
119 .unsched_all = sctp_sched_fcfs_unsched_all,
120};
121
122/* API to other parts of the stack */
123
124extern struct sctp_sched_ops sctp_sched_prio;
125extern struct sctp_sched_ops sctp_sched_rr;
126
127static struct sctp_sched_ops *sctp_sched_ops[] = {
128 &sctp_sched_fcfs,
129 &sctp_sched_prio,
130 &sctp_sched_rr,
131};
132
133int sctp_sched_set_sched(struct sctp_association *asoc,
134 enum sctp_sched_type sched)
135{
136 struct sctp_sched_ops *n = sctp_sched_ops[sched];
137 struct sctp_sched_ops *old = asoc->outqueue.sched;
138 struct sctp_datamsg *msg = NULL;
139 struct sctp_chunk *ch;
140 int i, ret = 0;
141
142 if (old == n)
143 return ret;
144
145 if (sched > SCTP_SS_MAX)
146 return -EINVAL;
147
148 if (old) {
149 old->free(&asoc->stream);
150
151 /* Give the next scheduler a clean slate. */
152 for (i = 0; i < asoc->stream.outcnt; i++) {
153 void *p = asoc->stream.out[i].ext;
154
155 if (!p)
156 continue;
157
158 p += offsetofend(struct sctp_stream_out_ext, outq);
159 memset(p, 0, sizeof(struct sctp_stream_out_ext) -
160 offsetofend(struct sctp_stream_out_ext, outq));
161 }
162 }
163
164 asoc->outqueue.sched = n;
165 n->init(&asoc->stream);
166 for (i = 0; i < asoc->stream.outcnt; i++) {
167 if (!asoc->stream.out[i].ext)
168 continue;
169
170 ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
171 if (ret)
172 goto err;
173 }
174
175 /* We have to requeue all chunks already queued. */
176 list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
177 if (ch->msg == msg)
178 continue;
179 msg = ch->msg;
180 n->enqueue(&asoc->outqueue, msg);
181 }
182
183 return ret;
184
185err:
186 n->free(&asoc->stream);
187 asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */
188
189 return ret;
190}
191
192int sctp_sched_get_sched(struct sctp_association *asoc)
193{
194 int i;
195
196 for (i = 0; i <= SCTP_SS_MAX; i++)
197 if (asoc->outqueue.sched == sctp_sched_ops[i])
198 return i;
199
200 return 0;
201}
202
203int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
204 __u16 value, gfp_t gfp)
205{
206 if (sid >= asoc->stream.outcnt)
207 return -EINVAL;
208
209 if (!asoc->stream.out[sid].ext) {
210 int ret;
211
212 ret = sctp_stream_init_ext(&asoc->stream, sid);
213 if (ret)
214 return ret;
215 }
216
217 return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp);
218}
219
220int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
221 __u16 *value)
222{
223 if (sid >= asoc->stream.outcnt)
224 return -EINVAL;
225
226 if (!asoc->stream.out[sid].ext)
227 return 0;
228
229 return asoc->outqueue.sched->get(&asoc->stream, sid, value);
230}
231
232void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
233{
234 if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
235 struct sctp_stream_out *sout;
236 __u16 sid;
237
238		/* datamsg is not finished, so save it as the current one,
239		 * in case the application switches scheduler or a higher
240		 * priority stream comes in.
241 */
242 sid = sctp_chunk_stream_no(ch);
243 sout = &q->asoc->stream.out[sid];
244 q->asoc->stream.out_curr = sout;
245 return;
246 }
247
248 q->asoc->stream.out_curr = NULL;
249 q->sched->dequeue_done(q, ch);
250}
251
252/* Auxiliary functions for the schedulers */
253void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch)
254{
255 list_del_init(&ch->list);
256 list_del_init(&ch->stream_list);
257 q->out_qlen -= ch->skb->len;
258}
259
260int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp)
261{
262 struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
263
264 INIT_LIST_HEAD(&stream->out[sid].ext->outq);
265 return sched->init_sid(stream, sid, gfp);
266}
267
268struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream)
269{
270 struct sctp_association *asoc;
271
272 asoc = container_of(stream, struct sctp_association, stream);
273
274 return asoc->outqueue.sched;
275}
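stream_sched.c defines only the ops tables and helpers; the call sites live in the outqueue code, which is not part of this diff. A hedged sketch of the expected flow through a struct sctp_outq *q, based on the helpers above:

	struct sctp_chunk *ch;

	q->sched->enqueue(q, msg);	/* on send: scheduler notes the stream */
	ch = q->sched->dequeue(q);	/* on transmit: pick a stream, pop a chunk */
	if (ch)
		sctp_sched_dequeue_done(q, ch);	/* advance scheduling state; keeps
						 * stream.out_curr pinned until the
						 * whole datamsg has been sent */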
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
new file mode 100644
index 000000000000..384dbf3c8760
--- /dev/null
+++ b/net/sctp/stream_sched_prio.c
@@ -0,0 +1,347 @@
1/* SCTP kernel implementation
2 * (C) Copyright Red Hat Inc. 2017
3 *
4 * This file is part of the SCTP kernel implementation
5 *
6 * These functions manipulate sctp stream queue/scheduling.
7 *
8 * This SCTP implementation is free software;
9 * you can redistribute it and/or modify it under the terms of
10 * the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This SCTP implementation is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * ************************
17 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 * See the GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with GNU CC; see the file COPYING. If not, see
22 * <http://www.gnu.org/licenses/>.
23 *
24 * Please send any bug reports or fixes you make to the
25 * email address(es):
26 * lksctp developers <linux-sctp@vger.kernel.org>
27 *
28 * Written or modified by:
29 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
30 */
31
32#include <linux/list.h>
33#include <net/sctp/sctp.h>
34#include <net/sctp/sm.h>
35#include <net/sctp/stream_sched.h>
36
37/* Priority handling
38 * RFC DRAFT ndata section 3.4
39 */
40
41static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
42
43static struct sctp_stream_priorities *sctp_sched_prio_new_head(
44 struct sctp_stream *stream, int prio, gfp_t gfp)
45{
46 struct sctp_stream_priorities *p;
47
48 p = kmalloc(sizeof(*p), gfp);
49 if (!p)
50 return NULL;
51
52 INIT_LIST_HEAD(&p->prio_sched);
53 INIT_LIST_HEAD(&p->active);
54 p->next = NULL;
55 p->prio = prio;
56
57 return p;
58}
59
60static struct sctp_stream_priorities *sctp_sched_prio_get_head(
61 struct sctp_stream *stream, int prio, gfp_t gfp)
62{
63 struct sctp_stream_priorities *p;
64 int i;
65
66 /* Look into scheduled priorities first, as they are sorted and
67 * we can find it fast IF it's scheduled.
68 */
69 list_for_each_entry(p, &stream->prio_list, prio_sched) {
70 if (p->prio == prio)
71 return p;
72 if (p->prio > prio)
73 break;
74 }
75
76	/* No luck. So we search all streams now. */
77 for (i = 0; i < stream->outcnt; i++) {
78 if (!stream->out[i].ext)
79 continue;
80
81 p = stream->out[i].ext->prio_head;
82 if (!p)
83 /* Means all other streams won't be initialized
84			 * either.
85 */
86 break;
87 if (p->prio == prio)
88 return p;
89 }
90
91 /* If not even there, allocate a new one. */
92 return sctp_sched_prio_new_head(stream, prio, gfp);
93}
94
95static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p)
96{
97 struct list_head *pos;
98
99 pos = p->next->prio_list.next;
100 if (pos == &p->active)
101 pos = pos->next;
102 p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list);
103}
104
105static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute)
106{
107 bool scheduled = false;
108
109 if (!list_empty(&soute->prio_list)) {
110 struct sctp_stream_priorities *prio_head = soute->prio_head;
111
112 /* Scheduled */
113 scheduled = true;
114
115 if (prio_head->next == soute)
116 /* Try to move to the next stream */
117 sctp_sched_prio_next_stream(prio_head);
118
119 list_del_init(&soute->prio_list);
120
121 /* Also unsched the priority if this was the last stream */
122 if (list_empty(&prio_head->active)) {
123 list_del_init(&prio_head->prio_sched);
124 /* If there is no stream left, clear next */
125 prio_head->next = NULL;
126 }
127 }
128
129 return scheduled;
130}
131
132static void sctp_sched_prio_sched(struct sctp_stream *stream,
133 struct sctp_stream_out_ext *soute)
134{
135 struct sctp_stream_priorities *prio, *prio_head;
136
137 prio_head = soute->prio_head;
138
139 /* Nothing to do if already scheduled */
140 if (!list_empty(&soute->prio_list))
141 return;
142
143 /* Schedule the stream. If there is a next, we schedule the new
144 * one before it, so it's the last in round robin order.
145 * If there isn't, we also have to schedule the priority.
146 */
147 if (prio_head->next) {
148 list_add(&soute->prio_list, prio_head->next->prio_list.prev);
149 return;
150 }
151
152 list_add(&soute->prio_list, &prio_head->active);
153 prio_head->next = soute;
154
155 list_for_each_entry(prio, &stream->prio_list, prio_sched) {
156 if (prio->prio > prio_head->prio) {
157 list_add(&prio_head->prio_sched, prio->prio_sched.prev);
158 return;
159 }
160 }
161
162 list_add_tail(&prio_head->prio_sched, &stream->prio_list);
163}
164
165static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
166 __u16 prio, gfp_t gfp)
167{
168 struct sctp_stream_out *sout = &stream->out[sid];
169 struct sctp_stream_out_ext *soute = sout->ext;
170 struct sctp_stream_priorities *prio_head, *old;
171 bool reschedule = false;
172 int i;
173
174 prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
175 if (!prio_head)
176 return -ENOMEM;
177
178 reschedule = sctp_sched_prio_unsched(soute);
179 old = soute->prio_head;
180 soute->prio_head = prio_head;
181 if (reschedule)
182 sctp_sched_prio_sched(stream, soute);
183
184 if (!old)
185 /* Happens when we set the priority for the first time */
186 return 0;
187
188 for (i = 0; i < stream->outcnt; i++) {
189 soute = stream->out[i].ext;
190 if (soute && soute->prio_head == old)
191 /* It's still in use, nothing else to do here. */
192 return 0;
193 }
194
195 /* No hits, we are good to free it. */
196 kfree(old);
197
198 return 0;
199}
200
201static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid,
202 __u16 *value)
203{
204 *value = stream->out[sid].ext->prio_head->prio;
205 return 0;
206}
207
208static int sctp_sched_prio_init(struct sctp_stream *stream)
209{
210 INIT_LIST_HEAD(&stream->prio_list);
211
212 return 0;
213}
214
215static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
216 gfp_t gfp)
217{
218 INIT_LIST_HEAD(&stream->out[sid].ext->prio_list);
219 return sctp_sched_prio_set(stream, sid, 0, gfp);
220}
221
222static void sctp_sched_prio_free(struct sctp_stream *stream)
223{
224 struct sctp_stream_priorities *prio, *n;
225 LIST_HEAD(list);
226 int i;
227
228 /* As we don't keep a list of priorities, to avoid multiple
229 * frees we have to do it in 3 steps:
230 * 1. unsched everyone, so the lists are free to use in 2.
231 * 2. build the list of the priorities
232 * 3. free the list
233 */
234 sctp_sched_prio_unsched_all(stream);
235 for (i = 0; i < stream->outcnt; i++) {
236 if (!stream->out[i].ext)
237 continue;
238 prio = stream->out[i].ext->prio_head;
239 if (prio && list_empty(&prio->prio_sched))
240 list_add(&prio->prio_sched, &list);
241 }
242 list_for_each_entry_safe(prio, n, &list, prio_sched) {
243 list_del_init(&prio->prio_sched);
244 kfree(prio);
245 }
246}
247
248static void sctp_sched_prio_enqueue(struct sctp_outq *q,
249 struct sctp_datamsg *msg)
250{
251 struct sctp_stream *stream;
252 struct sctp_chunk *ch;
253 __u16 sid;
254
255 ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
256 sid = sctp_chunk_stream_no(ch);
257 stream = &q->asoc->stream;
258 sctp_sched_prio_sched(stream, stream->out[sid].ext);
259}
260
261static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q)
262{
263 struct sctp_stream *stream = &q->asoc->stream;
264 struct sctp_stream_priorities *prio;
265 struct sctp_stream_out_ext *soute;
266 struct sctp_chunk *ch = NULL;
267
268 /* Bail out quickly if queue is empty */
269 if (list_empty(&q->out_chunk_list))
270 goto out;
271
272 /* Find which chunk is next. It's easy, it's either the current
273 * one or the first chunk on the next active stream.
274 */
275 if (stream->out_curr) {
276 soute = stream->out_curr->ext;
277 } else {
278 prio = list_entry(stream->prio_list.next,
279 struct sctp_stream_priorities, prio_sched);
280 soute = prio->next;
281 }
282 ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
283 sctp_sched_dequeue_common(q, ch);
284
285out:
286 return ch;
287}
288
289static void sctp_sched_prio_dequeue_done(struct sctp_outq *q,
290 struct sctp_chunk *ch)
291{
292 struct sctp_stream_priorities *prio;
293 struct sctp_stream_out_ext *soute;
294 __u16 sid;
295
296 /* Last chunk on that msg, move to the next stream on
297 * this priority.
298 */
299 sid = sctp_chunk_stream_no(ch);
300 soute = q->asoc->stream.out[sid].ext;
301 prio = soute->prio_head;
302
303 sctp_sched_prio_next_stream(prio);
304
305 if (list_empty(&soute->outq))
306 sctp_sched_prio_unsched(soute);
307}
308
309static void sctp_sched_prio_sched_all(struct sctp_stream *stream)
310{
311 struct sctp_association *asoc;
312 struct sctp_stream_out *sout;
313 struct sctp_chunk *ch;
314
315 asoc = container_of(stream, struct sctp_association, stream);
316 list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
317 __u16 sid;
318
319 sid = sctp_chunk_stream_no(ch);
320 sout = &stream->out[sid];
321 if (sout->ext)
322 sctp_sched_prio_sched(stream, sout->ext);
323 }
324}
325
326static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
327{
328 struct sctp_stream_priorities *p, *tmp;
329 struct sctp_stream_out_ext *soute, *souttmp;
330
331 list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched)
332 list_for_each_entry_safe(soute, souttmp, &p->active, prio_list)
333 sctp_sched_prio_unsched(soute);
334}
335
336struct sctp_sched_ops sctp_sched_prio = {
337 .set = sctp_sched_prio_set,
338 .get = sctp_sched_prio_get,
339 .init = sctp_sched_prio_init,
340 .init_sid = sctp_sched_prio_init_sid,
341 .free = sctp_sched_prio_free,
342 .enqueue = sctp_sched_prio_enqueue,
343 .dequeue = sctp_sched_prio_dequeue,
344 .dequeue_done = sctp_sched_prio_dequeue_done,
345 .sched_all = sctp_sched_prio_sched_all,
346 .unsched_all = sctp_sched_prio_unsched_all,
347};
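Since prio_list is kept sorted ascending and sctp_sched_prio_dequeue() takes its head, a numerically lower value means higher precedence. A hypothetical userspace sketch, reusing the sockopt types assumed earlier:

	struct sctp_stream_value sv = { .assoc_id = 0 };

	sv.stream_id = 0;
	sv.stream_value = 1;	/* served first */
	setsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE, &sv, sizeof(sv));

	sv.stream_id = 1;
	sv.stream_value = 10;	/* drained only while prio 1 has nothing queued */
	setsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE, &sv, sizeof(sv));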
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
new file mode 100644
index 000000000000..7612a438c5b9
--- /dev/null
+++ b/net/sctp/stream_sched_rr.c
@@ -0,0 +1,201 @@
1/* SCTP kernel implementation
2 * (C) Copyright Red Hat Inc. 2017
3 *
4 * This file is part of the SCTP kernel implementation
5 *
6 * These functions manipulate sctp stream queue/scheduling.
7 *
8 * This SCTP implementation is free software;
9 * you can redistribute it and/or modify it under the terms of
10 * the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This SCTP implementation is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * ************************
17 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 * See the GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with GNU CC; see the file COPYING. If not, see
22 * <http://www.gnu.org/licenses/>.
23 *
24 * Please send any bug reports or fixes you make to the
25 * email address(es):
26 * lksctp developers <linux-sctp@vger.kernel.org>
27 *
28 * Written or modified by:
29 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
30 */
31
32#include <linux/list.h>
33#include <net/sctp/sctp.h>
34#include <net/sctp/sm.h>
35#include <net/sctp/stream_sched.h>
36
37/* Round-robin handling
38 * RFC DRAFT ndata section 3.2
39 */
40static void sctp_sched_rr_unsched_all(struct sctp_stream *stream);
41
42static void sctp_sched_rr_next_stream(struct sctp_stream *stream)
43{
44 struct list_head *pos;
45
46 pos = stream->rr_next->rr_list.next;
47 if (pos == &stream->rr_list)
48 pos = pos->next;
49 stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list);
50}
51
52static void sctp_sched_rr_unsched(struct sctp_stream *stream,
53 struct sctp_stream_out_ext *soute)
54{
55 if (stream->rr_next == soute)
56 /* Try to move to the next stream */
57 sctp_sched_rr_next_stream(stream);
58
59 list_del_init(&soute->rr_list);
60
61 /* If we have no other stream queued, clear next */
62 if (list_empty(&stream->rr_list))
63 stream->rr_next = NULL;
64}
65
66static void sctp_sched_rr_sched(struct sctp_stream *stream,
67 struct sctp_stream_out_ext *soute)
68{
69 if (!list_empty(&soute->rr_list))
70 /* Already scheduled. */
71 return;
72
73 /* Schedule the stream */
74 list_add_tail(&soute->rr_list, &stream->rr_list);
75
76 if (!stream->rr_next)
77 stream->rr_next = soute;
78}
79
80static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid,
81 __u16 prio, gfp_t gfp)
82{
83 return 0;
84}
85
86static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid,
87 __u16 *value)
88{
89 return 0;
90}
91
92static int sctp_sched_rr_init(struct sctp_stream *stream)
93{
94 INIT_LIST_HEAD(&stream->rr_list);
95 stream->rr_next = NULL;
96
97 return 0;
98}
99
100static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
101 gfp_t gfp)
102{
103 INIT_LIST_HEAD(&stream->out[sid].ext->rr_list);
104
105 return 0;
106}
107
108static void sctp_sched_rr_free(struct sctp_stream *stream)
109{
110 sctp_sched_rr_unsched_all(stream);
111}
112
113static void sctp_sched_rr_enqueue(struct sctp_outq *q,
114 struct sctp_datamsg *msg)
115{
116 struct sctp_stream *stream;
117 struct sctp_chunk *ch;
118 __u16 sid;
119
120 ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
121 sid = sctp_chunk_stream_no(ch);
122 stream = &q->asoc->stream;
123 sctp_sched_rr_sched(stream, stream->out[sid].ext);
124}
125
126static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q)
127{
128 struct sctp_stream *stream = &q->asoc->stream;
129 struct sctp_stream_out_ext *soute;
130 struct sctp_chunk *ch = NULL;
131
132 /* Bail out quickly if queue is empty */
133 if (list_empty(&q->out_chunk_list))
134 goto out;
135
136 /* Find which chunk is next */
137 if (stream->out_curr)
138 soute = stream->out_curr->ext;
139 else
140 soute = stream->rr_next;
141 ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
142
143 sctp_sched_dequeue_common(q, ch);
144
145out:
146 return ch;
147}
148
149static void sctp_sched_rr_dequeue_done(struct sctp_outq *q,
150 struct sctp_chunk *ch)
151{
152 struct sctp_stream_out_ext *soute;
153 __u16 sid;
154
155 /* Last chunk on that msg, move to the next stream */
156 sid = sctp_chunk_stream_no(ch);
157 soute = q->asoc->stream.out[sid].ext;
158
159 sctp_sched_rr_next_stream(&q->asoc->stream);
160
161 if (list_empty(&soute->outq))
162 sctp_sched_rr_unsched(&q->asoc->stream, soute);
163}
164
165static void sctp_sched_rr_sched_all(struct sctp_stream *stream)
166{
167 struct sctp_association *asoc;
168 struct sctp_stream_out_ext *soute;
169 struct sctp_chunk *ch;
170
171 asoc = container_of(stream, struct sctp_association, stream);
172 list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
173 __u16 sid;
174
175 sid = sctp_chunk_stream_no(ch);
176 soute = stream->out[sid].ext;
177 if (soute)
178 sctp_sched_rr_sched(stream, soute);
179 }
180}
181
182static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
183{
184 struct sctp_stream_out_ext *soute, *tmp;
185
186 list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list)
187 sctp_sched_rr_unsched(stream, soute);
188}
189
190struct sctp_sched_ops sctp_sched_rr = {
191 .set = sctp_sched_rr_set,
192 .get = sctp_sched_rr_get,
193 .init = sctp_sched_rr_init,
194 .init_sid = sctp_sched_rr_init_sid,
195 .free = sctp_sched_rr_free,
196 .enqueue = sctp_sched_rr_enqueue,
197 .dequeue = sctp_sched_rr_dequeue,
198 .dequeue_done = sctp_sched_rr_dequeue_done,
199 .sched_all = sctp_sched_rr_sched_all,
200 .unsched_all = sctp_sched_rr_unsched_all,
201};
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2d9bd3776bc8..1e5a22430cf5 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -87,14 +87,11 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
87 INIT_LIST_HEAD(&peer->send_ready); 87 INIT_LIST_HEAD(&peer->send_ready);
88 INIT_LIST_HEAD(&peer->transports); 88 INIT_LIST_HEAD(&peer->transports);
89 89
90 setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 90 timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
91 (unsigned long)peer); 91 timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
92 setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, 92 timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
93 (unsigned long)peer); 93 timer_setup(&peer->proto_unreach_timer,
94 setup_timer(&peer->reconf_timer, sctp_generate_reconf_event, 94 sctp_generate_proto_unreach_event, 0);
95 (unsigned long)peer);
96 setup_timer(&peer->proto_unreach_timer,
97 sctp_generate_proto_unreach_event, (unsigned long)peer);
98 95
99 /* Initialize the 64-bit random nonce sent with heartbeat. */ 96 /* Initialize the 64-bit random nonce sent with heartbeat. */
100 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); 97 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
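The transport.c hunk is part of the tree-wide timer conversion: callbacks now receive a struct timer_list * and recover their container with from_timer() instead of casting an unsigned long. The matching callback shape, sketched (the real sctp_generate_* handlers live in net/sctp/sm_sideeffect.c and are converted in the same series):

	void sctp_generate_t3_rtx_event(struct timer_list *t)
	{
		struct sctp_transport *transport =
				from_timer(transport, t, T3_rtx_timer);

		/* handle the T3-rtx expiry for this transport */
	}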
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 67abc0194f30..5447228bf1a0 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -847,7 +847,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event(
847 847
848struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( 848struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
849 const struct sctp_association *asoc, __u16 flags, __u16 stream_num, 849 const struct sctp_association *asoc, __u16 flags, __u16 stream_num,
850 __u16 *stream_list, gfp_t gfp) 850 __be16 *stream_list, gfp_t gfp)
851{ 851{
852 struct sctp_stream_reset_event *sreset; 852 struct sctp_stream_reset_event *sreset;
853 struct sctp_ulpevent *event; 853 struct sctp_ulpevent *event;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8c6d24b2995d..6451c5013e06 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -282,6 +282,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
282 __be32 *subnet, u8 *prefix_len) 282 __be32 *subnet, u8 *prefix_len)
283{ 283{
284 struct dst_entry *dst = sk_dst_get(clcsock->sk); 284 struct dst_entry *dst = sk_dst_get(clcsock->sk);
285 struct in_device *in_dev;
285 struct sockaddr_in addr; 286 struct sockaddr_in addr;
286 int rc = -ENOENT; 287 int rc = -ENOENT;
287 int len; 288 int len;
@@ -298,14 +299,17 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
298 /* get address to which the internal TCP socket is bound */ 299 /* get address to which the internal TCP socket is bound */
299 kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len); 300 kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
300 /* analyze IPv4 specific data of net_device belonging to TCP socket */ 301 /* analyze IPv4 specific data of net_device belonging to TCP socket */
301 for_ifa(dst->dev->ip_ptr) { 302 rcu_read_lock();
302 if (ifa->ifa_address != addr.sin_addr.s_addr) 303 in_dev = __in_dev_get_rcu(dst->dev);
304 for_ifa(in_dev) {
305 if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
303 continue; 306 continue;
304 *prefix_len = inet_mask_len(ifa->ifa_mask); 307 *prefix_len = inet_mask_len(ifa->ifa_mask);
305 *subnet = ifa->ifa_address & ifa->ifa_mask; 308 *subnet = ifa->ifa_address & ifa->ifa_mask;
306 rc = 0; 309 rc = 0;
307 break; 310 break;
308 } endfor_ifa(dst->dev->ip_ptr); 311 } endfor_ifa(in_dev);
312 rcu_read_unlock();
309 313
310out_rel: 314out_rel:
311 dst_release(dst); 315 dst_release(dst);
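Besides taking the RCU read lock around the ifa walk, the hunk switches from an exact-address comparison to inet_ifa_match(), which accepts any address inside the ifa's subnet. Its effect, paraphrased (the real helper lives in include/linux/inetdevice.h):

	/* True when addr falls within the subnet described by ifa
	 * (address/mask), not only when it equals the interface address. */
	static bool ifa_covers(__be32 addr, const struct in_ifaddr *ifa)
	{
		return !((addr ^ ifa->ifa_address) & ifa->ifa_mask);
	}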
@@ -386,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc)
386 int rc = 0; 390 int rc = 0;
387 u8 ibport; 391 u8 ibport;
388 392
393 if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
394 /* peer has not signalled SMC-capability */
395 smc->use_fallback = true;
396 goto out_connected;
397 }
398
389 /* IPSec connections opt out of SMC-R optimizations */ 399 /* IPSec connections opt out of SMC-R optimizations */
390 if (using_ipsec(smc)) { 400 if (using_ipsec(smc)) {
391 reason_code = SMC_CLC_DECL_IPSEC; 401 reason_code = SMC_CLC_DECL_IPSEC;
@@ -509,7 +519,7 @@ decline_rdma:
509 /* RDMA setup failed, switch back to TCP */ 519 /* RDMA setup failed, switch back to TCP */
510 smc->use_fallback = true; 520 smc->use_fallback = true;
511 if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { 521 if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
512 rc = smc_clc_send_decline(smc, reason_code, 0); 522 rc = smc_clc_send_decline(smc, reason_code);
513 if (rc < sizeof(struct smc_clc_msg_decline)) 523 if (rc < sizeof(struct smc_clc_msg_decline))
514 goto out_err; 524 goto out_err;
515 } 525 }
@@ -551,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
551 } 561 }
552 562
553 smc_copy_sock_settings_to_clc(smc); 563 smc_copy_sock_settings_to_clc(smc);
564 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
554 rc = kernel_connect(smc->clcsock, addr, alen, flags); 565 rc = kernel_connect(smc->clcsock, addr, alen, flags);
555 if (rc) 566 if (rc)
556 goto out; 567 goto out;
@@ -755,6 +766,12 @@ static void smc_listen_work(struct work_struct *work)
755 u8 prefix_len; 766 u8 prefix_len;
756 u8 ibport; 767 u8 ibport;
757 768
769 /* check if peer is smc capable */
770 if (!tcp_sk(newclcsock->sk)->syn_smc) {
771 new_smc->use_fallback = true;
772 goto out_connected;
773 }
774
758 /* do inband token exchange - 775 /* do inband token exchange -
759 *wait for and receive SMC Proposal CLC message 776 *wait for and receive SMC Proposal CLC message
760 */ 777 */
@@ -804,9 +821,7 @@ static void smc_listen_work(struct work_struct *work)
804 rc = local_contact; 821 rc = local_contact;
805 if (rc == -ENOMEM) 822 if (rc == -ENOMEM)
806 reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ 823 reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
807 else if (rc == -ENOLINK) 824 goto decline_rdma_unlock;
808 reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
809 goto decline_rdma;
810 } 825 }
811 link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 826 link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
812 827
@@ -814,7 +829,7 @@ static void smc_listen_work(struct work_struct *work)
814 rc = smc_buf_create(new_smc); 829 rc = smc_buf_create(new_smc);
815 if (rc) { 830 if (rc) {
816 reason_code = SMC_CLC_DECL_MEM; 831 reason_code = SMC_CLC_DECL_MEM;
817 goto decline_rdma; 832 goto decline_rdma_unlock;
818 } 833 }
819 834
820 smc_close_init(new_smc); 835 smc_close_init(new_smc);
@@ -829,7 +844,7 @@ static void smc_listen_work(struct work_struct *work)
829 buf_desc->mr_rx[SMC_SINGLE_LINK]); 844 buf_desc->mr_rx[SMC_SINGLE_LINK]);
830 if (rc) { 845 if (rc) {
831 reason_code = SMC_CLC_DECL_INTERR; 846 reason_code = SMC_CLC_DECL_INTERR;
832 goto decline_rdma; 847 goto decline_rdma_unlock;
833 } 848 }
834 } 849 }
835 } 850 }
@@ -837,15 +852,15 @@ static void smc_listen_work(struct work_struct *work)
837 852
838 rc = smc_clc_send_accept(new_smc, local_contact); 853 rc = smc_clc_send_accept(new_smc, local_contact);
839 if (rc) 854 if (rc)
840 goto out_err; 855 goto out_err_unlock;
841 856
842 /* receive SMC Confirm CLC message */ 857 /* receive SMC Confirm CLC message */
843 reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), 858 reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
844 SMC_CLC_CONFIRM); 859 SMC_CLC_CONFIRM);
845 if (reason_code < 0) 860 if (reason_code < 0)
846 goto out_err; 861 goto out_err_unlock;
847 if (reason_code > 0) 862 if (reason_code > 0)
848 goto decline_rdma; 863 goto decline_rdma_unlock;
849 smc_conn_save_peer_info(new_smc, &cclc); 864 smc_conn_save_peer_info(new_smc, &cclc);
850 if (local_contact == SMC_FIRST_CONTACT) 865 if (local_contact == SMC_FIRST_CONTACT)
851 smc_link_save_peer_info(link, &cclc); 866 smc_link_save_peer_info(link, &cclc);
@@ -853,34 +868,34 @@ static void smc_listen_work(struct work_struct *work)
853 rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc); 868 rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
854 if (rc) { 869 if (rc) {
855 reason_code = SMC_CLC_DECL_INTERR; 870 reason_code = SMC_CLC_DECL_INTERR;
856 goto decline_rdma; 871 goto decline_rdma_unlock;
857 } 872 }
858 873
859 if (local_contact == SMC_FIRST_CONTACT) { 874 if (local_contact == SMC_FIRST_CONTACT) {
860 rc = smc_ib_ready_link(link); 875 rc = smc_ib_ready_link(link);
861 if (rc) { 876 if (rc) {
862 reason_code = SMC_CLC_DECL_INTERR; 877 reason_code = SMC_CLC_DECL_INTERR;
863 goto decline_rdma; 878 goto decline_rdma_unlock;
864 } 879 }
865 /* QP confirmation over RoCE fabric */ 880 /* QP confirmation over RoCE fabric */
866 reason_code = smc_serv_conf_first_link(new_smc); 881 reason_code = smc_serv_conf_first_link(new_smc);
867 if (reason_code < 0) { 882 if (reason_code < 0) {
868 /* peer is not aware of a problem */ 883 /* peer is not aware of a problem */
869 rc = reason_code; 884 rc = reason_code;
870 goto out_err; 885 goto out_err_unlock;
871 } 886 }
872 if (reason_code > 0) 887 if (reason_code > 0)
873 goto decline_rdma; 888 goto decline_rdma_unlock;
874 } 889 }
875 890
876 smc_tx_init(new_smc); 891 smc_tx_init(new_smc);
892 mutex_unlock(&smc_create_lgr_pending);
877 893
878out_connected: 894out_connected:
879 sk_refcnt_debug_inc(newsmcsk); 895 sk_refcnt_debug_inc(newsmcsk);
880 if (newsmcsk->sk_state == SMC_INIT) 896 if (newsmcsk->sk_state == SMC_INIT)
881 newsmcsk->sk_state = SMC_ACTIVE; 897 newsmcsk->sk_state = SMC_ACTIVE;
882enqueue: 898enqueue:
883 mutex_unlock(&smc_create_lgr_pending);
884 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); 899 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
885 if (lsmc->sk.sk_state == SMC_LISTEN) { 900 if (lsmc->sk.sk_state == SMC_LISTEN) {
886 smc_accept_enqueue(&lsmc->sk, newsmcsk); 901 smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -894,17 +909,21 @@ enqueue:
894 sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ 909 sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
895 return; 910 return;
896 911
912decline_rdma_unlock:
913 mutex_unlock(&smc_create_lgr_pending);
897decline_rdma: 914decline_rdma:
898 /* RDMA setup failed, switch back to TCP */ 915 /* RDMA setup failed, switch back to TCP */
899 smc_conn_free(&new_smc->conn); 916 smc_conn_free(&new_smc->conn);
900 new_smc->use_fallback = true; 917 new_smc->use_fallback = true;
901 if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { 918 if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
902 rc = smc_clc_send_decline(new_smc, reason_code, 0); 919 rc = smc_clc_send_decline(new_smc, reason_code);
903 if (rc < sizeof(struct smc_clc_msg_decline)) 920 if (rc < sizeof(struct smc_clc_msg_decline))
904 goto out_err; 921 goto out_err;
905 } 922 }
906 goto out_connected; 923 goto out_connected;
907 924
925out_err_unlock:
926 mutex_unlock(&smc_create_lgr_pending);
908out_err: 927out_err:
909 newsmcsk->sk_state = SMC_CLOSED; 928 newsmcsk->sk_state = SMC_CLOSED;
910 smc_conn_free(&new_smc->conn); 929 smc_conn_free(&new_smc->conn);
@@ -961,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog)
961 * them to the clc socket -- copy smc socket options to clc socket 980 * them to the clc socket -- copy smc socket options to clc socket
962 */ 981 */
963 smc_copy_sock_settings_to_clc(smc); 982 smc_copy_sock_settings_to_clc(smc);
983 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
964 984
965 rc = kernel_listen(smc->clcsock, backlog); 985 rc = kernel_listen(smc->clcsock, backlog);
966 if (rc) 986 if (rc)
@@ -1403,6 +1423,7 @@ static int __init smc_init(void)
1403 goto out_sock; 1423 goto out_sock;
1404 } 1424 }
1405 1425
1426 static_branch_enable(&tcp_have_smc);
1406 return 0; 1427 return 0;
1407 1428
1408out_sock: 1429out_sock:
@@ -1427,6 +1448,7 @@ static void __exit smc_exit(void)
1427 list_del_init(&lgr->list); 1448 list_del_init(&lgr->list);
1428 smc_lgr_free(lgr); /* free link group */ 1449 smc_lgr_free(lgr); /* free link group */
1429 } 1450 }
1451 static_branch_disable(&tcp_have_smc);
1430 smc_ib_unregister_client(); 1452 smc_ib_unregister_client();
1431 sock_unregister(PF_SMC); 1453 sock_unregister(PF_SMC);
1432 proto_unregister(&smc_proto); 1454 proto_unregister(&smc_proto);
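The smc_init()/smc_exit() hunks above flip the tcp_have_smc static branch so TCP's hot paths can test for SMC support at near-zero cost before ever looking at the per-socket syn_smc bit. For reference, a minimal, self-contained sketch of that static-key pattern (the key name have_feature is hypothetical, not the kernel's tcp_have_smc plumbing):

    #include <linux/jump_label.h>

    static DEFINE_STATIC_KEY_FALSE(have_feature);

    /* hot path: compiles to a patched-out branch until the key is enabled */
    static inline bool feature_wanted(void)
    {
            return static_branch_unlikely(&have_feature);
    }

    /* module init/exit flip the key, exactly as smc_init()/smc_exit()
     * do for tcp_have_smc above
     */
    static int __init feature_init(void)
    {
            static_branch_enable(&have_feature);
            return 0;
    }

    static void __exit feature_exit(void)
    {
            static_branch_disable(&have_feature);
    }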
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 6e44313e4467..0bee9d16cf29 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -149,7 +150,7 @@ struct smc_connection {
149 atomic_t sndbuf_space; /* remaining space in sndbuf */ 150 atomic_t sndbuf_space; /* remaining space in sndbuf */
150 u16 tx_cdc_seq; /* sequence # for CDC send */ 151 u16 tx_cdc_seq; /* sequence # for CDC send */
151 spinlock_t send_lock; /* protect wr_sends */ 152 spinlock_t send_lock; /* protect wr_sends */
152 struct work_struct tx_work; /* retry of smc_cdc_msg_send */ 153 struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
153 154
154 struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. 155 struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl.
155 * .prod cf. TCP rcv_nxt 156 * .prod cf. TCP rcv_nxt
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index a7294edbc221..87f7bede6eab 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -62,10 +63,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
62 bh_unlock_sock(&smc->sk); 63 bh_unlock_sock(&smc->sk);
63} 64}
64 65
65int smc_cdc_get_free_slot(struct smc_link *link, 66int smc_cdc_get_free_slot(struct smc_connection *conn,
66 struct smc_wr_buf **wr_buf, 67 struct smc_wr_buf **wr_buf,
67 struct smc_cdc_tx_pend **pend) 68 struct smc_cdc_tx_pend **pend)
68{ 69{
70 struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
71
69 return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, 72 return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
70 (struct smc_wr_tx_pend_priv **)pend); 73 (struct smc_wr_tx_pend_priv **)pend);
71} 74}
@@ -118,8 +121,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
118 struct smc_wr_buf *wr_buf; 121 struct smc_wr_buf *wr_buf;
119 int rc; 122 int rc;
120 123
121 rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, 124 rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
122 &pend);
123 if (rc) 125 if (rc)
124 return rc; 126 return rc;
125 127
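The smc_cdc.c hunk narrows the CDC slot API from a link to a connection; the link lookup moves behind the call. A condensed before/after view of the call sites, taken from the hunks above:

    /* before: every caller dereferenced the single link itself */
    rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
                               &pend);

    /* after: the helper hides the SMC_SINGLE_LINK assumption in one place */
    rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);

Centralizing the lookup keeps the single-link assumption in one spot, which presumably eases a later move away from SMC_SINGLE_LINK.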
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 8e1d76f26007..149ceda1b088 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -206,7 +207,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
206 207
207struct smc_cdc_tx_pend; 208struct smc_cdc_tx_pend;
208 209
209int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf, 210int smc_cdc_get_free_slot(struct smc_connection *conn,
211 struct smc_wr_buf **wr_buf,
210 struct smc_cdc_tx_pend **pend); 212 struct smc_cdc_tx_pend **pend);
211void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); 213void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
212int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, 214int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 3934913ab835..1800e16b2a02 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -95,9 +96,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
95 } 96 }
96 if (clcm->type == SMC_CLC_DECLINE) { 97 if (clcm->type == SMC_CLC_DECLINE) {
97 reason_code = SMC_CLC_DECL_REPLY; 98 reason_code = SMC_CLC_DECL_REPLY;
98 if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis) 99 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
99 == SMC_CLC_DECL_SYNCERR)
100 smc->conn.lgr->sync_err = true; 100 smc->conn.lgr->sync_err = true;
101 smc_lgr_terminate(smc->conn.lgr);
102 }
101 } 103 }
102 104
103out: 105out:
@@ -105,8 +107,7 @@ out:
105} 107}
106 108
107/* send CLC DECLINE message across internal TCP socket */ 109/* send CLC DECLINE message across internal TCP socket */
108int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, 110int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
109 u8 out_of_sync)
110{ 111{
111 struct smc_clc_msg_decline dclc; 112 struct smc_clc_msg_decline dclc;
112 struct msghdr msg; 113 struct msghdr msg;
@@ -118,7 +119,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
118 dclc.hdr.type = SMC_CLC_DECLINE; 119 dclc.hdr.type = SMC_CLC_DECLINE;
119 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); 120 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
120 dclc.hdr.version = SMC_CLC_V1; 121 dclc.hdr.version = SMC_CLC_V1;
121 dclc.hdr.flag = out_of_sync ? 1 : 0; 122 dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
122 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); 123 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
123 dclc.peer_diagnosis = htonl(peer_diag_info); 124 dclc.peer_diagnosis = htonl(peer_diag_info);
124 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 125 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
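With this change both ends key off the CLC decline header's out-of-sync flag rather than re-deriving it from the diagnosis code. A condensed view of the new sender/receiver contract, fields as in the hunks above:

    /* sender: the flag follows directly from the diagnosis code */
    dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;

    /* receiver: a flagged decline marks the link group bad and tears it down */
    if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
            smc->conn.lgr->sync_err = true;
            smc_lgr_terminate(smc->conn.lgr);
    }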
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 13db8ce177c9..12a9af1539a2 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -106,8 +107,7 @@ struct smc_ib_device;
106 107
107int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 108int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
108 u8 expected_type); 109 u8 expected_type);
109int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, 110int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
110 u8 out_of_sync);
111int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, 111int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
112 u8 ibport); 112 u8 ibport);
113int smc_clc_send_confirm(struct smc_sock *smc); 113int smc_clc_send_confirm(struct smc_sock *smc);
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 3c2e166b5d22..48615d2ac4aa 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -174,15 +175,15 @@ int smc_close_active(struct smc_sock *smc)
174{ 175{
175 struct smc_cdc_conn_state_flags *txflags = 176 struct smc_cdc_conn_state_flags *txflags =
176 &smc->conn.local_tx_ctrl.conn_state_flags; 177 &smc->conn.local_tx_ctrl.conn_state_flags;
177 long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
178 struct smc_connection *conn = &smc->conn; 178 struct smc_connection *conn = &smc->conn;
179 struct sock *sk = &smc->sk; 179 struct sock *sk = &smc->sk;
180 int old_state; 180 int old_state;
181 long timeout;
181 int rc = 0; 182 int rc = 0;
182 183
183 if (sock_flag(sk, SOCK_LINGER) && 184 timeout = current->flags & PF_EXITING ?
184 !(current->flags & PF_EXITING)) 185 0 : sock_flag(sk, SOCK_LINGER) ?
185 timeout = sk->sk_lingertime; 186 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
186 187
187again: 188again:
188 old_state = sk->sk_state; 189 old_state = sk->sk_state;
@@ -208,7 +209,7 @@ again:
208 case SMC_ACTIVE: 209 case SMC_ACTIVE:
209 smc_close_stream_wait(smc, timeout); 210 smc_close_stream_wait(smc, timeout);
210 release_sock(sk); 211 release_sock(sk);
211 cancel_work_sync(&conn->tx_work); 212 cancel_delayed_work_sync(&conn->tx_work);
212 lock_sock(sk); 213 lock_sock(sk);
213 if (sk->sk_state == SMC_ACTIVE) { 214 if (sk->sk_state == SMC_ACTIVE) {
214 /* send close request */ 215 /* send close request */
@@ -234,7 +235,7 @@ again:
234 if (!smc_cdc_rxed_any_close(conn)) 235 if (!smc_cdc_rxed_any_close(conn))
235 smc_close_stream_wait(smc, timeout); 236 smc_close_stream_wait(smc, timeout);
236 release_sock(sk); 237 release_sock(sk);
237 cancel_work_sync(&conn->tx_work); 238 cancel_delayed_work_sync(&conn->tx_work);
238 lock_sock(sk); 239 lock_sock(sk);
239 if (sk->sk_err != ECONNABORTED) { 240 if (sk->sk_err != ECONNABORTED) {
240 /* confirm close from peer */ 241 /* confirm close from peer */
@@ -263,7 +264,9 @@ again:
263 /* peer sending PeerConnectionClosed will cause transition */ 264 /* peer sending PeerConnectionClosed will cause transition */
264 break; 265 break;
265 case SMC_PROCESSABORT: 266 case SMC_PROCESSABORT:
266 cancel_work_sync(&conn->tx_work); 267 release_sock(sk);
268 cancel_delayed_work_sync(&conn->tx_work);
269 lock_sock(sk);
267 smc_close_abort(conn); 270 smc_close_abort(conn);
268 sk->sk_state = SMC_CLOSED; 271 sk->sk_state = SMC_CLOSED;
269 smc_close_wait_tx_pends(smc); 272 smc_close_wait_tx_pends(smc);
@@ -358,7 +361,8 @@ static void smc_close_passive_work(struct work_struct *work)
358 case SMC_PEERCLOSEWAIT1: 361 case SMC_PEERCLOSEWAIT1:
359 if (rxflags->peer_done_writing) 362 if (rxflags->peer_done_writing)
360 sk->sk_state = SMC_PEERCLOSEWAIT2; 363 sk->sk_state = SMC_PEERCLOSEWAIT2;
361 /* fall through to check for closing */ 364 /* fall through */
365 /* to check for closing */
362 case SMC_PEERCLOSEWAIT2: 366 case SMC_PEERCLOSEWAIT2:
363 case SMC_PEERFINCLOSEWAIT: 367 case SMC_PEERFINCLOSEWAIT:
364 if (!smc_cdc_rxed_any_close(&smc->conn)) 368 if (!smc_cdc_rxed_any_close(&smc->conn))
@@ -411,13 +415,14 @@ void smc_close_sock_put_work(struct work_struct *work)
411int smc_close_shutdown_write(struct smc_sock *smc) 415int smc_close_shutdown_write(struct smc_sock *smc)
412{ 416{
413 struct smc_connection *conn = &smc->conn; 417 struct smc_connection *conn = &smc->conn;
414 long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
415 struct sock *sk = &smc->sk; 418 struct sock *sk = &smc->sk;
416 int old_state; 419 int old_state;
420 long timeout;
417 int rc = 0; 421 int rc = 0;
418 422
419 if (sock_flag(sk, SOCK_LINGER)) 423 timeout = current->flags & PF_EXITING ?
420 timeout = sk->sk_lingertime; 424 0 : sock_flag(sk, SOCK_LINGER) ?
425 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
421 426
422again: 427again:
423 old_state = sk->sk_state; 428 old_state = sk->sk_state;
@@ -425,7 +430,7 @@ again:
425 case SMC_ACTIVE: 430 case SMC_ACTIVE:
426 smc_close_stream_wait(smc, timeout); 431 smc_close_stream_wait(smc, timeout);
427 release_sock(sk); 432 release_sock(sk);
428 cancel_work_sync(&conn->tx_work); 433 cancel_delayed_work_sync(&conn->tx_work);
429 lock_sock(sk); 434 lock_sock(sk);
430 /* send close wr request */ 435 /* send close wr request */
431 rc = smc_close_wr(conn); 436 rc = smc_close_wr(conn);
@@ -439,7 +444,7 @@ again:
439 if (!smc_cdc_rxed_any_close(conn)) 444 if (!smc_cdc_rxed_any_close(conn))
440 smc_close_stream_wait(smc, timeout); 445 smc_close_stream_wait(smc, timeout);
441 release_sock(sk); 446 release_sock(sk);
442 cancel_work_sync(&conn->tx_work); 447 cancel_delayed_work_sync(&conn->tx_work);
443 lock_sock(sk); 448 lock_sock(sk);
444 /* confirm close from peer */ 449 /* confirm close from peer */
445 rc = smc_close_wr(conn); 450 rc = smc_close_wr(conn);
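The ternary chain that now computes timeout in both smc_close_active() and smc_close_shutdown_write() is compact but dense; it is equivalent to the following helper (a readability sketch only, smc_close_timeout is not part of the patch):

    static long smc_close_timeout(struct sock *sk)
    {
            if (current->flags & PF_EXITING)        /* exiting task: don't block */
                    return 0;
            if (sock_flag(sk, SOCK_LINGER))         /* honor SO_LINGER time */
                    return sk->sk_lingertime;
            return SMC_MAX_STREAM_WAIT_TIMEOUT;     /* default upper bound */
    }

The SMC_PROCESSABORT case also gains a release_sock()/lock_sock() pair around cancel_delayed_work_sync(), presumably because the tx_work handler itself takes the socket lock, so a synchronous cancel issued while holding it could deadlock.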
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 4a3d99a8d7cb..ed82506b1b0a 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 1a16d51e2330..94f21116dac5 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -25,8 +26,9 @@
25#include "smc_cdc.h" 26#include "smc_cdc.h"
26#include "smc_close.h" 27#include "smc_close.h"
27 28
28#define SMC_LGR_NUM_INCR 256 29#define SMC_LGR_NUM_INCR 256
29#define SMC_LGR_FREE_DELAY (600 * HZ) 30#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
31#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10)
30 32
31static u32 smc_lgr_num; /* unique link group number */ 33static u32 smc_lgr_num; /* unique link group number */
32 34
@@ -107,8 +109,15 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
107 __smc_lgr_unregister_conn(conn); 109 __smc_lgr_unregister_conn(conn);
108 } 110 }
109 write_unlock_bh(&lgr->conns_lock); 111 write_unlock_bh(&lgr->conns_lock);
110 if (reduced && !lgr->conns_num) 112 if (!reduced || lgr->conns_num)
111 schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY); 113 return;
114 /* client link group creation always follows the server link group
115 * creation. For client use a somewhat higher removal delay time,
116 * otherwise there is a risk of out-of-sync link groups.
117 */
118 mod_delayed_work(system_wq, &lgr->free_work,
119 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
120 SMC_LGR_FREE_DELAY_SERV);
112} 121}
113 122
114static void smc_lgr_free_work(struct work_struct *work) 123static void smc_lgr_free_work(struct work_struct *work)
@@ -372,10 +381,14 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
372 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, 381 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
373 &gattr)) 382 &gattr))
374 continue; 383 continue;
375 if (gattr.ndev && 384 if (gattr.ndev) {
376 (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) { 385 if (is_vlan_dev(gattr.ndev) &&
377 lnk->gid = gid; 386 vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
378 return 0; 387 lnk->gid = gid;
388 dev_put(gattr.ndev);
389 return 0;
390 }
391 dev_put(gattr.ndev);
379 } 392 }
380 } 393 }
381 return -ENODEV; 394 return -ENODEV;
@@ -549,7 +562,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
549{ 562{
550 struct smc_connection *conn = &smc->conn; 563 struct smc_connection *conn = &smc->conn;
551 struct smc_link_group *lgr = conn->lgr; 564 struct smc_link_group *lgr = conn->lgr;
552 struct smc_buf_desc *buf_desc = NULL; 565 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
553 struct list_head *buf_list; 566 struct list_head *buf_list;
554 int bufsize, bufsize_short; 567 int bufsize, bufsize_short;
555 int sk_buf_size; 568 int sk_buf_size;
@@ -562,7 +575,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
562 /* use socket send buffer size (w/o overhead) as start value */ 575 /* use socket send buffer size (w/o overhead) as start value */
563 sk_buf_size = smc->sk.sk_sndbuf / 2; 576 sk_buf_size = smc->sk.sk_sndbuf / 2;
564 577
565 for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); 578 for (bufsize_short = smc_compress_bufsize(sk_buf_size);
566 bufsize_short >= 0; bufsize_short--) { 579 bufsize_short >= 0; bufsize_short--) {
567 580
568 if (is_rmb) { 581 if (is_rmb) {
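The smc_core.c GID hunk (and the smc_ib.c one below) enforce one reference-counting rule: ib_query_gid() with a non-NULL attr argument returns a held reference on attr.ndev, which every exit path must drop with dev_put(). The added is_vlan_dev() guard also stops vlan_dev_vlan_id() from being called on a non-VLAN netdev. A minimal sketch of the corrected scan (vlan_id stands in for lgr->vlan_id):

    union ib_gid gid;
    struct ib_gid_attr gattr;

    if (!ib_query_gid(ibdev, ibport, i, &gid, &gattr) && gattr.ndev) {
            bool match = is_vlan_dev(gattr.ndev) &&
                         vlan_dev_vlan_id(gattr.ndev) == vlan_id;

            dev_put(gattr.ndev);    /* drop the reference on every path */
            if (match) {
                    lnk->gid = gid;
                    return 0;
            }
    }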
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 19c44bf4e391..fe691bf9af91 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 547e0e113b17..90f1a7f9085c 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -369,25 +370,17 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
369 370
370static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) 371static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
371{ 372{
372 struct net_device *ndev; 373 struct ib_gid_attr gattr;
373 int rc; 374 int rc;
374 375
375 rc = ib_query_gid(smcibdev->ibdev, ibport, 0, 376 rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
376 &smcibdev->gid[ibport - 1], NULL); 377 &smcibdev->gid[ibport - 1], &gattr);
377 /* the SMC protocol requires specification of the roce MAC address; 378 if (rc || !gattr.ndev)
378 * if net_device cannot be determined, it can be derived from gid 0 379 return -ENODEV;
379 */ 380
380 ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport); 381 memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
381 if (ndev) { 382 dev_put(gattr.ndev);
382 memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN); 383 return 0;
383 } else if (!rc) {
384 memcpy(&smcibdev->mac[ibport - 1][0],
385 &smcibdev->gid[ibport - 1].raw[8], 3);
386 memcpy(&smcibdev->mac[ibport - 1][3],
387 &smcibdev->gid[ibport - 1].raw[13], 3);
388 smcibdev->mac[ibport - 1][0] &= ~0x02;
389 }
390 return rc;
391} 384}
392 385
393/* Create an identifier unique for this instance of SMC-R. 386/* Create an identifier unique for this instance of SMC-R.
@@ -418,6 +411,7 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
418 &smcibdev->pattr[ibport - 1]); 411 &smcibdev->pattr[ibport - 1]);
419 if (rc) 412 if (rc)
420 goto out; 413 goto out;
414 /* the SMC protocol requires specification of the RoCE MAC address */
421 rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); 415 rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
422 if (rc) 416 if (rc)
423 goto out; 417 goto out;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 9b927a33d5e6..e90630dadf8e 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c2f9165d13ef..92fe4cc8c82c 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index b472f853953a..51b27ce90dbd 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 78f7af28ae4f..74568cdbca70 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -181,8 +182,10 @@ static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
181 sizeof(new_pnetelem->ndev->name)) || 182 sizeof(new_pnetelem->ndev->name)) ||
182 smc_pnet_same_ibname(pnetelem, 183 smc_pnet_same_ibname(pnetelem,
183 new_pnetelem->smcibdev->ibdev->name, 184 new_pnetelem->smcibdev->ibdev->name,
184 new_pnetelem->ib_port)) 185 new_pnetelem->ib_port)) {
186 dev_put(pnetelem->ndev);
185 goto found; 187 goto found;
188 }
186 } 189 }
187 list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist); 190 list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
188 rc = 0; 191 rc = 0;
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index c4f1bccd4358..5a29519db976 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index b17a333e9bb0..cbf58637ee14 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -148,6 +149,8 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
148 read_done = sock_intr_errno(timeo); 149 read_done = sock_intr_errno(timeo);
149 break; 150 break;
150 } 151 }
152 if (!timeo)
153 return -EAGAIN;
151 } 154 }
152 155
153 if (!atomic_read(&conn->bytes_to_rcv)) { 156 if (!atomic_read(&conn->bytes_to_rcv)) {
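The added "if (!timeo) return -EAGAIN;" gives non-blocking reads the expected failure mode once the wait budget is exhausted. From user space that surfaces as the usual pattern (an illustrative sketch, not part of the patch):

    #include <errno.h>
    #include <sys/socket.h>

    /* returns >0 bytes, 0 on EOF, or -1 with errno == EAGAIN when the
     * receive queue is empty -- the case the added check now reports
     * instead of stalling
     */
    static ssize_t smc_try_read(int fd, void *buf, size_t len)
    {
            return recv(fd, buf, len, MSG_DONTWAIT);
    }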
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h
index b5b80e1f8b0f..3a32b59bf06c 100644
--- a/net/smc/smc_rx.h
+++ b/net/smc/smc_rx.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3c656beb8820..c48dc2d5fd3a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -24,6 +25,8 @@
24#include "smc_cdc.h" 25#include "smc_cdc.h"
25#include "smc_tx.h" 26#include "smc_tx.h"
26 27
28#define SMC_TX_WORK_DELAY HZ
29
27/***************************** sndbuf producer *******************************/ 30/***************************** sndbuf producer *******************************/
28 31
29/* callback implementation for sk.sk_write_space() 32/* callback implementation for sk.sk_write_space()
@@ -394,8 +397,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
394 int rc; 397 int rc;
395 398
396 spin_lock_bh(&conn->send_lock); 399 spin_lock_bh(&conn->send_lock);
397 rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, 400 rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
398 &pend);
399 if (rc < 0) { 401 if (rc < 0) {
400 if (rc == -EBUSY) { 402 if (rc == -EBUSY) {
401 struct smc_sock *smc = 403 struct smc_sock *smc =
@@ -406,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
406 goto out_unlock; 408 goto out_unlock;
407 } 409 }
408 rc = 0; 410 rc = 0;
409 schedule_work(&conn->tx_work); 411 schedule_delayed_work(&conn->tx_work,
412 SMC_TX_WORK_DELAY);
410 } 413 }
411 goto out_unlock; 414 goto out_unlock;
412 } 415 }
@@ -430,7 +433,7 @@ out_unlock:
430 */ 433 */
431static void smc_tx_work(struct work_struct *work) 434static void smc_tx_work(struct work_struct *work)
432{ 435{
433 struct smc_connection *conn = container_of(work, 436 struct smc_connection *conn = container_of(to_delayed_work(work),
434 struct smc_connection, 437 struct smc_connection,
435 tx_work); 438 tx_work);
436 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 439 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
@@ -463,12 +466,12 @@ void smc_tx_consumer_update(struct smc_connection *conn)
463 ((to_confirm > conn->rmbe_update_limit) && 466 ((to_confirm > conn->rmbe_update_limit) &&
464 ((to_confirm > (conn->rmbe_size / 2)) || 467 ((to_confirm > (conn->rmbe_size / 2)) ||
465 conn->local_rx_ctrl.prod_flags.write_blocked))) { 468 conn->local_rx_ctrl.prod_flags.write_blocked))) {
466 rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], 469 rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
467 &wr_buf, &pend);
468 if (!rc) 470 if (!rc)
469 rc = smc_cdc_msg_send(conn, wr_buf, pend); 471 rc = smc_cdc_msg_send(conn, wr_buf, pend);
470 if (rc < 0) { 472 if (rc < 0) {
471 schedule_work(&conn->tx_work); 473 schedule_delayed_work(&conn->tx_work,
474 SMC_TX_WORK_DELAY);
472 return; 475 return;
473 } 476 }
474 smc_curs_write(&conn->rx_curs_confirmed, 477 smc_curs_write(&conn->rx_curs_confirmed,
@@ -487,6 +490,6 @@ void smc_tx_consumer_update(struct smc_connection *conn)
487void smc_tx_init(struct smc_sock *smc) 490void smc_tx_init(struct smc_sock *smc)
488{ 491{
489 smc->sk.sk_write_space = smc_tx_write_space; 492 smc->sk.sk_write_space = smc_tx_write_space;
490 INIT_WORK(&smc->conn.tx_work, smc_tx_work); 493 INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
491 spin_lock_init(&smc->conn.send_lock); 494 spin_lock_init(&smc->conn.send_lock);
492} 495}
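The smc_tx.c conversion from work_struct to delayed_work lets send retries back off by SMC_TX_WORK_DELAY instead of being requeued immediately. A minimal sketch of the full pattern, with hypothetical names standing in for smc_connection:

    #include <linux/workqueue.h>

    struct my_conn {
            struct delayed_work tx_work;
    };

    static void my_tx_work(struct work_struct *work)
    {
            struct my_conn *conn = container_of(to_delayed_work(work),
                                                struct my_conn, tx_work);
            /* retry the blocked send here */
    }

    /* setup and use:
     *   INIT_DELAYED_WORK(&conn->tx_work, my_tx_work);
     *   schedule_delayed_work(&conn->tx_work, HZ);   // SMC_TX_WORK_DELAY
     *   cancel_delayed_work_sync(&conn->tx_work);    // on close
     */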
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 1d6a0dcdcfe6..78255964fa4d 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index ab56bda66783..de4537f66832 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
@@ -244,7 +245,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
244 int rc; 245 int rc;
245 246
246 ib_req_notify_cq(link->smcibdev->roce_cq_send, 247 ib_req_notify_cq(link->smcibdev->roce_cq_send,
247 IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS); 248 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
248 pend = container_of(priv, struct smc_wr_tx_pend, priv); 249 pend = container_of(priv, struct smc_wr_tx_pend, priv);
249 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], 250 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
250 &failed_wr); 251 &failed_wr);
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 45eb53833052..2acf12b06063 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 * 4 *
diff --git a/net/socket.c b/net/socket.c
index c729625eb5d3..42d8e9c9ccd5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -568,7 +568,6 @@ struct socket *sock_alloc(void)
568 568
569 sock = SOCKET_I(inode); 569 sock = SOCKET_I(inode);
570 570
571 kmemcheck_annotate_bitfield(sock, type);
572 inode->i_ino = get_next_ino(); 571 inode->i_ino = get_next_ino();
573 inode->i_mode = S_IFSOCK | S_IRWXUGO; 572 inode->i_mode = S_IFSOCK | S_IRWXUGO;
574 inode->i_uid = current_fsuid(); 573 inode->i_uid = current_fsuid();
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index d4ea46a5f233..c5fda15ba319 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -49,7 +49,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
49{ 49{
50 /* Unrecoverable error in receive */ 50 /* Unrecoverable error in receive */
51 51
52 del_timer(&strp->msg_timer); 52 cancel_delayed_work(&strp->msg_timer_work);
53 53
54 if (strp->stopped) 54 if (strp->stopped)
55 return; 55 return;
@@ -68,7 +68,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
68static void strp_start_timer(struct strparser *strp, long timeo) 68static void strp_start_timer(struct strparser *strp, long timeo)
69{ 69{
70 if (timeo) 70 if (timeo)
71 mod_timer(&strp->msg_timer, timeo); 71 mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo);
72} 72}
73 73
74/* Lower lock held */ 74/* Lower lock held */
@@ -319,7 +319,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
319 eaten += (cand_len - extra); 319 eaten += (cand_len - extra);
320 320
321 /* Hurray, we have a new message! */ 321 /* Hurray, we have a new message! */
322 del_timer(&strp->msg_timer); 322 cancel_delayed_work(&strp->msg_timer_work);
323 strp->skb_head = NULL; 323 strp->skb_head = NULL;
324 STRP_STATS_INCR(strp->stats.msgs); 324 STRP_STATS_INCR(strp->stats.msgs);
325 325
@@ -450,9 +450,10 @@ static void strp_work(struct work_struct *w)
450 do_strp_work(container_of(w, struct strparser, work)); 450 do_strp_work(container_of(w, struct strparser, work));
451} 451}
452 452
453static void strp_msg_timeout(unsigned long arg) 453static void strp_msg_timeout(struct work_struct *w)
454{ 454{
455 struct strparser *strp = (struct strparser *)arg; 455 struct strparser *strp = container_of(w, struct strparser,
456 msg_timer_work.work);
456 457
457 /* Message assembly timed out */ 458 /* Message assembly timed out */
458 STRP_STATS_INCR(strp->stats.msg_timeouts); 459 STRP_STATS_INCR(strp->stats.msg_timeouts);
@@ -505,9 +506,7 @@ int strp_init(struct strparser *strp, struct sock *sk,
505 strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; 506 strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
506 strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; 507 strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
507 508
508 setup_timer(&strp->msg_timer, strp_msg_timeout, 509 INIT_DELAYED_WORK(&strp->msg_timer_work, strp_msg_timeout);
509 (unsigned long)strp);
510
511 INIT_WORK(&strp->work, strp_work); 510 INIT_WORK(&strp->work, strp_work);
512 511
513 return 0; 512 return 0;
@@ -532,7 +531,7 @@ void strp_done(struct strparser *strp)
532{ 531{
533 WARN_ON(!strp->stopped); 532 WARN_ON(!strp->stopped);
534 533
535 del_timer_sync(&strp->msg_timer); 534 cancel_delayed_work_sync(&strp->msg_timer_work);
536 cancel_work_sync(&strp->work); 535 cancel_work_sync(&strp->work);
537 536
538 if (strp->skb_head) { 537 if (strp->skb_head) {
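One nuance of the strparser conversion: the old strp_start_timer() passed timeo straight to mod_timer(), which expects an absolute expiry in jiffies, while mod_delayed_work() takes a relative delay, so the conversion also straightens out that unit mismatch. Side by side:

    /* both arguments are in jiffies, but mean different things */
    mod_timer(&strp->msg_timer, jiffies + timeo);            /* absolute expiry */
    mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo); /* relative delay */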
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index ea7ffa12e0f9..090658c3da12 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for Linux kernel SUN RPC 3# Makefile for Linux kernel SUN RPC
3# 4#
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 14e9e53e63d5..c374268b008f 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for Linux kernel rpcsec_gss implementation 3# Makefile for Linux kernel rpcsec_gss implementation
3# 4#
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7b1ee5a0b03c..73165e9ca5bf 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
855 return stat; 855 return stat;
856 if (integ_len > buf->len) 856 if (integ_len > buf->len)
857 return stat; 857 return stat;
858 if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) 858 if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
859 BUG(); 859 WARN_ON_ONCE(1);
860 return stat;
861 }
860 /* copy out mic... */ 862 /* copy out mic... */
861 if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) 863 if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
862 BUG(); 864 return stat;
863 if (mic.len > RPC_MAX_AUTH_SIZE) 865 if (mic.len > RPC_MAX_AUTH_SIZE)
864 return stat; 866 return stat;
865 mic.data = kmalloc(mic.len, GFP_KERNEL); 867 mic.data = kmalloc(mic.len, GFP_KERNEL);
@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
1611 BUG_ON(integ_len % 4); 1613 BUG_ON(integ_len % 4);
1612 *p++ = htonl(integ_len); 1614 *p++ = htonl(integ_len);
1613 *p++ = htonl(gc->gc_seq); 1615 *p++ = htonl(gc->gc_seq);
1614 if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) 1616 if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
1615 BUG(); 1617 WARN_ON_ONCE(1);
1618 goto out_err;
1619 }
1616 if (resbuf->tail[0].iov_base == NULL) { 1620 if (resbuf->tail[0].iov_base == NULL) {
1617 if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) 1621 if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
1618 goto out_err; 1622 goto out_err;
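The server-side GSS hunks above trade BUG() for a one-time warning plus an error return, the usual pattern for "impossible" conditions that are nonetheless reachable from network input:

    if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
            WARN_ON_ONCE(1);        /* log once; don't crash on bad input */
            return stat;            /* fail this request and carry on */
    }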
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 5f3d527dff65..75d72e109a04 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * linux/net/sunrpc/auth_null.c 3 * linux/net/sunrpc/auth_null.c
3 * 4 *
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 82337e1ec9cd..dafd6b870ba3 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * linux/net/sunrpc/auth_unix.c 3 * linux/net/sunrpc/auth_unix.c
3 * 4 *
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2ad827db2704..a801da812f86 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task)
1491} 1491}
1492EXPORT_SYMBOL_GPL(rpc_restart_call); 1492EXPORT_SYMBOL_GPL(rpc_restart_call);
1493 1493
1494#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
1495const char 1494const char
1496*rpc_proc_name(const struct rpc_task *task) 1495*rpc_proc_name(const struct rpc_task *task)
1497{ 1496{
@@ -1505,7 +1504,6 @@ const char
1505 } else 1504 } else
1506 return "no proc"; 1505 return "no proc";
1507} 1506}
1508#endif
1509 1507
1510/* 1508/*
1511 * 0. Initial state 1509 * 0. Initial state
@@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task)
1519 struct rpc_clnt *clnt = task->tk_client; 1517 struct rpc_clnt *clnt = task->tk_client;
1520 int idx = task->tk_msg.rpc_proc->p_statidx; 1518 int idx = task->tk_msg.rpc_proc->p_statidx;
1521 1519
1520 trace_rpc_request(task);
1522 dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, 1521 dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
1523 clnt->cl_program->name, clnt->cl_vers, 1522 clnt->cl_program->name, clnt->cl_vers,
1524 rpc_proc_name(task), 1523 rpc_proc_name(task),
@@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task)
1586 switch (status) { 1585 switch (status) {
1587 case -ENOMEM: 1586 case -ENOMEM:
1588 rpc_delay(task, HZ >> 2); 1587 rpc_delay(task, HZ >> 2);
1588 /* fall through */
1589 case -EAGAIN: /* woken up; retry */ 1589 case -EAGAIN: /* woken up; retry */
1590 task->tk_action = call_retry_reserve; 1590 task->tk_action = call_retry_reserve;
1591 return; 1591 return;
@@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task)
1647 /* Use rate-limiting and a max number of retries if refresh 1647 /* Use rate-limiting and a max number of retries if refresh
1648 * had status 0 but failed to update the cred. 1648 * had status 0 but failed to update the cred.
1649 */ 1649 */
1650 /* fall through */
1650 case -ETIMEDOUT: 1651 case -ETIMEDOUT:
1651 rpc_delay(task, 3*HZ); 1652 rpc_delay(task, 3*HZ);
1653 /* fall through */
1652 case -EAGAIN: 1654 case -EAGAIN:
1653 status = -EACCES; 1655 status = -EACCES;
1656 /* fall through */
1654 case -EKEYEXPIRED: 1657 case -EKEYEXPIRED:
1655 if (!task->tk_cred_retry) 1658 if (!task->tk_cred_retry)
1656 break; 1659 break;
@@ -1911,6 +1914,7 @@ call_connect_status(struct rpc_task *task)
1911 task->tk_action = call_bind; 1914 task->tk_action = call_bind;
1912 return; 1915 return;
1913 } 1916 }
1917 /* fall through */
1914 case -ECONNRESET: 1918 case -ECONNRESET:
1915 case -ECONNABORTED: 1919 case -ECONNABORTED:
1916 case -ENETUNREACH: 1920 case -ENETUNREACH:
@@ -1924,6 +1928,7 @@ call_connect_status(struct rpc_task *task)
1924 break; 1928 break;
1925 /* retry with existing socket, after a delay */ 1929 /* retry with existing socket, after a delay */
1926 rpc_delay(task, 3*HZ); 1930 rpc_delay(task, 3*HZ);
1931 /* fall through */
1927 case -EAGAIN: 1932 case -EAGAIN:
1928 /* Check for timeouts before looping back to call_bind */ 1933 /* Check for timeouts before looping back to call_bind */
1929 case -ETIMEDOUT: 1934 case -ETIMEDOUT:
@@ -2025,6 +2030,7 @@ call_transmit_status(struct rpc_task *task)
2025 rpc_exit(task, task->tk_status); 2030 rpc_exit(task, task->tk_status);
2026 break; 2031 break;
2027 } 2032 }
2033 /* fall through */
2028 case -ECONNRESET: 2034 case -ECONNRESET:
2029 case -ECONNABORTED: 2035 case -ECONNABORTED:
2030 case -EADDRINUSE: 2036 case -EADDRINUSE:
@@ -2145,6 +2151,7 @@ call_status(struct rpc_task *task)
2145 * were a timeout. 2151 * were a timeout.
2146 */ 2152 */
2147 rpc_delay(task, 3*HZ); 2153 rpc_delay(task, 3*HZ);
2154 /* fall through */
2148 case -ETIMEDOUT: 2155 case -ETIMEDOUT:
2149 task->tk_action = call_timeout; 2156 task->tk_action = call_timeout;
2150 break; 2157 break;
@@ -2152,14 +2159,17 @@ call_status(struct rpc_task *task)
2152 case -ECONNRESET: 2159 case -ECONNRESET:
2153 case -ECONNABORTED: 2160 case -ECONNABORTED:
2154 rpc_force_rebind(clnt); 2161 rpc_force_rebind(clnt);
2162 /* fall through */
2155 case -EADDRINUSE: 2163 case -EADDRINUSE:
2156 rpc_delay(task, 3*HZ); 2164 rpc_delay(task, 3*HZ);
2165 /* fall through */
2157 case -EPIPE: 2166 case -EPIPE:
2158 case -ENOTCONN: 2167 case -ENOTCONN:
2159 task->tk_action = call_bind; 2168 task->tk_action = call_bind;
2160 break; 2169 break;
2161 case -ENOBUFS: 2170 case -ENOBUFS:
2162 rpc_delay(task, HZ>>2); 2171 rpc_delay(task, HZ>>2);
2172 /* fall through */
2163 case -EAGAIN: 2173 case -EAGAIN:
2164 task->tk_action = call_transmit; 2174 task->tk_action = call_transmit;
2165 break; 2175 break;
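The /* fall through */ comments added throughout the call_*() state machines are load-bearing: GCC 7's -Wimplicit-fallthrough recognizes such comments as annotations, so intentional drop-throughs stop warning while accidental ones still do. The shape of the pattern:

    switch (status) {
    case -ENOBUFS:
            rpc_delay(task, HZ >> 2);
            /* fall through */
    case -EAGAIN:
            task->tk_action = call_transmit;
            break;
    }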
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index c8fd0b6c1618..e980d2a493de 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/** 2/**
2 * debugfs interface for sunrpc 3 * debugfs interface for sunrpc
3 * 4 *
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 394ce523174c..7ec10b92bea1 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef __SUNRPC_NETNS_H__ 2#ifndef __SUNRPC_NETNS_H__
2#define __SUNRPC_NETNS_H__ 3#define __SUNRPC_NETNS_H__
3 4
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 61a504fb1ae2..7803f3b6aa53 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1410,8 +1410,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1410 return PTR_ERR(gssd_dentry); 1410 return PTR_ERR(gssd_dentry);
1411 } 1411 }
1412 1412
1413 dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", 1413 dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
1414 net, NET_NAME(net)); 1414 net->ns.inum, NET_NAME(net));
1415 mutex_lock(&sn->pipefs_sb_lock); 1415 mutex_lock(&sn->pipefs_sb_lock);
1416 sn->pipefs_sb = sb; 1416 sn->pipefs_sb = sb;
1417 err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, 1417 err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
@@ -1462,8 +1462,8 @@ static void rpc_kill_sb(struct super_block *sb)
1462 goto out; 1462 goto out;
1463 } 1463 }
1464 sn->pipefs_sb = NULL; 1464 sn->pipefs_sb = NULL;
1465 dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", 1465 dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n",
1466 net, NET_NAME(net)); 1466 net->ns.inum, NET_NAME(net));
1467 blocking_notifier_call_chain(&rpc_pipefs_notifier_list, 1467 blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
1468 RPC_PIPEFS_UMOUNT, 1468 RPC_PIPEFS_UMOUNT,
1469 sb); 1469 sb);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index ea0676f199c8..c526f8fb37c9 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -216,9 +216,9 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
216 smp_wmb(); 216 smp_wmb();
217 sn->rpcb_users = 1; 217 sn->rpcb_users = 1;
218 dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " 218 dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
219 "%p, rpcb_local_clnt4: %p) for net %p%s\n", 219 "%p, rpcb_local_clnt4: %p) for net %x%s\n",
220 sn->rpcb_local_clnt, sn->rpcb_local_clnt4, 220 sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
221 net, (net == &init_net) ? " (init_net)" : ""); 221 net->ns.inum, (net == &init_net) ? " (init_net)" : "");
222} 222}
223 223
224/* 224/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 0cc83839c13c..b1b49edd7c4d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -44,7 +44,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
44 44
45static void rpc_async_schedule(struct work_struct *); 45static void rpc_async_schedule(struct work_struct *);
46static void rpc_release_task(struct rpc_task *task); 46static void rpc_release_task(struct rpc_task *task);
47static void __rpc_queue_timer_fn(unsigned long ptr); 47static void __rpc_queue_timer_fn(struct timer_list *t);
48 48
49/* 49/*
50 * RPC tasks sit here while waiting for conditions to improve. 50 * RPC tasks sit here while waiting for conditions to improve.
@@ -228,7 +228,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
228 queue->maxpriority = nr_queues - 1; 228 queue->maxpriority = nr_queues - 1;
229 rpc_reset_waitqueue_priority(queue); 229 rpc_reset_waitqueue_priority(queue);
230 queue->qlen = 0; 230 queue->qlen = 0;
231 setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); 231 timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
232 INIT_LIST_HEAD(&queue->timer_list.list); 232 INIT_LIST_HEAD(&queue->timer_list.list);
233 rpc_assign_waitqueue_name(queue, qname); 233 rpc_assign_waitqueue_name(queue, qname);
234} 234}
@@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
274 274
275static void rpc_set_active(struct rpc_task *task) 275static void rpc_set_active(struct rpc_task *task)
276{ 276{
277 trace_rpc_task_begin(task->tk_client, task, NULL);
278
279 rpc_task_set_debuginfo(task); 277 rpc_task_set_debuginfo(task);
280 set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 278 set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
279 trace_rpc_task_begin(task->tk_client, task, NULL);
281} 280}
282 281
283/* 282/*
@@ -635,9 +634,9 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
635} 634}
636EXPORT_SYMBOL_GPL(rpc_wake_up_status); 635EXPORT_SYMBOL_GPL(rpc_wake_up_status);
637 636
638static void __rpc_queue_timer_fn(unsigned long ptr) 637static void __rpc_queue_timer_fn(struct timer_list *t)
639{ 638{
640 struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; 639 struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
641 struct rpc_task *task, *n; 640 struct rpc_task *task, *n;
642 unsigned long expires, now, timeo; 641 unsigned long expires, now, timeo;
643 642
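The sched.c, svc.c, svc_xprt.c and xprt.c changes below all follow the tree-wide timer API migration: callbacks now take the timer_list pointer itself and recover their container with from_timer() instead of casting an unsigned long. Minimal sketch with hypothetical names:

    #include <linux/timer.h>

    struct my_queue {
            struct timer_list timer;
    };

    static void my_queue_timeout(struct timer_list *t)
    {
            struct my_queue *q = from_timer(q, t, timer);
            /* expire waiters on q here */
    }

    /* timer_setup(&q->timer, my_queue_timeout, 0);
     * mod_timer(&q->timer, jiffies + HZ);
     */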
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c73de181467a..56f9eff74150 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -65,10 +65,13 @@ err_proc:
65 65
66static __net_exit void sunrpc_exit_net(struct net *net) 66static __net_exit void sunrpc_exit_net(struct net *net)
67{ 67{
68 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
69
68 rpc_pipefs_exit_net(net); 70 rpc_pipefs_exit_net(net);
69 unix_gid_cache_destroy(net); 71 unix_gid_cache_destroy(net);
70 ip_map_cache_destroy(net); 72 ip_map_cache_destroy(net);
71 rpc_proc_exit(net); 73 rpc_proc_exit(net);
74 WARN_ON_ONCE(!list_empty(&sn->all_clients));
72} 75}
73 76
74static struct pernet_operations sunrpc_net_ops = { 77static struct pernet_operations sunrpc_net_ops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index aa04666f929d..387cc4add6f6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map);
50static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ 50static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
51 51
52static int 52static int
53param_set_pool_mode(const char *val, struct kernel_param *kp) 53param_set_pool_mode(const char *val, const struct kernel_param *kp)
54{ 54{
55 int *ip = (int *)kp->arg; 55 int *ip = (int *)kp->arg;
56 struct svc_pool_map *m = &svc_pool_map; 56 struct svc_pool_map *m = &svc_pool_map;
@@ -80,7 +80,7 @@ out:
80} 80}
81 81
82static int 82static int
83param_get_pool_mode(char *buf, struct kernel_param *kp) 83param_get_pool_mode(char *buf, const struct kernel_param *kp)
84{ 84{
85 int *ip = (int *)kp->arg; 85 int *ip = (int *)kp->arg;
86 86
@@ -455,7 +455,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
455 serv->sv_xdrsize = xdrsize; 455 serv->sv_xdrsize = xdrsize;
456 INIT_LIST_HEAD(&serv->sv_tempsocks); 456 INIT_LIST_HEAD(&serv->sv_tempsocks);
457 INIT_LIST_HEAD(&serv->sv_permsocks); 457 INIT_LIST_HEAD(&serv->sv_permsocks);
458 init_timer(&serv->sv_temptimer); 458 timer_setup(&serv->sv_temptimer, NULL, 0);
459 spin_lock_init(&serv->sv_lock); 459 spin_lock_init(&serv->sv_lock);
460 460
461 __svc_init_bc(serv); 461 __svc_init_bc(serv);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d16a8b423c20..f9307bd6644b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -28,7 +28,7 @@ module_param(svc_rpc_per_connection_limit, uint, 0644);
28static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); 28static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
29static int svc_deferred_recv(struct svc_rqst *rqstp); 29static int svc_deferred_recv(struct svc_rqst *rqstp);
30static struct cache_deferred_req *svc_defer(struct cache_req *req); 30static struct cache_deferred_req *svc_defer(struct cache_req *req);
31static void svc_age_temp_xprts(unsigned long closure); 31static void svc_age_temp_xprts(struct timer_list *t);
32static void svc_delete_xprt(struct svc_xprt *xprt); 32static void svc_delete_xprt(struct svc_xprt *xprt);
33 33
34/* apparently the "standard" is that clients close 34/* apparently the "standard" is that clients close
@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
250 svc_xprt_received(new); 250 svc_xprt_received(new);
251} 251}
252 252
253int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 253static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
254 struct net *net, const int family, 254 struct net *net, const int family,
255 const unsigned short port, int flags) 255 const unsigned short port, int flags)
256{ 256{
257 struct svc_xprt_class *xcl; 257 struct svc_xprt_class *xcl;
258 258
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
380 struct svc_pool *pool; 380 struct svc_pool *pool;
381 struct svc_rqst *rqstp = NULL; 381 struct svc_rqst *rqstp = NULL;
382 int cpu; 382 int cpu;
383 bool queued = false;
384 383
385 if (!svc_xprt_has_something_to_do(xprt)) 384 if (!svc_xprt_has_something_to_do(xprt))
386 goto out; 385 goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
401 400
402 atomic_long_inc(&pool->sp_stats.packets); 401 atomic_long_inc(&pool->sp_stats.packets);
403 402
404redo_search: 403 dprintk("svc: transport %p put into queue\n", xprt);
404 spin_lock_bh(&pool->sp_lock);
405 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
406 pool->sp_stats.sockets_queued++;
407 spin_unlock_bh(&pool->sp_lock);
408
405 /* find a thread for this xprt */ 409 /* find a thread for this xprt */
406 rcu_read_lock(); 410 rcu_read_lock();
407 list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { 411 list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
408 /* Do a lockless check first */ 412 if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
409 if (test_bit(RQ_BUSY, &rqstp->rq_flags))
410 continue; 413 continue;
411
412 /*
413 * Once the xprt has been queued, it can only be dequeued by
414 * the task that intends to service it. All we can do at that
415 * point is to try to wake this thread back up so that it can
416 * do so.
417 */
418 if (!queued) {
419 spin_lock_bh(&rqstp->rq_lock);
420 if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
421 /* already busy, move on... */
422 spin_unlock_bh(&rqstp->rq_lock);
423 continue;
424 }
425
426 /* this one will do */
427 rqstp->rq_xprt = xprt;
428 svc_xprt_get(xprt);
429 spin_unlock_bh(&rqstp->rq_lock);
430 }
431 rcu_read_unlock();
432
433 atomic_long_inc(&pool->sp_stats.threads_woken); 414 atomic_long_inc(&pool->sp_stats.threads_woken);
434 wake_up_process(rqstp->rq_task); 415 wake_up_process(rqstp->rq_task);
435 put_cpu(); 416 goto out_unlock;
436 goto out;
437 }
438 rcu_read_unlock();
439
440 /*
441 * We didn't find an idle thread to use, so we need to queue the xprt.
442 * Do so and then search again. If we find one, we can't hook this one
443 * up to it directly but we can wake the thread up in the hopes that it
444 * will pick it up once it searches for a xprt to service.
445 */
446 if (!queued) {
447 queued = true;
448 dprintk("svc: transport %p put into queue\n", xprt);
449 spin_lock_bh(&pool->sp_lock);
450 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
451 pool->sp_stats.sockets_queued++;
452 spin_unlock_bh(&pool->sp_lock);
453 goto redo_search;
454 } 417 }
418 set_bit(SP_CONGESTED, &pool->sp_flags);
455 rqstp = NULL; 419 rqstp = NULL;
420out_unlock:
421 rcu_read_unlock();
456 put_cpu(); 422 put_cpu();
457out: 423out:
458 trace_svc_xprt_do_enqueue(xprt, rqstp); 424 trace_svc_xprt_do_enqueue(xprt, rqstp);
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
721 687
722static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) 688static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
723{ 689{
724 struct svc_xprt *xprt;
725 struct svc_pool *pool = rqstp->rq_pool; 690 struct svc_pool *pool = rqstp->rq_pool;
726 long time_left = 0; 691 long time_left = 0;
727 692
728 /* rq_xprt should be clear on entry */ 693 /* rq_xprt should be clear on entry */
729 WARN_ON_ONCE(rqstp->rq_xprt); 694 WARN_ON_ONCE(rqstp->rq_xprt);
730 695
731 /* Normally we will wait up to 5 seconds for any required 696 rqstp->rq_xprt = svc_xprt_dequeue(pool);
732 * cache information to be provided. 697 if (rqstp->rq_xprt)
733 */ 698 goto out_found;
734 rqstp->rq_chandle.thread_wait = 5*HZ;
735
736 xprt = svc_xprt_dequeue(pool);
737 if (xprt) {
738 rqstp->rq_xprt = xprt;
739
740 /* As there is a shortage of threads and this request
741 * had to be queued, don't allow the thread to wait so
742 * long for cache updates.
743 */
744 rqstp->rq_chandle.thread_wait = 1*HZ;
745 clear_bit(SP_TASK_PENDING, &pool->sp_flags);
746 return xprt;
747 }
748 699
749 /* 700 /*
750 * We have to be able to interrupt this wait 701 * We have to be able to interrupt this wait
751 * to bring down the daemons ... 702 * to bring down the daemons ...
752 */ 703 */
753 set_current_state(TASK_INTERRUPTIBLE); 704 set_current_state(TASK_INTERRUPTIBLE);
705 smp_mb__before_atomic();
706 clear_bit(SP_CONGESTED, &pool->sp_flags);
754 clear_bit(RQ_BUSY, &rqstp->rq_flags); 707 clear_bit(RQ_BUSY, &rqstp->rq_flags);
755 smp_mb(); 708 smp_mb__after_atomic();
756 709
757 if (likely(rqst_should_sleep(rqstp))) 710 if (likely(rqst_should_sleep(rqstp)))
758 time_left = schedule_timeout(timeout); 711 time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
761 714
762 try_to_freeze(); 715 try_to_freeze();
763 716
764 spin_lock_bh(&rqstp->rq_lock);
765 set_bit(RQ_BUSY, &rqstp->rq_flags); 717 set_bit(RQ_BUSY, &rqstp->rq_flags);
766 spin_unlock_bh(&rqstp->rq_lock); 718 smp_mb__after_atomic();
767 719 rqstp->rq_xprt = svc_xprt_dequeue(pool);
768 xprt = rqstp->rq_xprt; 720 if (rqstp->rq_xprt)
769 if (xprt != NULL) 721 goto out_found;
770 return xprt;
771 722
772 if (!time_left) 723 if (!time_left)
773 atomic_long_inc(&pool->sp_stats.threads_timedout); 724 atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
775 if (signalled() || kthread_should_stop()) 726 if (signalled() || kthread_should_stop())
776 return ERR_PTR(-EINTR); 727 return ERR_PTR(-EINTR);
777 return ERR_PTR(-EAGAIN); 728 return ERR_PTR(-EAGAIN);
729out_found:
730 /* Normally we will wait up to 5 seconds for any required
731 * cache information to be provided.
732 */
733 if (!test_bit(SP_CONGESTED, &pool->sp_flags))
734 rqstp->rq_chandle.thread_wait = 5*HZ;
735 else
736 rqstp->rq_chandle.thread_wait = 1*HZ;
737 return rqstp->rq_xprt;
778} 738}
779 739
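The out_found logic above distills to a simple rule: a pool marked SP_CONGESTED (a request had to be queued because no idle thread was available) gets the short cache wait. A minimal sketch of that rule, assuming the svc_pool fields from this patch; the helper name is hypothetical:

/* Hypothetical helper: how long may this thread block on cache
 * fills?  Five seconds normally, one second when threads are a
 * scarce resource and the pool is congested.
 */
static long svc_thread_cache_wait(struct svc_pool *pool)
{
	return test_bit(SP_CONGESTED, &pool->sp_flags) ? 1 * HZ : 5 * HZ;
}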
780static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) 740static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -785,8 +745,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt
785 serv->sv_tmpcnt++; 745 serv->sv_tmpcnt++;
786 if (serv->sv_temptimer.function == NULL) { 746 if (serv->sv_temptimer.function == NULL) {
787 /* setup timer to age temp transports */ 747 /* setup timer to age temp transports */
788 setup_timer(&serv->sv_temptimer, svc_age_temp_xprts, 748 serv->sv_temptimer.function = svc_age_temp_xprts;
789 (unsigned long)serv);
790 mod_timer(&serv->sv_temptimer, 749 mod_timer(&serv->sv_temptimer,
791 jiffies + svc_conn_age_period * HZ); 750 jiffies + svc_conn_age_period * HZ);
792 } 751 }
@@ -960,9 +919,9 @@ out:
960 * Timer function to close old temporary transports, using 919 * Timer function to close old temporary transports, using
961 * a mark-and-sweep algorithm. 920 * a mark-and-sweep algorithm.
962 */ 921 */
963static void svc_age_temp_xprts(unsigned long closure) 922static void svc_age_temp_xprts(struct timer_list *t)
964{ 923{
965 struct svc_serv *serv = (struct svc_serv *)closure; 924 struct svc_serv *serv = from_timer(serv, t, sv_temptimer);
966 struct svc_xprt *xprt; 925 struct svc_xprt *xprt;
967 struct list_head *le, *next; 926 struct list_head *le, *next;
968 927
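Both timer conversions in this patch (svc_age_temp_xprts here, xprt_init_autodisconnect in xprt.c below) follow the same timer_list migration recipe. A sketch of the pattern with a hypothetical owner struct; timer_setup() and from_timer() are the real API:

struct owner {				/* hypothetical */
	struct timer_list timer;
};

static void owner_timeout(struct timer_list *t)
{
	/* from_timer() replaces the old (unsigned long) cast: it
	 * recovers the enclosing struct by member name.
	 */
	struct owner *o = from_timer(o, t, timer);
	/* ... age out o's resources ... */
}

static void owner_init(struct owner *o)
{
	/* old API: setup_timer(&o->timer, fn, (unsigned long)o); */
	timer_setup(&o->timer, owner_timeout, 0);
}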
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e741ec2b4d8e..333b9d697ae5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -696,9 +696,9 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
696} 696}
697 697
698static void 698static void
699xprt_init_autodisconnect(unsigned long data) 699xprt_init_autodisconnect(struct timer_list *t)
700{ 700{
701 struct rpc_xprt *xprt = (struct rpc_xprt *)data; 701 struct rpc_xprt *xprt = from_timer(xprt, t, timer);
702 702
703 spin_lock(&xprt->transport_lock); 703 spin_lock(&xprt->transport_lock);
704 if (!list_empty(&xprt->recv)) 704 if (!list_empty(&xprt->recv))
@@ -1139,6 +1139,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
1139 case -EAGAIN: 1139 case -EAGAIN:
1140 xprt_add_backlog(xprt, task); 1140 xprt_add_backlog(xprt, task);
1141 dprintk("RPC: waiting for request slot\n"); 1141 dprintk("RPC: waiting for request slot\n");
1142 /* fall through */
1142 default: 1143 default:
1143 task->tk_status = -EAGAIN; 1144 task->tk_status = -EAGAIN;
1144 } 1145 }
@@ -1333,7 +1334,7 @@ void xprt_release(struct rpc_task *task)
1333 rpc_count_iostats(task, task->tk_client->cl_metrics); 1334 rpc_count_iostats(task, task->tk_client->cl_metrics);
1334 spin_lock(&xprt->recv_lock); 1335 spin_lock(&xprt->recv_lock);
1335 if (!list_empty(&req->rq_list)) { 1336 if (!list_empty(&req->rq_list)) {
1336 list_del(&req->rq_list); 1337 list_del_init(&req->rq_list);
1337 xprt_wait_on_pinned_rqst(req); 1338 xprt_wait_on_pinned_rqst(req);
1338 } 1339 }
1339 spin_unlock(&xprt->recv_lock); 1340 spin_unlock(&xprt->recv_lock);
@@ -1422,10 +1423,9 @@ found:
1422 xprt->idle_timeout = 0; 1423 xprt->idle_timeout = 0;
1423 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1424 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1424 if (xprt_has_timer(xprt)) 1425 if (xprt_has_timer(xprt))
1425 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1426 timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
1426 (unsigned long)xprt);
1427 else 1427 else
1428 init_timer(&xprt->timer); 1428 timer_setup(&xprt->timer, NULL, 0);
1429 1429
1430 if (strlen(args->servername) > RPC_MAXNETNAMELEN) { 1430 if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
1431 xprt_destroy(xprt); 1431 xprt_destroy(xprt);
@@ -1445,6 +1445,23 @@ out:
1445 return xprt; 1445 return xprt;
1446} 1446}
1447 1447
1448static void xprt_destroy_cb(struct work_struct *work)
1449{
1450 struct rpc_xprt *xprt =
1451 container_of(work, struct rpc_xprt, task_cleanup);
1452
1453 rpc_xprt_debugfs_unregister(xprt);
1454 rpc_destroy_wait_queue(&xprt->binding);
1455 rpc_destroy_wait_queue(&xprt->pending);
1456 rpc_destroy_wait_queue(&xprt->sending);
1457 rpc_destroy_wait_queue(&xprt->backlog);
1458 kfree(xprt->servername);
1459 /*
1460 * Tear down transport state and free the rpc_xprt
1461 */
1462 xprt->ops->destroy(xprt);
1463}
1464
1448/** 1465/**
1449 * xprt_destroy - destroy an RPC transport, killing off all requests. 1466 * xprt_destroy - destroy an RPC transport, killing off all requests.
1450 * @xprt: transport to destroy 1467 * @xprt: transport to destroy
@@ -1454,22 +1471,19 @@ static void xprt_destroy(struct rpc_xprt *xprt)
1454{ 1471{
1455 dprintk("RPC: destroying transport %p\n", xprt); 1472 dprintk("RPC: destroying transport %p\n", xprt);
1456 1473
1457 /* Exclude transport connect/disconnect handlers */ 1474 /*
1475 * Exclude transport connect/disconnect handlers and autoclose
1476 */
1458 wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); 1477 wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
1459 1478
1460 del_timer_sync(&xprt->timer); 1479 del_timer_sync(&xprt->timer);
1461 1480
1462 rpc_xprt_debugfs_unregister(xprt);
1463 rpc_destroy_wait_queue(&xprt->binding);
1464 rpc_destroy_wait_queue(&xprt->pending);
1465 rpc_destroy_wait_queue(&xprt->sending);
1466 rpc_destroy_wait_queue(&xprt->backlog);
1467 cancel_work_sync(&xprt->task_cleanup);
1468 kfree(xprt->servername);
1469 /* 1481 /*
1470 * Tear down transport state and free the rpc_xprt 1482 * Destroy sockets etc from the system workqueue so they can
1483 * safely flush receive work running on rpciod.
1471 */ 1484 */
1472 xprt->ops->destroy(xprt); 1485 INIT_WORK(&xprt->task_cleanup, xprt_destroy_cb);
1486 schedule_work(&xprt->task_cleanup);
1473} 1487}
1474 1488
1475static void xprt_destroy_kref(struct kref *kref) 1489static void xprt_destroy_kref(struct kref *kref)
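The reworked xprt_destroy() above stops only the timer synchronously and punts the blocking teardown to xprt_destroy_cb() on the system workqueue, so socket destruction can safely flush receive work still running on rpciod. The shape of that deferral, sketched with a hypothetical struct:

struct foo {				/* hypothetical */
	struct work_struct teardown;
};

static void foo_teardown_cb(struct work_struct *work)
{
	struct foo *f = container_of(work, struct foo, teardown);

	/* blocking cleanup that must not run in the caller's context */
	kfree(f);
}

static void foo_destroy(struct foo *f)
{
	INIT_WORK(&f->teardown, foo_teardown_cb);
	schedule_work(&f->teardown);	/* runs on the system workqueue */
}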
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index ae92a9e9ba52..e2d64c7138c3 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Multipath support for RPC 3 * Multipath support for RPC
3 * 4 *
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index b8213ddce2f2..8bf19e142b6b 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o 2obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
2 3
3rpcrdma-y := transport.o rpc_rdma.o verbs.o \ 4rpcrdma-y := transport.o rpc_rdma.o verbs.o \
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index d31d0ac5ada9..8b818bb3518a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015 Oracle. All rights reserved.
3 * 4 *
@@ -42,7 +43,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
42 req = rpcrdma_create_req(r_xprt); 43 req = rpcrdma_create_req(r_xprt);
43 if (IS_ERR(req)) 44 if (IS_ERR(req))
44 return PTR_ERR(req); 45 return PTR_ERR(req);
45 req->rl_backchannel = true; 46 __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
46 47
47 rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, 48 rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
48 DMA_TO_DEVICE, GFP_KERNEL); 49 DMA_TO_DEVICE, GFP_KERNEL);
@@ -222,8 +223,8 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
222 *p++ = xdr_zero; 223 *p++ = xdr_zero;
223 *p = xdr_zero; 224 *p = xdr_zero;
224 225
225 if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN, 226 if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
226 &rqst->rq_snd_buf, rpcrdma_noch)) 227 &rqst->rq_snd_buf, rpcrdma_noch))
227 return -EIO; 228 return -EIO;
228 return 0; 229 return 0;
229} 230}
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 6c7151341194..29fc84c7ff98 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015 Oracle. All rights reserved.
3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -305,28 +306,9 @@ out_reset:
305 } 306 }
306} 307}
307 308
308/* Use a slow, safe mechanism to invalidate all memory regions
309 * that were registered for "req".
310 */
311static void
312fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
313 bool sync)
314{
315 struct rpcrdma_mw *mw;
316
317 while (!list_empty(&req->rl_registered)) {
318 mw = rpcrdma_pop_mw(&req->rl_registered);
319 if (sync)
320 fmr_op_recover_mr(mw);
321 else
322 rpcrdma_defer_mr_recovery(mw);
323 }
324}
325
326const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { 309const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
327 .ro_map = fmr_op_map, 310 .ro_map = fmr_op_map,
328 .ro_unmap_sync = fmr_op_unmap_sync, 311 .ro_unmap_sync = fmr_op_unmap_sync,
329 .ro_unmap_safe = fmr_op_unmap_safe,
330 .ro_recover_mr = fmr_op_recover_mr, 312 .ro_recover_mr = fmr_op_recover_mr,
331 .ro_open = fmr_op_open, 313 .ro_open = fmr_op_open,
332 .ro_maxpages = fmr_op_maxpages, 314 .ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5a936a6a31a3..773e66e10a15 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015 Oracle. All rights reserved.
3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -401,7 +402,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
401 if (unlikely(n != mw->mw_nents)) 402 if (unlikely(n != mw->mw_nents))
402 goto out_mapmr_err; 403 goto out_mapmr_err;
403 404
404 dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", 405 dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
405 __func__, frmr, mw->mw_nents, mr->length); 406 __func__, frmr, mw->mw_nents, mr->length);
406 407
407 key = (u8)(mr->rkey & 0x000000FF); 408 key = (u8)(mr->rkey & 0x000000FF);
@@ -419,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
419 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 420 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
420 IB_ACCESS_REMOTE_READ; 421 IB_ACCESS_REMOTE_READ;
421 422
422 rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
423 rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr); 423 rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
424 if (rc) 424 if (rc)
425 goto out_senderr; 425 goto out_senderr;
@@ -507,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
507 f->fr_cqe.done = frwr_wc_localinv_wake; 507 f->fr_cqe.done = frwr_wc_localinv_wake;
508 reinit_completion(&f->fr_linv_done); 508 reinit_completion(&f->fr_linv_done);
509 509
510 /* Initialize CQ count, since there is always a signaled
511 * WR being posted here. The new cqcount depends on how
512 * many SQEs are about to be consumed.
513 */
514 rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
515
516 /* Transport disconnect drains the receive CQ before it 510 /* Transport disconnect drains the receive CQ before it
517 * replaces the QP. The RPC reply handler won't call us 511 * replaces the QP. The RPC reply handler won't call us
518 * unless ri_id->qp is a valid pointer. 512 * unless ri_id->qp is a valid pointer.
@@ -545,7 +539,6 @@ reset_mrs:
545 /* Find and reset the MRs in the LOCAL_INV WRs that did not 539 /* Find and reset the MRs in the LOCAL_INV WRs that did not
546 * get posted. 540 * get posted.
547 */ 541 */
548 rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
549 while (bad_wr) { 542 while (bad_wr) {
550 f = container_of(bad_wr, struct rpcrdma_frmr, 543 f = container_of(bad_wr, struct rpcrdma_frmr,
551 fr_invwr); 544 fr_invwr);
@@ -558,28 +551,9 @@ reset_mrs:
558 goto unmap; 551 goto unmap;
559} 552}
560 553
561/* Use a slow, safe mechanism to invalidate all memory regions
562 * that were registered for "req".
563 */
564static void
565frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
566 bool sync)
567{
568 struct rpcrdma_mw *mw;
569
570 while (!list_empty(&req->rl_registered)) {
571 mw = rpcrdma_pop_mw(&req->rl_registered);
572 if (sync)
573 frwr_op_recover_mr(mw);
574 else
575 rpcrdma_defer_mr_recovery(mw);
576 }
577}
578
579const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 554const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
580 .ro_map = frwr_op_map, 555 .ro_map = frwr_op_map,
581 .ro_unmap_sync = frwr_op_unmap_sync, 556 .ro_unmap_sync = frwr_op_unmap_sync,
582 .ro_unmap_safe = frwr_op_unmap_safe,
583 .ro_recover_mr = frwr_op_recover_mr, 557 .ro_recover_mr = frwr_op_recover_mr,
584 .ro_open = frwr_op_open, 558 .ro_open = frwr_op_open,
585 .ro_maxpages = frwr_op_maxpages, 559 .ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f1889f4d4803..ed34dc0f144c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved.
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
75 76
76 /* Maximum Read list size */ 77 /* Maximum Read list size */
77 maxsegs += 2; /* segment for head and tail buffers */ 78 maxsegs += 2; /* segment for head and tail buffers */
78 size = maxsegs * sizeof(struct rpcrdma_read_chunk); 79 size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
79 80
80 /* Minimal Read chunk size */ 81 /* Minimal Read chunk size */
81 size += sizeof(__be32); /* segment count */ 82 size += sizeof(__be32); /* segment count */
82 size += sizeof(struct rpcrdma_segment); 83 size += rpcrdma_segment_maxsz * sizeof(__be32);
83 size += sizeof(__be32); /* list discriminator */ 84 size += sizeof(__be32); /* list discriminator */
84 85
85 dprintk("RPC: %s: max call header size = %u\n", 86 dprintk("RPC: %s: max call header size = %u\n",
@@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
102 /* Maximum Write list size */ 103 /* Maximum Write list size */
103 maxsegs += 2; /* segment for head and tail buffers */ 104 maxsegs += 2; /* segment for head and tail buffers */
104 size = sizeof(__be32); /* segment count */ 105 size = sizeof(__be32); /* segment count */
105 size += maxsegs * sizeof(struct rpcrdma_segment); 106 size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
106 size += sizeof(__be32); /* list discriminator */ 107 size += sizeof(__be32); /* list discriminator */
107 108
108 dprintk("RPC: %s: max reply header size = %u\n", 109 dprintk("RPC: %s: max reply header size = %u\n",
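The size arithmetic above now counts XDR words (multiples of sizeof(__be32)) instead of host struct sizes, so wire-format sizing no longer depends on C struct layout. A worked example for the Write list, assuming the conventional rpcrdma_segment_maxsz of 4 XDR words (handle, length, 64-bit offset) and maxsegs = 8:

/*   size = sizeof(__be32)                     =   4  (segment count)
 *        + 8 * rpcrdma_segment_maxsz * 4      = 128  (segments)
 *        + sizeof(__be32)                     =   4  (discriminator)
 *                                             = 136  bytes
 */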
@@ -511,27 +512,60 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
511 return 0; 512 return 0;
512} 513}
513 514
514/* Prepare the RPC-over-RDMA header SGE. 515/**
516 * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
517 * @sc: sendctx containing SGEs to unmap
518 *
519 */
520void
521rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
522{
523 struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
524 struct ib_sge *sge;
525 unsigned int count;
526
527 dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
528 __func__, sc->sc_unmap_count, sc);
529
530 /* The first two SGEs contain the transport header and
531 * the inline buffer. These are always left mapped so
532 * they can be cheaply re-used.
533 */
534 sge = &sc->sc_sges[2];
535 for (count = sc->sc_unmap_count; count; ++sge, --count)
536 ib_dma_unmap_page(ia->ri_device,
537 sge->addr, sge->length, DMA_TO_DEVICE);
538
539 if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
540 smp_mb__after_atomic();
541 wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
542 }
543}
544
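The test_and_clear_bit()/wake_up_bit() pair at the end of rpcrdma_unmap_sendctx() is one half of a handshake: rpcrdma_release_rqst(), added later in this patch, sleeps on the same RPCRDMA_REQ_F_TX_RESOURCES bit so an RPC cannot complete while its Send SGEs are still DMA-mapped. For reference, the waiter side mirrors the rpc_rdma.c hunk below:

if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
	r_xprt->rx_stats.reply_waits_for_send++;
	out_of_line_wait_on_bit(&req->rl_flags,
				RPCRDMA_REQ_F_TX_RESOURCES,
				bit_wait, TASK_UNINTERRUPTIBLE);
}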
545/* Prepare an SGE for the RPC-over-RDMA transport header.
515 */ 546 */
516static bool 547static bool
517rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 548rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
518 u32 len) 549 u32 len)
519{ 550{
551 struct rpcrdma_sendctx *sc = req->rl_sendctx;
520 struct rpcrdma_regbuf *rb = req->rl_rdmabuf; 552 struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
521 struct ib_sge *sge = &req->rl_send_sge[0]; 553 struct ib_sge *sge = sc->sc_sges;
522 554
523 if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) { 555 if (!rpcrdma_dma_map_regbuf(ia, rb))
524 if (!__rpcrdma_dma_map_regbuf(ia, rb)) 556 goto out_regbuf;
525 return false; 557 sge->addr = rdmab_addr(rb);
526 sge->addr = rdmab_addr(rb);
527 sge->lkey = rdmab_lkey(rb);
528 }
529 sge->length = len; 558 sge->length = len;
559 sge->lkey = rdmab_lkey(rb);
530 560
531 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, 561 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
532 sge->length, DMA_TO_DEVICE); 562 sge->length, DMA_TO_DEVICE);
533 req->rl_send_wr.num_sge++; 563 sc->sc_wr.num_sge++;
534 return true; 564 return true;
565
566out_regbuf:
567 pr_err("rpcrdma: failed to DMA map a Send buffer\n");
568 return false;
535} 569}
536 570
537/* Prepare the Send SGEs. The head and tail iovec, and each entry 571/* Prepare the Send SGEs. The head and tail iovec, and each entry
@@ -541,10 +575,11 @@ static bool
541rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 575rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
542 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) 576 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
543{ 577{
578 struct rpcrdma_sendctx *sc = req->rl_sendctx;
544 unsigned int sge_no, page_base, len, remaining; 579 unsigned int sge_no, page_base, len, remaining;
545 struct rpcrdma_regbuf *rb = req->rl_sendbuf; 580 struct rpcrdma_regbuf *rb = req->rl_sendbuf;
546 struct ib_device *device = ia->ri_device; 581 struct ib_device *device = ia->ri_device;
547 struct ib_sge *sge = req->rl_send_sge; 582 struct ib_sge *sge = sc->sc_sges;
548 u32 lkey = ia->ri_pd->local_dma_lkey; 583 u32 lkey = ia->ri_pd->local_dma_lkey;
549 struct page *page, **ppages; 584 struct page *page, **ppages;
550 585
@@ -552,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
552 * DMA-mapped. Sync the content that has changed. 587 * DMA-mapped. Sync the content that has changed.
553 */ 588 */
554 if (!rpcrdma_dma_map_regbuf(ia, rb)) 589 if (!rpcrdma_dma_map_regbuf(ia, rb))
555 return false; 590 goto out_regbuf;
556 sge_no = 1; 591 sge_no = 1;
557 sge[sge_no].addr = rdmab_addr(rb); 592 sge[sge_no].addr = rdmab_addr(rb);
558 sge[sge_no].length = xdr->head[0].iov_len; 593 sge[sge_no].length = xdr->head[0].iov_len;
@@ -607,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
607 sge[sge_no].length = len; 642 sge[sge_no].length = len;
608 sge[sge_no].lkey = lkey; 643 sge[sge_no].lkey = lkey;
609 644
610 req->rl_mapped_sges++; 645 sc->sc_unmap_count++;
611 ppages++; 646 ppages++;
612 remaining -= len; 647 remaining -= len;
613 page_base = 0; 648 page_base = 0;
@@ -633,56 +668,61 @@ map_tail:
633 goto out_mapping_err; 668 goto out_mapping_err;
634 sge[sge_no].length = len; 669 sge[sge_no].length = len;
635 sge[sge_no].lkey = lkey; 670 sge[sge_no].lkey = lkey;
636 req->rl_mapped_sges++; 671 sc->sc_unmap_count++;
637 } 672 }
638 673
639out: 674out:
640 req->rl_send_wr.num_sge = sge_no + 1; 675 sc->sc_wr.num_sge += sge_no;
676 if (sc->sc_unmap_count)
677 __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
641 return true; 678 return true;
642 679
680out_regbuf:
681 pr_err("rpcrdma: failed to DMA map a Send buffer\n");
682 return false;
683
643out_mapping_overflow: 684out_mapping_overflow:
685 rpcrdma_unmap_sendctx(sc);
644 pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); 686 pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
645 return false; 687 return false;
646 688
647out_mapping_err: 689out_mapping_err:
690 rpcrdma_unmap_sendctx(sc);
648 pr_err("rpcrdma: Send mapping error\n"); 691 pr_err("rpcrdma: Send mapping error\n");
649 return false; 692 return false;
650} 693}
651 694
652bool 695/**
653rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 696 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
654 u32 hdrlen, struct xdr_buf *xdr, 697 * @r_xprt: controlling transport
655 enum rpcrdma_chunktype rtype) 698 * @req: context of RPC Call being marshalled
699 * @hdrlen: size of transport header, in bytes
700 * @xdr: xdr_buf containing RPC Call
701 * @rtype: chunk type being encoded
702 *
703 * Returns 0 on success; otherwise a negative errno is returned.
704 */
705int
706rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
707 struct rpcrdma_req *req, u32 hdrlen,
708 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
656{ 709{
657 req->rl_send_wr.num_sge = 0; 710 req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
658 req->rl_mapped_sges = 0; 711 if (!req->rl_sendctx)
659 712 return -ENOBUFS;
660 if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen)) 713 req->rl_sendctx->sc_wr.num_sge = 0;
661 goto out_map; 714 req->rl_sendctx->sc_unmap_count = 0;
715 req->rl_sendctx->sc_req = req;
716 __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
717
718 if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
719 return -EIO;
662 720
663 if (rtype != rpcrdma_areadch) 721 if (rtype != rpcrdma_areadch)
664 if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype)) 722 if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
665 goto out_map; 723 return -EIO;
666
667 return true;
668
669out_map:
670 pr_err("rpcrdma: failed to DMA map a Send buffer\n");
671 return false;
672}
673
674void
675rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
676{
677 struct ib_device *device = ia->ri_device;
678 struct ib_sge *sge;
679 int count;
680 724
681 sge = &req->rl_send_sge[2]; 725 return 0;
682 for (count = req->rl_mapped_sges; count--; sge++)
683 ib_dma_unmap_page(device, sge->addr, sge->length,
684 DMA_TO_DEVICE);
685 req->rl_mapped_sges = 0;
686} 726}
687 727
688/** 728/**
@@ -833,12 +873,10 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
833 transfertypes[rtype], transfertypes[wtype], 873 transfertypes[rtype], transfertypes[wtype],
834 xdr_stream_pos(xdr)); 874 xdr_stream_pos(xdr));
835 875
836 if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, 876 ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
837 xdr_stream_pos(xdr), 877 &rqst->rq_snd_buf, rtype);
838 &rqst->rq_snd_buf, rtype)) { 878 if (ret)
839 ret = -EIO;
840 goto out_err; 879 goto out_err;
841 }
842 return 0; 880 return 0;
843 881
844out_err: 882out_err:
@@ -970,14 +1008,13 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws,
970 * straightforward to check the RPC header's direction field. 1008 * straightforward to check the RPC header's direction field.
971 */ 1009 */
972static bool 1010static bool
973rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, 1011rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
974 __be32 xid, __be32 proc)
975#if defined(CONFIG_SUNRPC_BACKCHANNEL) 1012#if defined(CONFIG_SUNRPC_BACKCHANNEL)
976{ 1013{
977 struct xdr_stream *xdr = &rep->rr_stream; 1014 struct xdr_stream *xdr = &rep->rr_stream;
978 __be32 *p; 1015 __be32 *p;
979 1016
980 if (proc != rdma_msg) 1017 if (rep->rr_proc != rdma_msg)
981 return false; 1018 return false;
982 1019
983 /* Peek at stream contents without advancing. */ 1020 /* Peek at stream contents without advancing. */
@@ -992,7 +1029,7 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
992 return false; 1029 return false;
993 1030
994 /* RPC header */ 1031 /* RPC header */
995 if (*p++ != xid) 1032 if (*p++ != rep->rr_xid)
996 return false; 1033 return false;
997 if (*p != cpu_to_be32(RPC_CALL)) 1034 if (*p != cpu_to_be32(RPC_CALL))
998 return false; 1035 return false;
@@ -1212,105 +1249,170 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
1212 return -EREMOTEIO; 1249 return -EREMOTEIO;
1213} 1250}
1214 1251
1252/* Perform XID lookup, reconstruction of the RPC reply, and
1253 * RPC completion while holding the transport lock to ensure
1254 * the rep, rqst, and rq_task pointers remain stable.
1255 */
1256void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
1257{
1258 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1259 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1260 struct rpc_rqst *rqst = rep->rr_rqst;
1261 unsigned long cwnd;
1262 int status;
1263
1264 xprt->reestablish_timeout = 0;
1265
1266 switch (rep->rr_proc) {
1267 case rdma_msg:
1268 status = rpcrdma_decode_msg(r_xprt, rep, rqst);
1269 break;
1270 case rdma_nomsg:
1271 status = rpcrdma_decode_nomsg(r_xprt, rep);
1272 break;
1273 case rdma_error:
1274 status = rpcrdma_decode_error(r_xprt, rep, rqst);
1275 break;
1276 default:
1277 status = -EIO;
1278 }
1279 if (status < 0)
1280 goto out_badheader;
1281
1282out:
1283 spin_lock(&xprt->recv_lock);
1284 cwnd = xprt->cwnd;
1285 xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
1286 if (xprt->cwnd > cwnd)
1287 xprt_release_rqst_cong(rqst->rq_task);
1288
1289 xprt_complete_rqst(rqst->rq_task, status);
1290 xprt_unpin_rqst(rqst);
1291 spin_unlock(&xprt->recv_lock);
1292 return;
1293
1294/* If the incoming reply terminated a pending RPC, the next
1295 * RPC call will post a replacement receive buffer as it is
1296 * being marshaled.
1297 */
1298out_badheader:
1299 dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
1300 rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
1301 r_xprt->rx_stats.bad_reply_count++;
1302 status = -EIO;
1303 goto out;
1304}
1305
1306void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
1307{
1308 /* Invalidate and unmap the data payloads before waking
1309 * the waiting application. This guarantees the memory
1310 * regions are properly fenced from the server before the
1311 * application accesses the data. It also ensures proper
1312 * send flow control: waking the next RPC waits until this
1313 * RPC has relinquished all its Send Queue entries.
1314 */
1315 if (!list_empty(&req->rl_registered))
1316 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
1317 &req->rl_registered);
1318
1319 /* Ensure that any DMA mapped pages associated with
1320 * the Send of the RPC Call have been unmapped before
1321 * allowing the RPC to complete. This protects argument
1322 * memory not controlled by the RPC client from being
1323 * re-used before we're done with it.
1324 */
1325 if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1326 r_xprt->rx_stats.reply_waits_for_send++;
1327 out_of_line_wait_on_bit(&req->rl_flags,
1328 RPCRDMA_REQ_F_TX_RESOURCES,
1329 bit_wait,
1330 TASK_UNINTERRUPTIBLE);
1331 }
1332}
1333
1334/* Reply handling runs in the poll worker thread. Anything that
1335 * might wait is deferred to a separate workqueue.
1336 */
1337void rpcrdma_deferred_completion(struct work_struct *work)
1338{
1339 struct rpcrdma_rep *rep =
1340 container_of(work, struct rpcrdma_rep, rr_work);
1341 struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
1342
1343 rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
1344 rpcrdma_release_rqst(rep->rr_rxprt, req);
1345 rpcrdma_complete_rqst(rep);
1346}
1347
1215/* Process received RPC/RDMA messages. 1348/* Process received RPC/RDMA messages.
1216 * 1349 *
1217 * Errors must result in the RPC task either being awakened, or 1350 * Errors must result in the RPC task either being awakened, or
1218 * allowed to timeout, to discover the errors at that time. 1351 * allowed to timeout, to discover the errors at that time.
1219 */ 1352 */
1220void 1353void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1221rpcrdma_reply_handler(struct work_struct *work)
1222{ 1354{
1223 struct rpcrdma_rep *rep =
1224 container_of(work, struct rpcrdma_rep, rr_work);
1225 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; 1355 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1226 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 1356 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1227 struct xdr_stream *xdr = &rep->rr_stream; 1357 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1228 struct rpcrdma_req *req; 1358 struct rpcrdma_req *req;
1229 struct rpc_rqst *rqst; 1359 struct rpc_rqst *rqst;
1230 __be32 *p, xid, vers, proc; 1360 u32 credits;
1231 unsigned long cwnd; 1361 __be32 *p;
1232 int status;
1233 1362
1234 dprintk("RPC: %s: incoming rep %p\n", __func__, rep); 1363 dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
1235 1364
1236 if (rep->rr_hdrbuf.head[0].iov_len == 0) 1365 if (rep->rr_hdrbuf.head[0].iov_len == 0)
1237 goto out_badstatus; 1366 goto out_badstatus;
1238 1367
1239 xdr_init_decode(xdr, &rep->rr_hdrbuf, 1368 xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
1240 rep->rr_hdrbuf.head[0].iov_base); 1369 rep->rr_hdrbuf.head[0].iov_base);
1241 1370
1242 /* Fixed transport header fields */ 1371 /* Fixed transport header fields */
1243 p = xdr_inline_decode(xdr, 4 * sizeof(*p)); 1372 p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
1244 if (unlikely(!p)) 1373 if (unlikely(!p))
1245 goto out_shortreply; 1374 goto out_shortreply;
1246 xid = *p++; 1375 rep->rr_xid = *p++;
1247 vers = *p++; 1376 rep->rr_vers = *p++;
1248 p++; /* credits */ 1377 credits = be32_to_cpu(*p++);
1249 proc = *p++; 1378 rep->rr_proc = *p++;
1379
1380 if (rep->rr_vers != rpcrdma_version)
1381 goto out_badversion;
1250 1382
1251 if (rpcrdma_is_bcall(r_xprt, rep, xid, proc)) 1383 if (rpcrdma_is_bcall(r_xprt, rep))
1252 return; 1384 return;
1253 1385
1254 /* Match incoming rpcrdma_rep to an rpcrdma_req to 1386 /* Match incoming rpcrdma_rep to an rpcrdma_req to
1255 * get context for handling any incoming chunks. 1387 * get context for handling any incoming chunks.
1256 */ 1388 */
1257 spin_lock(&xprt->recv_lock); 1389 spin_lock(&xprt->recv_lock);
1258 rqst = xprt_lookup_rqst(xprt, xid); 1390 rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
1259 if (!rqst) 1391 if (!rqst)
1260 goto out_norqst; 1392 goto out_norqst;
1261 xprt_pin_rqst(rqst); 1393 xprt_pin_rqst(rqst);
1394
1395 if (credits == 0)
1396 credits = 1; /* don't deadlock */
1397 else if (credits > buf->rb_max_requests)
1398 credits = buf->rb_max_requests;
1399 buf->rb_credits = credits;
1400
1262 spin_unlock(&xprt->recv_lock); 1401 spin_unlock(&xprt->recv_lock);
1402
1263 req = rpcr_to_rdmar(rqst); 1403 req = rpcr_to_rdmar(rqst);
1264 req->rl_reply = rep; 1404 req->rl_reply = rep;
1405 rep->rr_rqst = rqst;
1406 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
1265 1407
1266 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", 1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
1267 __func__, rep, req, be32_to_cpu(xid)); 1409 __func__, rep, req, be32_to_cpu(rep->rr_xid));
1268
1269 /* Invalidate and unmap the data payloads before waking the
1270 * waiting application. This guarantees the memory regions
1271 * are properly fenced from the server before the application
1272 * accesses the data. It also ensures proper send flow control:
1273 * waking the next RPC waits until this RPC has relinquished
1274 * all its Send Queue entries.
1275 */
1276 if (!list_empty(&req->rl_registered)) {
1277 rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
1278 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
1279 &req->rl_registered);
1280 }
1281
1282 xprt->reestablish_timeout = 0;
1283 if (vers != rpcrdma_version)
1284 goto out_badversion;
1285 1410
1286 switch (proc) { 1411 if (list_empty(&req->rl_registered) &&
1287 case rdma_msg: 1412 !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
1288 status = rpcrdma_decode_msg(r_xprt, rep, rqst); 1413 rpcrdma_complete_rqst(rep);
1289 break; 1414 else
1290 case rdma_nomsg: 1415 queue_work(rpcrdma_receive_wq, &rep->rr_work);
1291 status = rpcrdma_decode_nomsg(r_xprt, rep);
1292 break;
1293 case rdma_error:
1294 status = rpcrdma_decode_error(r_xprt, rep, rqst);
1295 break;
1296 default:
1297 status = -EIO;
1298 }
1299 if (status < 0)
1300 goto out_badheader;
1301
1302out:
1303 spin_lock(&xprt->recv_lock);
1304 cwnd = xprt->cwnd;
1305 xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
1306 if (xprt->cwnd > cwnd)
1307 xprt_release_rqst_cong(rqst->rq_task);
1308
1309 xprt_complete_rqst(rqst->rq_task, status);
1310 xprt_unpin_rqst(rqst);
1311 spin_unlock(&xprt->recv_lock);
1312 dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
1313 __func__, xprt, rqst, status);
1314 return; 1416 return;
1315 1417
1316out_badstatus: 1418out_badstatus:
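The credit update above, now done inline under recv_lock, clamps the server's grant before it feeds the congestion window: a zero grant would wedge the transport, and a grant above rb_max_requests would overcommit receive resources. As a hypothetical helper:

static u32 clamp_credits(u32 granted, u32 max_requests)
{
	if (granted == 0)
		return 1;		/* don't deadlock */
	if (granted > max_requests)
		return max_requests;	/* don't overcommit */
	return granted;
}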
@@ -1321,37 +1423,22 @@ out_badstatus:
1321 } 1423 }
1322 return; 1424 return;
1323 1425
1324/* If the incoming reply terminated a pending RPC, the next
1325 * RPC call will post a replacement receive buffer as it is
1326 * being marshaled.
1327 */
1328out_badversion: 1426out_badversion:
1329 dprintk("RPC: %s: invalid version %d\n", 1427 dprintk("RPC: %s: invalid version %d\n",
1330 __func__, be32_to_cpu(vers)); 1428 __func__, be32_to_cpu(rep->rr_vers));
1331 status = -EIO; 1429 goto repost;
1332 r_xprt->rx_stats.bad_reply_count++;
1333 goto out;
1334
1335out_badheader:
1336 dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
1337 rqst->rq_task->tk_pid, __func__, be32_to_cpu(proc));
1338 r_xprt->rx_stats.bad_reply_count++;
1339 status = -EIO;
1340 goto out;
1341 1430
1342/* The req was still available, but by the time the recv_lock 1431/* The RPC transaction has already been terminated, or the header
1343 * was acquired, the rqst and task had been released. Thus the RPC 1432 * is corrupt.
1344 * has already been terminated.
1345 */ 1433 */
1346out_norqst: 1434out_norqst:
1347 spin_unlock(&xprt->recv_lock); 1435 spin_unlock(&xprt->recv_lock);
1348 dprintk("RPC: %s: no match for incoming xid 0x%08x\n", 1436 dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
1349 __func__, be32_to_cpu(xid)); 1437 __func__, be32_to_cpu(rep->rr_xid));
1350 goto repost; 1438 goto repost;
1351 1439
1352out_shortreply: 1440out_shortreply:
1353 dprintk("RPC: %s: short/invalid reply\n", __func__); 1441 dprintk("RPC: %s: short/invalid reply\n", __func__);
1354 goto repost;
1355 1442
1356/* If no pending RPC transaction was matched, post a replacement 1443/* If no pending RPC transaction was matched, post a replacement
1357 * receive buffer before returning. 1444 * receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index ec37ad83b068..af7893501e40 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015 Oracle. All rights reserved.
3 * 4 *
@@ -132,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
132 if (ret) 133 if (ret)
133 goto out_err; 134 goto out_err;
134 135
136 /* Bump page refcnt so Send completion doesn't release
137 * the rq_buffer before all retransmits are complete.
138 */
139 get_page(virt_to_page(rqst->rq_buffer));
135 ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); 140 ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
136 if (ret) 141 if (ret)
137 goto out_unmap; 142 goto out_unmap;
@@ -164,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
164 return -EINVAL; 169 return -EINVAL;
165 } 170 }
166 171
167 /* svc_rdma_sendto releases this page */
168 page = alloc_page(RPCRDMA_DEF_GFP); 172 page = alloc_page(RPCRDMA_DEF_GFP);
169 if (!page) 173 if (!page)
170 return -ENOMEM; 174 return -ENOMEM;
@@ -183,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
183{ 187{
184 struct rpc_rqst *rqst = task->tk_rqstp; 188 struct rpc_rqst *rqst = task->tk_rqstp;
185 189
190 put_page(virt_to_page(rqst->rq_buffer));
186 kfree(rqst->rq_rbuffer); 191 kfree(rqst->rq_rbuffer);
187} 192}
188 193
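The get_page()/put_page() pair added above is a lifetime handoff across functions: svc_rdma_bc_sendto() pins the page before posting, and xprt_rdma_bc_free() unpins it when the RPC finishes, so a Send completion that releases its pages cannot free rq_buffer while a retransmit may still read it. The idiom in isolation:

/* send path: pin the backing page for the lifetime of the RPC */
get_page(virt_to_page(rqst->rq_buffer));

/* buf_free path: drop the matching reference */
put_page(virt_to_page(rqst->rq_buffer));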
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 7dcda4597057..9bd04549a1ad 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright (c) 2016 Oracle. All rights reserved. 3 * Copyright (c) 2016 Oracle. All rights reserved.
3 * 4 *
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5caf8e722a11..46ec069150d5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
290 ib_event_msg(event->event), event->event, 290 ib_event_msg(event->event), event->event,
291 event->element.qp); 291 event->element.qp);
292 set_bit(XPT_CLOSE, &xprt->xpt_flags); 292 set_bit(XPT_CLOSE, &xprt->xpt_flags);
293 svc_xprt_enqueue(xprt);
293 break; 294 break;
294 } 295 }
295} 296}
@@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
322 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 323 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
323 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) 324 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
324 goto out; 325 goto out;
325 svc_xprt_enqueue(&xprt->sc_xprt); 326 goto out_enqueue;
326 goto out;
327 327
328flushed: 328flushed:
329 if (wc->status != IB_WC_WR_FLUSH_ERR) 329 if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -333,6 +333,8 @@ flushed:
333 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 333 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
334 svc_rdma_put_context(ctxt, 1); 334 svc_rdma_put_context(ctxt, 1);
335 335
336out_enqueue:
337 svc_xprt_enqueue(&xprt->sc_xprt);
336out: 338out:
337 svc_xprt_put(&xprt->sc_xprt); 339 svc_xprt_put(&xprt->sc_xprt);
338} 340}
@@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
358 360
359 if (unlikely(wc->status != IB_WC_SUCCESS)) { 361 if (unlikely(wc->status != IB_WC_SUCCESS)) {
360 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 362 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
363 svc_xprt_enqueue(&xprt->sc_xprt);
361 if (wc->status != IB_WC_WR_FLUSH_ERR) 364 if (wc->status != IB_WC_WR_FLUSH_ERR)
362 pr_err("svcrdma: Send: %s (%u/0x%x)\n", 365 pr_err("svcrdma: Send: %s (%u/0x%x)\n",
363 ib_wc_status_msg(wc->status), 366 ib_wc_status_msg(wc->status),
@@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
569 case RDMA_CM_EVENT_DEVICE_REMOVAL: 572 case RDMA_CM_EVENT_DEVICE_REMOVAL:
570 dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", 573 dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
571 xprt, cma_id); 574 xprt, cma_id);
572 if (xprt) 575 if (xprt) {
573 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 576 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
577 svc_xprt_enqueue(&xprt->sc_xprt);
578 }
574 break; 579 break;
575 580
576 default: 581 default:
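The four svc_xprt_enqueue() additions in this file all fix the same latent bug: setting XPT_CLOSE only marks the transport, and nothing reaps it until a server thread notices. The repaired idiom:

set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);	/* kick a thread so the close is acted on */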
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c84e2b644e13..646c24494ea7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved.
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -678,16 +679,14 @@ xprt_rdma_free(struct rpc_task *task)
678 struct rpc_rqst *rqst = task->tk_rqstp; 679 struct rpc_rqst *rqst = task->tk_rqstp;
679 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 680 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
680 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 681 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
681 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
682 682
683 if (req->rl_backchannel) 683 if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
684 return; 684 return;
685 685
686 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); 686 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
687 687
688 if (!list_empty(&req->rl_registered)) 688 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
689 ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); 689 rpcrdma_release_rqst(r_xprt, req);
690 rpcrdma_unmap_sges(ia, req);
691 rpcrdma_buffer_put(req); 690 rpcrdma_buffer_put(req);
692} 691}
693 692
@@ -728,7 +727,8 @@ xprt_rdma_send_request(struct rpc_task *task)
728 727
729 /* On retransmit, remove any previously registered chunks */ 728 /* On retransmit, remove any previously registered chunks */
730 if (unlikely(!list_empty(&req->rl_registered))) 729 if (unlikely(!list_empty(&req->rl_registered)))
731 r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); 730 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
731 &req->rl_registered);
732 732
733 rc = rpcrdma_marshal_req(r_xprt, rqst); 733 rc = rpcrdma_marshal_req(r_xprt, rqst);
734 if (rc < 0) 734 if (rc < 0)
@@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task)
742 goto drop_connection; 742 goto drop_connection;
743 req->rl_connect_cookie = xprt->connect_cookie; 743 req->rl_connect_cookie = xprt->connect_cookie;
744 744
745 set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
745 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 746 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
746 goto drop_connection; 747 goto drop_connection;
747 748
@@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
789 r_xprt->rx_stats.failed_marshal_count, 790 r_xprt->rx_stats.failed_marshal_count,
790 r_xprt->rx_stats.bad_reply_count, 791 r_xprt->rx_stats.bad_reply_count,
791 r_xprt->rx_stats.nomsg_call_count); 792 r_xprt->rx_stats.nomsg_call_count);
792 seq_printf(seq, "%lu %lu %lu %lu\n", 793 seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
793 r_xprt->rx_stats.mrs_recovered, 794 r_xprt->rx_stats.mrs_recovered,
794 r_xprt->rx_stats.mrs_orphaned, 795 r_xprt->rx_stats.mrs_orphaned,
795 r_xprt->rx_stats.mrs_allocated, 796 r_xprt->rx_stats.mrs_allocated,
796 r_xprt->rx_stats.local_inv_needed); 797 r_xprt->rx_stats.local_inv_needed,
798 r_xprt->rx_stats.empty_sendctx_q,
799 r_xprt->rx_stats.reply_waits_for_send);
797} 800}
798 801
799static int 802static int
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11a1fbf7e59e..710b3f77db82 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved.
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -49,9 +50,10 @@
49 50
50#include <linux/interrupt.h> 51#include <linux/interrupt.h>
51#include <linux/slab.h> 52#include <linux/slab.h>
52#include <linux/prefetch.h>
53#include <linux/sunrpc/addr.h> 53#include <linux/sunrpc/addr.h>
54#include <linux/sunrpc/svc_rdma.h> 54#include <linux/sunrpc/svc_rdma.h>
55
56#include <asm-generic/barrier.h>
55#include <asm/bitops.h> 57#include <asm/bitops.h>
56 58
57#include <rdma/ib_cm.h> 59#include <rdma/ib_cm.h>
@@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
73static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); 75static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
74static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
75 77
76static struct workqueue_struct *rpcrdma_receive_wq __read_mostly; 78struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
77 79
78int 80int
79rpcrdma_alloc_wq(void) 81rpcrdma_alloc_wq(void)
@@ -126,30 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
126static void 128static void
127rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 129rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
128{ 130{
131 struct ib_cqe *cqe = wc->wr_cqe;
132 struct rpcrdma_sendctx *sc =
133 container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
134
129 /* WARNING: Only wr_cqe and status are reliable at this point */ 135 /* WARNING: Only wr_cqe and status are reliable at this point */
130 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) 136 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
131 pr_err("rpcrdma: Send: %s (%u/0x%x)\n", 137 pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
132 ib_wc_status_msg(wc->status), 138 ib_wc_status_msg(wc->status),
133 wc->status, wc->vendor_err); 139 wc->status, wc->vendor_err);
134}
135
136/* Perform basic sanity checking to avoid using garbage
137 * to update the credit grant value.
138 */
139static void
140rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
141{
142 struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
143 __be32 *p = rep->rr_rdmabuf->rg_base;
144 u32 credits;
145 140
146 credits = be32_to_cpup(p + 2); 141 rpcrdma_sendctx_put_locked(sc);
147 if (credits == 0)
148 credits = 1; /* don't deadlock */
149 else if (credits > buffer->rb_max_requests)
150 credits = buffer->rb_max_requests;
151
152 atomic_set(&buffer->rb_credits, credits);
153} 142}
154 143
155/** 144/**
@@ -181,11 +170,8 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
181 rdmab_addr(rep->rr_rdmabuf), 170 rdmab_addr(rep->rr_rdmabuf),
182 wc->byte_len, DMA_FROM_DEVICE); 171 wc->byte_len, DMA_FROM_DEVICE);
183 172
184 if (wc->byte_len >= RPCRDMA_HDRLEN_ERR)
185 rpcrdma_update_granted_credits(rep);
186
187out_schedule: 173out_schedule:
188 queue_work(rpcrdma_receive_wq, &rep->rr_work); 174 rpcrdma_reply_handler(rep);
189 return; 175 return;
190 176
191out_fail: 177out_fail:
@@ -295,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
295 case RDMA_CM_EVENT_DISCONNECTED: 281 case RDMA_CM_EVENT_DISCONNECTED:
296 connstate = -ECONNABORTED; 282 connstate = -ECONNABORTED;
297connected: 283connected:
298 atomic_set(&xprt->rx_buf.rb_credits, 1); 284 xprt->rx_buf.rb_credits = 1;
299 ep->rep_connected = connstate; 285 ep->rep_connected = connstate;
300 rpcrdma_conn_func(ep); 286 rpcrdma_conn_func(ep);
301 wake_up_all(&ep->rep_connect_wait); 287 wake_up_all(&ep->rep_connect_wait);
@@ -564,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
564 ep->rep_attr.cap.max_recv_sge); 550 ep->rep_attr.cap.max_recv_sge);
565 551
566 /* set trigger for requesting send completion */ 552 /* set trigger for requesting send completion */
567 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; 553 ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
568 if (ep->rep_cqinit <= 2) 554 cdata->max_requests >> 2);
569 ep->rep_cqinit = 0; /* always signal? */ 555 ep->rep_send_count = ep->rep_send_batch;
570 rpcrdma_init_cqcount(ep, 0);
571 init_waitqueue_head(&ep->rep_connect_wait); 556 init_waitqueue_head(&ep->rep_connect_wait);
572 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 557 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
573 558
574 sendcq = ib_alloc_cq(ia->ri_device, NULL, 559 sendcq = ib_alloc_cq(ia->ri_device, NULL,
575 ep->rep_attr.cap.max_send_wr + 1, 560 ep->rep_attr.cap.max_send_wr + 1,
576 0, IB_POLL_SOFTIRQ); 561 1, IB_POLL_WORKQUEUE);
577 if (IS_ERR(sendcq)) { 562 if (IS_ERR(sendcq)) {
578 rc = PTR_ERR(sendcq); 563 rc = PTR_ERR(sendcq);
579 dprintk("RPC: %s: failed to create send CQ: %i\n", 564 dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -583,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
583 568
584 recvcq = ib_alloc_cq(ia->ri_device, NULL, 569 recvcq = ib_alloc_cq(ia->ri_device, NULL,
585 ep->rep_attr.cap.max_recv_wr + 1, 570 ep->rep_attr.cap.max_recv_wr + 1,
586 0, IB_POLL_SOFTIRQ); 571 0, IB_POLL_WORKQUEUE);
587 if (IS_ERR(recvcq)) { 572 if (IS_ERR(recvcq)) {
588 rc = PTR_ERR(recvcq); 573 rc = PTR_ERR(recvcq);
589 dprintk("RPC: %s: failed to create recv CQ: %i\n", 574 dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -846,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
846 ib_drain_qp(ia->ri_id->qp); 831 ib_drain_qp(ia->ri_id->qp);
847} 832}
848 833
834/* Fixed-size circular FIFO queue. This implementation is wait-free and
835 * lock-free.
836 *
837 * Consumer is the code path that posts Sends. This path dequeues a
838 * sendctx for use by a Send operation. Multiple consumer threads
839 * are serialized by the RPC transport lock, which allows only one
840 * ->send_request call at a time.
841 *
842 * Producer is the code path that handles Send completions. This path
843 * enqueues a sendctx that has been completed. Multiple producer
844 * threads are serialized by the ib_poll_cq() function.
845 */
846
847/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
848 * queue activity, and ib_drain_qp has flushed all remaining Send
849 * requests.
850 */
851static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
852{
853 unsigned long i;
854
855 for (i = 0; i <= buf->rb_sc_last; i++)
856 kfree(buf->rb_sc_ctxs[i]);
857 kfree(buf->rb_sc_ctxs);
858}
859
860static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
861{
862 struct rpcrdma_sendctx *sc;
863
864 sc = kzalloc(sizeof(*sc) +
865 ia->ri_max_send_sges * sizeof(struct ib_sge),
866 GFP_KERNEL);
867 if (!sc)
868 return NULL;
869
870 sc->sc_wr.wr_cqe = &sc->sc_cqe;
871 sc->sc_wr.sg_list = sc->sc_sges;
872 sc->sc_wr.opcode = IB_WR_SEND;
873 sc->sc_cqe.done = rpcrdma_wc_send;
874 return sc;
875}
876
877static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
878{
879 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
880 struct rpcrdma_sendctx *sc;
881 unsigned long i;
882
883 /* Maximum number of concurrent outstanding Send WRs. Capping
884 * the circular queue size stops Send Queue overflow by causing
885 * the ->send_request call to fail temporarily before too many
886 * Sends are posted.
887 */
888 i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
889 dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
890 buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
891 if (!buf->rb_sc_ctxs)
892 return -ENOMEM;
893
894 buf->rb_sc_last = i - 1;
895 for (i = 0; i <= buf->rb_sc_last; i++) {
896 sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
897 if (!sc)
898 goto out_destroy;
899
900 sc->sc_xprt = r_xprt;
901 buf->rb_sc_ctxs[i] = sc;
902 }
903
904 return 0;
905
906out_destroy:
907 rpcrdma_sendctxs_destroy(buf);
908 return -ENOMEM;
909}
910
911/* The sendctx queue is not guaranteed to have a size that is a
912 * power of two, thus the helpers in circ_buf.h cannot be used.
913 * The other option is to use modulus (%), which can be expensive.
914 */
915static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
916 unsigned long item)
917{
918 return likely(item < buf->rb_sc_last) ? item + 1 : 0;
919}
920
921/**
922 * rpcrdma_sendctx_get_locked - Acquire a send context
923 * @buf: transport buffers from which to acquire an unused context
924 *
925 * Returns pointer to a free send completion context; or NULL if
926 * the queue is empty.
927 *
928 * Usage: Called to acquire an SGE array before preparing a Send WR.
929 *
930 * The caller serializes calls to this function (per rpcrdma_buffer),
931 * and provides an effective memory barrier that flushes the new value
932 * of rb_sc_head.
933 */
934struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
935{
936 struct rpcrdma_xprt *r_xprt;
937 struct rpcrdma_sendctx *sc;
938 unsigned long next_head;
939
940 next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
941
942 if (next_head == READ_ONCE(buf->rb_sc_tail))
943 goto out_emptyq;
944
945 /* ORDER: item must be accessed _before_ head is updated */
946 sc = buf->rb_sc_ctxs[next_head];
947
948 /* Releasing the lock in the caller acts as a memory
949 * barrier that flushes rb_sc_head.
950 */
951 buf->rb_sc_head = next_head;
952
953 return sc;
954
955out_emptyq:
956 /* The queue is "empty" if there have not been enough Send
957 * completions recently. This is a sign the Send Queue is
958 * backing up. Cause the caller to pause and try again.
959 */
960 dprintk("RPC: %s: empty sendctx queue\n", __func__);
961 r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
962 r_xprt->rx_stats.empty_sendctx_q++;
963 return NULL;
964}
965
966/**
967 * rpcrdma_sendctx_put_locked - Release a send context
968 * @sc: send context to release
969 *
 970 * Usage: Called from Send completion to return a sendctx
971 * to the queue.
972 *
973 * The caller serializes calls to this function (per rpcrdma_buffer).
974 */
975void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
976{
977 struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
978 unsigned long next_tail;
979
 980	/* Unmap SGEs of previously completed but unsignaled
981 * Sends by walking up the queue until @sc is found.
982 */
983 next_tail = buf->rb_sc_tail;
984 do {
985 next_tail = rpcrdma_sendctx_next(buf, next_tail);
986
987 /* ORDER: item must be accessed _before_ tail is updated */
988 rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
989
990 } while (buf->rb_sc_ctxs[next_tail] != sc);
991
992 /* Paired with READ_ONCE */
993 smp_store_release(&buf->rb_sc_tail, next_tail);
994}
995
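The ring above is a single-producer/single-consumer FIFO: ->send_request (the consumer) advances rb_sc_head, Send completion (the producer) advances rb_sc_tail, and "empty" means the next head would collide with the tail. A self-contained userspace model of the index discipline, with illustrative names:

#include <stdio.h>

#define LAST 4	/* stands in for buf->rb_sc_last */

/* no power-of-two requirement, no modulus */
static unsigned long next_idx(unsigned long item)
{
	return item < LAST ? item + 1 : 0;
}

int main(void)
{
	unsigned long head = 0, tail = 0;

	/* consumer: a get fails when no free slot remains */
	if (next_idx(head) == tail)
		printf("empty: caller pauses and retries\n");
	else
		head = next_idx(head);

	/* producer: walk tail forward to the completed slot; the
	 * kernel version publishes it with smp_store_release(),
	 * paired with the consumer's READ_ONCE().
	 */
	tail = next_idx(tail);
	printf("head=%lu tail=%lu\n", head, tail);
	return 0;
}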
849static void 996static void
850rpcrdma_mr_recovery_worker(struct work_struct *work) 997rpcrdma_mr_recovery_worker(struct work_struct *work)
851{ 998{
@@ -941,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
941 spin_lock(&buffer->rb_reqslock); 1088 spin_lock(&buffer->rb_reqslock);
942 list_add(&req->rl_all, &buffer->rb_allreqs); 1089 list_add(&req->rl_all, &buffer->rb_allreqs);
943 spin_unlock(&buffer->rb_reqslock); 1090 spin_unlock(&buffer->rb_reqslock);
944 req->rl_cqe.done = rpcrdma_wc_send;
945 req->rl_buffer = &r_xprt->rx_buf; 1091 req->rl_buffer = &r_xprt->rx_buf;
946 INIT_LIST_HEAD(&req->rl_registered); 1092 INIT_LIST_HEAD(&req->rl_registered);
947 req->rl_send_wr.next = NULL;
948 req->rl_send_wr.wr_cqe = &req->rl_cqe;
949 req->rl_send_wr.sg_list = req->rl_send_sge;
950 req->rl_send_wr.opcode = IB_WR_SEND;
951 return req; 1093 return req;
952} 1094}
953 1095
@@ -974,7 +1116,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
974 1116
975 rep->rr_cqe.done = rpcrdma_wc_receive; 1117 rep->rr_cqe.done = rpcrdma_wc_receive;
976 rep->rr_rxprt = r_xprt; 1118 rep->rr_rxprt = r_xprt;
977 INIT_WORK(&rep->rr_work, rpcrdma_reply_handler); 1119 INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
978 rep->rr_recv_wr.next = NULL; 1120 rep->rr_recv_wr.next = NULL;
979 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 1121 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
980 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1122 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
@@ -995,7 +1137,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
995 1137
996 buf->rb_max_requests = r_xprt->rx_data.max_requests; 1138 buf->rb_max_requests = r_xprt->rx_data.max_requests;
997 buf->rb_bc_srv_max_requests = 0; 1139 buf->rb_bc_srv_max_requests = 0;
998 atomic_set(&buf->rb_credits, 1);
999 spin_lock_init(&buf->rb_mwlock); 1140 spin_lock_init(&buf->rb_mwlock);
1000 spin_lock_init(&buf->rb_lock); 1141 spin_lock_init(&buf->rb_lock);
1001 spin_lock_init(&buf->rb_recovery_lock); 1142 spin_lock_init(&buf->rb_recovery_lock);
@@ -1022,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1022 rc = PTR_ERR(req); 1163 rc = PTR_ERR(req);
1023 goto out; 1164 goto out;
1024 } 1165 }
1025 req->rl_backchannel = false;
1026 list_add(&req->rl_list, &buf->rb_send_bufs); 1166 list_add(&req->rl_list, &buf->rb_send_bufs);
1027 } 1167 }
1028 1168
@@ -1040,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1040 list_add(&rep->rr_list, &buf->rb_recv_bufs); 1180 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1041 } 1181 }
1042 1182
1183 rc = rpcrdma_sendctxs_create(r_xprt);
1184 if (rc)
1185 goto out;
1186
1043 return 0; 1187 return 0;
1044out: 1188out:
1045 rpcrdma_buffer_destroy(buf); 1189 rpcrdma_buffer_destroy(buf);
@@ -1116,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1116 cancel_delayed_work_sync(&buf->rb_recovery_worker); 1260 cancel_delayed_work_sync(&buf->rb_recovery_worker);
1117 cancel_delayed_work_sync(&buf->rb_refresh_worker); 1261 cancel_delayed_work_sync(&buf->rb_refresh_worker);
1118 1262
1263 rpcrdma_sendctxs_destroy(buf);
1264
1119 while (!list_empty(&buf->rb_recv_bufs)) { 1265 while (!list_empty(&buf->rb_recv_bufs)) {
1120 struct rpcrdma_rep *rep; 1266 struct rpcrdma_rep *rep;
1121 1267
@@ -1231,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1231 struct rpcrdma_buffer *buffers = req->rl_buffer; 1377 struct rpcrdma_buffer *buffers = req->rl_buffer;
1232 struct rpcrdma_rep *rep = req->rl_reply; 1378 struct rpcrdma_rep *rep = req->rl_reply;
1233 1379
1234 req->rl_send_wr.num_sge = 0;
1235 req->rl_reply = NULL; 1380 req->rl_reply = NULL;
1236 1381
1237 spin_lock(&buffers->rb_lock); 1382 spin_lock(&buffers->rb_lock);
@@ -1363,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1363 struct rpcrdma_ep *ep, 1508 struct rpcrdma_ep *ep,
1364 struct rpcrdma_req *req) 1509 struct rpcrdma_req *req)
1365{ 1510{
1366 struct ib_send_wr *send_wr = &req->rl_send_wr; 1511 struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
1367 struct ib_send_wr *send_wr_fail; 1512 struct ib_send_wr *send_wr_fail;
1368 int rc; 1513 int rc;
1369 1514
@@ -1377,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1377 dprintk("RPC: %s: posting %d s/g entries\n", 1522 dprintk("RPC: %s: posting %d s/g entries\n",
1378 __func__, send_wr->num_sge); 1523 __func__, send_wr->num_sge);
1379 1524
1380 rpcrdma_set_signaled(ep, send_wr); 1525 if (!ep->rep_send_count ||
1526 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1527 send_wr->send_flags |= IB_SEND_SIGNALED;
1528 ep->rep_send_count = ep->rep_send_batch;
1529 } else {
1530 send_wr->send_flags &= ~IB_SEND_SIGNALED;
1531 --ep->rep_send_count;
1532 }
1381 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); 1533 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
1382 if (rc) 1534 if (rc)
1383 goto out_postsend_err; 1535 goto out_postsend_err;
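The hunk above replaces the old atomic rep_cqcount scheme with a plain per-endpoint countdown: every rep_send_batch-th Send is signaled, and a Send whose request holds TX resources is signaled unconditionally so that its completion can release them. A standalone sketch of that policy, under hypothetical names:

#include <stdbool.h>

struct endpoint {
	unsigned int send_count;	/* countdown to the next signaled Send */
	unsigned int send_batch;	/* e.g. RPCRDMA_MAX_SEND_BATCH */
};

static bool send_needs_signal(struct endpoint *ep, bool holds_tx_resources)
{
	if (!ep->send_count || holds_tx_resources) {
		ep->send_count = ep->send_batch;	/* restart the countdown */
		return true;	/* set IB_SEND_SIGNALED */
	}
	--ep->send_count;
	return false;	/* unsignaled; completion handling is batched */
}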
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e26a97d2f922..51686d9eac5f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved.
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -93,8 +94,8 @@ enum {
93 */ 94 */
94 95
95struct rpcrdma_ep { 96struct rpcrdma_ep {
96 atomic_t rep_cqcount; 97 unsigned int rep_send_count;
97 int rep_cqinit; 98 unsigned int rep_send_batch;
98 int rep_connected; 99 int rep_connected;
99 struct ib_qp_init_attr rep_attr; 100 struct ib_qp_init_attr rep_attr;
100 wait_queue_head_t rep_connect_wait; 101 wait_queue_head_t rep_connect_wait;
@@ -104,25 +105,6 @@ struct rpcrdma_ep {
104 struct delayed_work rep_connect_worker; 105 struct delayed_work rep_connect_worker;
105}; 106};
106 107
107static inline void
108rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
109{
110 atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
111}
112
113/* To update send queue accounting, provider must take a
114 * send completion every now and then.
115 */
116static inline void
117rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
118{
119 send_wr->send_flags = 0;
120 if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
121 rpcrdma_init_cqcount(ep, 0);
122 send_wr->send_flags = IB_SEND_SIGNALED;
123 }
124}
125
126/* Pre-allocate extra Work Requests for handling backward receives 108/* Pre-allocate extra Work Requests for handling backward receives
127 * and sends. This is a fixed value because the Work Queues are 109 * and sends. This is a fixed value because the Work Queues are
128 * allocated when the forward channel is set up. 110 * allocated when the forward channel is set up.
@@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb)
164 return rb->rg_iov.lkey; 146 return rb->rg_iov.lkey;
165} 147}
166 148
167static inline struct rpcrdma_msg *
168rdmab_to_msg(struct rpcrdma_regbuf *rb)
169{
170 return (struct rpcrdma_msg *)rb->rg_base;
171}
172
173static inline struct ib_device * 149static inline struct ib_device *
174rdmab_device(struct rpcrdma_regbuf *rb) 150rdmab_device(struct rpcrdma_regbuf *rb)
175{ 151{
@@ -202,22 +178,24 @@ enum {
202}; 178};
203 179
204/* 180/*
205 * struct rpcrdma_rep -- this structure encapsulates state required to recv 181 * struct rpcrdma_rep -- this structure encapsulates state required
206 * and complete a reply, asynchronously. It needs several pieces of 182 * to receive and complete an RPC Reply, asynchronously. It needs
207 * state: 183 * several pieces of state:
208 * o recv buffer (posted to provider)
209 * o ib_sge (also donated to provider)
210 * o status of reply (length, success or not)
211 * o bookkeeping state to get run by reply handler (list, etc)
212 * 184 *
213 * These are allocated during initialization, per-transport instance. 185 * o receive buffer and ib_sge (donated to provider)
186 * o status of receive (success or not, length, inv rkey)
187 * o bookkeeping state to get run by reply handler (XDR stream)
214 * 188 *
215 * N of these are associated with a transport instance, and stored in 189 * These structures are allocated during transport initialization.
216 * struct rpcrdma_buffer. N is the max number of outstanding requests. 190 * N of these are associated with a transport instance, managed by
191 * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
217 */ 192 */
218 193
219struct rpcrdma_rep { 194struct rpcrdma_rep {
220 struct ib_cqe rr_cqe; 195 struct ib_cqe rr_cqe;
196 __be32 rr_xid;
197 __be32 rr_vers;
198 __be32 rr_proc;
221 int rr_wc_flags; 199 int rr_wc_flags;
222 u32 rr_inv_rkey; 200 u32 rr_inv_rkey;
223 struct rpcrdma_regbuf *rr_rdmabuf; 201 struct rpcrdma_regbuf *rr_rdmabuf;
@@ -225,10 +203,34 @@ struct rpcrdma_rep {
225 struct work_struct rr_work; 203 struct work_struct rr_work;
226 struct xdr_buf rr_hdrbuf; 204 struct xdr_buf rr_hdrbuf;
227 struct xdr_stream rr_stream; 205 struct xdr_stream rr_stream;
206 struct rpc_rqst *rr_rqst;
228 struct list_head rr_list; 207 struct list_head rr_list;
229 struct ib_recv_wr rr_recv_wr; 208 struct ib_recv_wr rr_recv_wr;
230}; 209};
231 210
211/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
212 */
213struct rpcrdma_req;
214struct rpcrdma_xprt;
215struct rpcrdma_sendctx {
216 struct ib_send_wr sc_wr;
217 struct ib_cqe sc_cqe;
218 struct rpcrdma_xprt *sc_xprt;
219 struct rpcrdma_req *sc_req;
220 unsigned int sc_unmap_count;
221 struct ib_sge sc_sges[];
222};
223
224/* Limit the number of SGEs that can be unmapped during one
225 * Send completion. This caps the amount of work a single
226 * completion can do before returning to the provider.
227 *
228 * Setting this to zero disables Send completion batching.
229 */
230enum {
231 RPCRDMA_MAX_SEND_BATCH = 7,
232};
233
232/* 234/*
233 * struct rpcrdma_mw - external memory region metadata 235 * struct rpcrdma_mw - external memory region metadata
234 * 236 *
@@ -340,26 +342,30 @@ enum {
340struct rpcrdma_buffer; 342struct rpcrdma_buffer;
341struct rpcrdma_req { 343struct rpcrdma_req {
342 struct list_head rl_list; 344 struct list_head rl_list;
343 unsigned int rl_mapped_sges;
344 unsigned int rl_connect_cookie; 345 unsigned int rl_connect_cookie;
345 struct rpcrdma_buffer *rl_buffer; 346 struct rpcrdma_buffer *rl_buffer;
346 struct rpcrdma_rep *rl_reply; 347 struct rpcrdma_rep *rl_reply;
347 struct xdr_stream rl_stream; 348 struct xdr_stream rl_stream;
348 struct xdr_buf rl_hdrbuf; 349 struct xdr_buf rl_hdrbuf;
349 struct ib_send_wr rl_send_wr; 350 struct rpcrdma_sendctx *rl_sendctx;
350 struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
351 struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ 351 struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
352 struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ 352 struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
353 struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ 353 struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
354 354
355 struct ib_cqe rl_cqe;
356 struct list_head rl_all; 355 struct list_head rl_all;
357 bool rl_backchannel; 356 unsigned long rl_flags;
358 357
359 struct list_head rl_registered; /* registered segments */ 358 struct list_head rl_registered; /* registered segments */
360 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; 359 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
361}; 360};
362 361
362/* rl_flags */
363enum {
364 RPCRDMA_REQ_F_BACKCHANNEL = 0,
365 RPCRDMA_REQ_F_PENDING,
366 RPCRDMA_REQ_F_TX_RESOURCES,
367};
368
363static inline void 369static inline void
364rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) 370rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
365{ 371{
@@ -399,12 +405,17 @@ struct rpcrdma_buffer {
399 struct list_head rb_mws; 405 struct list_head rb_mws;
400 struct list_head rb_all; 406 struct list_head rb_all;
401 407
408 unsigned long rb_sc_head;
409 unsigned long rb_sc_tail;
410 unsigned long rb_sc_last;
411 struct rpcrdma_sendctx **rb_sc_ctxs;
412
402 spinlock_t rb_lock; /* protect buf lists */ 413 spinlock_t rb_lock; /* protect buf lists */
403 int rb_send_count, rb_recv_count; 414 int rb_send_count, rb_recv_count;
404 struct list_head rb_send_bufs; 415 struct list_head rb_send_bufs;
405 struct list_head rb_recv_bufs; 416 struct list_head rb_recv_bufs;
406 u32 rb_max_requests; 417 u32 rb_max_requests;
407 atomic_t rb_credits; /* most recent credit grant */ 418 u32 rb_credits; /* most recent credit grant */
408 419
409 u32 rb_bc_srv_max_requests; 420 u32 rb_bc_srv_max_requests;
410 spinlock_t rb_reqslock; /* protect rb_allreqs */ 421 spinlock_t rb_reqslock; /* protect rb_allreqs */
@@ -453,10 +464,12 @@ struct rpcrdma_stats {
453 unsigned long mrs_recovered; 464 unsigned long mrs_recovered;
454 unsigned long mrs_orphaned; 465 unsigned long mrs_orphaned;
455 unsigned long mrs_allocated; 466 unsigned long mrs_allocated;
467 unsigned long empty_sendctx_q;
456 468
457 /* accessed when receiving a reply */ 469 /* accessed when receiving a reply */
458 unsigned long long total_rdma_reply; 470 unsigned long long total_rdma_reply;
459 unsigned long long fixup_copy_count; 471 unsigned long long fixup_copy_count;
472 unsigned long reply_waits_for_send;
460 unsigned long local_inv_needed; 473 unsigned long local_inv_needed;
461 unsigned long nomsg_call_count; 474 unsigned long nomsg_call_count;
462 unsigned long bcall_count; 475 unsigned long bcall_count;
@@ -473,8 +486,6 @@ struct rpcrdma_memreg_ops {
473 struct rpcrdma_mw **); 486 struct rpcrdma_mw **);
474 void (*ro_unmap_sync)(struct rpcrdma_xprt *, 487 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
475 struct list_head *); 488 struct list_head *);
476 void (*ro_unmap_safe)(struct rpcrdma_xprt *,
477 struct rpcrdma_req *, bool);
478 void (*ro_recover_mr)(struct rpcrdma_mw *); 489 void (*ro_recover_mr)(struct rpcrdma_mw *);
479 int (*ro_open)(struct rpcrdma_ia *, 490 int (*ro_open)(struct rpcrdma_ia *,
480 struct rpcrdma_ep *, 491 struct rpcrdma_ep *,
@@ -532,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
532bool frwr_is_supported(struct rpcrdma_ia *); 543bool frwr_is_supported(struct rpcrdma_ia *);
533bool fmr_is_supported(struct rpcrdma_ia *); 544bool fmr_is_supported(struct rpcrdma_ia *);
534 545
546extern struct workqueue_struct *rpcrdma_receive_wq;
547
535/* 548/*
536 * Endpoint calls - xprtrdma/verbs.c 549 * Endpoint calls - xprtrdma/verbs.c
537 */ 550 */
@@ -554,6 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
554void rpcrdma_destroy_req(struct rpcrdma_req *); 567void rpcrdma_destroy_req(struct rpcrdma_req *);
555int rpcrdma_buffer_create(struct rpcrdma_xprt *); 568int rpcrdma_buffer_create(struct rpcrdma_xprt *);
556void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 569void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
570struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
571void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
557 572
558struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); 573struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
559void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); 574void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
@@ -610,12 +625,18 @@ enum rpcrdma_chunktype {
610 rpcrdma_replych 625 rpcrdma_replych
611}; 626};
612 627
613bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, 628int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
614 u32, struct xdr_buf *, enum rpcrdma_chunktype); 629 struct rpcrdma_req *req, u32 hdrlen,
615void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); 630 struct xdr_buf *xdr,
631 enum rpcrdma_chunktype rtype);
632void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
616int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); 633int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
617void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); 634void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
618void rpcrdma_reply_handler(struct work_struct *work); 635void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
636void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
637void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
638 struct rpcrdma_req *req);
639void rpcrdma_deferred_completion(struct work_struct *work);
619 640
620static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) 641static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
621{ 642{
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9b5de31aa429..9cc850c2719e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * linux/net/sunrpc/xprtsock.c 3 * linux/net/sunrpc/xprtsock.c
3 * 4 *
@@ -551,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task)
551 default: 552 default:
552 dprintk("RPC: sendmsg returned unrecognized error %d\n", 553 dprintk("RPC: sendmsg returned unrecognized error %d\n",
553 -status); 554 -status);
555 /* fall through */
554 case -EPIPE: 556 case -EPIPE:
555 xs_close(xprt); 557 xs_close(xprt);
556 status = -ENOTCONN; 558 status = -ENOTCONN;
@@ -1610,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk)
1610 xprt->connect_cookie++; 1612 xprt->connect_cookie++;
1611 clear_bit(XPRT_CONNECTED, &xprt->state); 1613 clear_bit(XPRT_CONNECTED, &xprt->state);
1612 xs_tcp_force_close(xprt); 1614 xs_tcp_force_close(xprt);
1615 /* fall through */
1613 case TCP_CLOSING: 1616 case TCP_CLOSING:
1614 /* 1617 /*
1615 * If the server closed down the connection, make sure that 1618 * If the server closed down the connection, make sure that
@@ -2203,7 +2206,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
2203 struct sock_xprt *transport = 2206 struct sock_xprt *transport =
2204 container_of(work, struct sock_xprt, connect_worker.work); 2207 container_of(work, struct sock_xprt, connect_worker.work);
2205 struct rpc_xprt *xprt = &transport->xprt; 2208 struct rpc_xprt *xprt = &transport->xprt;
2206 struct socket *sock = transport->sock; 2209 struct socket *sock;
2207 int status = -EIO; 2210 int status = -EIO;
2208 2211
2209 sock = xs_create_sock(xprt, transport, 2212 sock = xs_create_sock(xprt, transport,
@@ -2367,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2367 switch (ret) { 2370 switch (ret) {
2368 case 0: 2371 case 0:
2369 xs_set_srcport(transport, sock); 2372 xs_set_srcport(transport, sock);
2373 /* fall through */
2370 case -EINPROGRESS: 2374 case -EINPROGRESS:
2371 /* SYN_SENT! */ 2375 /* SYN_SENT! */
2372 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) 2376 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2418,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2418 default: 2422 default:
2419 printk("%s: connect returned unhandled error %d\n", 2423 printk("%s: connect returned unhandled error %d\n",
2420 __func__, status); 2424 __func__, status);
2425 /* fall through */
2421 case -EADDRNOTAVAIL: 2426 case -EADDRNOTAVAIL:
2422 /* We're probably in TIME_WAIT. Get rid of existing socket, 2427 /* We're probably in TIME_WAIT. Get rid of existing socket,
2423 * and retry 2428 * and retry
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 0531b41d1f2d..74b9d916a58b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
345 return sizeof(struct switchdev_obj_port_vlan); 345 return sizeof(struct switchdev_obj_port_vlan);
346 case SWITCHDEV_OBJ_ID_PORT_MDB: 346 case SWITCHDEV_OBJ_ID_PORT_MDB:
347 return sizeof(struct switchdev_obj_port_mdb); 347 return sizeof(struct switchdev_obj_port_mdb);
348 case SWITCHDEV_OBJ_ID_HOST_MDB:
349 return sizeof(struct switchdev_obj_port_mdb);
348 default: 350 default:
349 BUG(); 351 BUG();
350 } 352 }
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 31b9f9c52974..37bb0bfbd936 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux TIPC layer 3# Makefile for the Linux TIPC layer
3# 4#
@@ -8,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \
8 core.o link.o discover.o msg.o \ 9 core.o link.o discover.o msg.o \
9 name_distr.o subscr.o monitor.o name_table.o net.o \ 10 name_distr.o subscr.o monitor.o name_table.o net.o \
10 netlink.o netlink_compat.o node.o socket.o eth_media.o \ 11 netlink.o netlink_compat.o node.o socket.o eth_media.o \
11 server.o socket.o 12 server.o socket.o group.o
12 13
13tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o 14tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
14tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o 15tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 7d99029df342..329325bd553e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -233,7 +233,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
233 struct sk_buff_head xmitq; 233 struct sk_buff_head xmitq;
234 int rc = 0; 234 int rc = 0;
235 235
236 __skb_queue_head_init(&xmitq); 236 skb_queue_head_init(&xmitq);
237 tipc_bcast_lock(net); 237 tipc_bcast_lock(net);
238 if (tipc_link_bc_peers(l)) 238 if (tipc_link_bc_peers(l))
239 rc = tipc_link_xmit(l, pkts, &xmitq); 239 rc = tipc_link_xmit(l, pkts, &xmitq);
@@ -258,20 +258,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
258static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, 258static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
259 struct tipc_nlist *dests, u16 *cong_link_cnt) 259 struct tipc_nlist *dests, u16 *cong_link_cnt)
260{ 260{
261 struct tipc_dest *dst, *tmp;
261 struct sk_buff_head _pkts; 262 struct sk_buff_head _pkts;
262 struct u32_item *n, *tmp; 263 u32 dnode, selector;
263 u32 dst, selector;
264 264
265 selector = msg_link_selector(buf_msg(skb_peek(pkts))); 265 selector = msg_link_selector(buf_msg(skb_peek(pkts)));
266 __skb_queue_head_init(&_pkts); 266 skb_queue_head_init(&_pkts);
267 267
268 list_for_each_entry_safe(n, tmp, &dests->list, list) { 268 list_for_each_entry_safe(dst, tmp, &dests->list, list) {
269 dst = n->value; 269 dnode = dst->node;
270 if (!tipc_msg_pskb_copy(dst, pkts, &_pkts)) 270 if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts))
271 return -ENOMEM; 271 return -ENOMEM;
272 272
273 /* Any other return value than -ELINKCONG is ignored */ 273 /* Any other return value than -ELINKCONG is ignored */
274 if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG) 274 if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG)
275 (*cong_link_cnt)++; 275 (*cong_link_cnt)++;
276 } 276 }
277 return 0; 277 return 0;
@@ -554,7 +554,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node)
554{ 554{
555 if (node == nl->self) 555 if (node == nl->self)
556 nl->local = true; 556 nl->local = true;
557 else if (u32_push(&nl->list, node)) 557 else if (tipc_dest_push(&nl->list, node, 0))
558 nl->remote++; 558 nl->remote++;
559} 559}
560 560
@@ -562,13 +562,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node)
562{ 562{
563 if (node == nl->self) 563 if (node == nl->self)
564 nl->local = false; 564 nl->local = false;
565 else if (u32_del(&nl->list, node)) 565 else if (tipc_dest_del(&nl->list, node, 0))
566 nl->remote--; 566 nl->remote--;
567} 567}
568 568
569void tipc_nlist_purge(struct tipc_nlist *nl) 569void tipc_nlist_purge(struct tipc_nlist *nl)
570{ 570{
571 u32_list_purge(&nl->list); 571 tipc_dest_list_purge(&nl->list);
572 nl->remote = 0; 572 nl->remote = 0;
573 nl->local = 0; 573 nl->local = 0;
574} 574}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5cc5398be722..964342689f2c 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -132,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
132 return &tipc_net(net)->node_list; 132 return &tipc_net(net)->node_list;
133} 133}
134 134
135static inline struct tipc_server *tipc_topsrv(struct net *net)
136{
137 return tipc_net(net)->topsrv;
138}
139
135static inline unsigned int tipc_hashfn(u32 addr) 140static inline unsigned int tipc_hashfn(u32 addr)
136{ 141{
137 return addr & (NODE_HTABLE_SIZE - 1); 142 return addr & (NODE_HTABLE_SIZE - 1);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 02462d67d191..92e4828c6b09 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -224,9 +224,9 @@ void tipc_disc_remove_dest(struct tipc_link_req *req)
224 * 224 *
225 * Called whenever a link setup request timer associated with a bearer expires. 225 * Called whenever a link setup request timer associated with a bearer expires.
226 */ 226 */
227static void disc_timeout(unsigned long data) 227static void disc_timeout(struct timer_list *t)
228{ 228{
229 struct tipc_link_req *req = (struct tipc_link_req *)data; 229 struct tipc_link_req *req = from_timer(req, t, timer);
230 struct sk_buff *skb; 230 struct sk_buff *skb;
231 int max_delay; 231 int max_delay;
232 232
@@ -292,7 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
292 req->num_nodes = 0; 292 req->num_nodes = 0;
293 req->timer_intv = TIPC_LINK_REQ_INIT; 293 req->timer_intv = TIPC_LINK_REQ_INIT;
294 spin_lock_init(&req->lock); 294 spin_lock_init(&req->lock);
295 setup_timer(&req->timer, disc_timeout, (unsigned long)req); 295 timer_setup(&req->timer, disc_timeout, 0);
296 mod_timer(&req->timer, jiffies + req->timer_intv); 296 mod_timer(&req->timer, jiffies + req->timer_intv);
297 b->link_req = req; 297 b->link_req = req;
298 *skb = skb_clone(req->buf, GFP_ATOMIC); 298 *skb = skb_clone(req->buf, GFP_ATOMIC);
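The conversion above is the stock timer_list API migration: the callback now receives the struct timer_list pointer and recovers its enclosing object via from_timer(), which is container_of() over the embedded timer. A self-contained illustration of the containment trick, with stand-in types rather than the kernel's own:

#include <stddef.h>
#include <stdio.h>

struct timer_list_demo { int expired; };	/* stand-in for the kernel type */

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct link_req_demo {
	int timer_intv;
	struct timer_list_demo timer;
};

/* What from_timer() expands to: container_of over the timer field */
static void disc_timeout_demo(struct timer_list_demo *t)
{
	struct link_req_demo *req =
		container_of(t, struct link_req_demo, timer);

	printf("request interval: %d\n", req->timer_intv);
}

int main(void)
{
	struct link_req_demo req = { .timer_intv = 125 };

	disc_timeout_demo(&req.timer);	/* the kernel calls this on expiry */
	return 0;
}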
diff --git a/net/tipc/group.c b/net/tipc/group.c
new file mode 100644
index 000000000000..12777cac638a
--- /dev/null
+++ b/net/tipc/group.c
@@ -0,0 +1,871 @@
1/*
2 * net/tipc/group.c: TIPC group messaging code
3 *
4 * Copyright (c) 2017, Ericsson AB
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "core.h"
37#include "addr.h"
38#include "group.h"
39#include "bcast.h"
40#include "server.h"
41#include "msg.h"
42#include "socket.h"
43#include "node.h"
44#include "name_table.h"
45#include "subscr.h"
46
47#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
48#define ADV_IDLE ADV_UNIT
49#define ADV_ACTIVE (ADV_UNIT * 12)
50
51enum mbr_state {
52 MBR_QUARANTINED,
53 MBR_DISCOVERED,
54 MBR_JOINING,
55 MBR_PUBLISHED,
56 MBR_JOINED,
57 MBR_PENDING,
58 MBR_ACTIVE,
59 MBR_RECLAIMING,
60 MBR_REMITTED,
61 MBR_LEAVING
62};
63
64struct tipc_member {
65 struct rb_node tree_node;
66 struct list_head list;
67 struct list_head congested;
68 struct sk_buff *event_msg;
69 struct sk_buff_head deferredq;
70 struct tipc_group *group;
71 u32 node;
72 u32 port;
73 u32 instance;
74 enum mbr_state state;
75 u16 advertised;
76 u16 window;
77 u16 bc_rcv_nxt;
78 u16 bc_syncpt;
79 u16 bc_acked;
80 bool usr_pending;
81};
82
83struct tipc_group {
84 struct rb_root members;
85 struct list_head congested;
86 struct list_head pending;
87 struct list_head active;
88 struct list_head reclaiming;
89 struct tipc_nlist dests;
90 struct net *net;
91 int subid;
92 u32 type;
93 u32 instance;
94 u32 domain;
95 u32 scope;
96 u32 portid;
97 u16 member_cnt;
98 u16 active_cnt;
99 u16 max_active;
100 u16 bc_snd_nxt;
101 u16 bc_ackers;
102 bool loopback;
103 bool events;
104};
105
106static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
107 int mtyp, struct sk_buff_head *xmitq);
108
109static void tipc_group_decr_active(struct tipc_group *grp,
110 struct tipc_member *m)
111{
112 if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
113 grp->active_cnt--;
114}
115
116static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
117{
118 int max_active, active_pool, idle_pool;
119 int mcnt = grp->member_cnt + 1;
120
121 /* Limit simultaneous reception from other members */
122 max_active = min(mcnt / 8, 64);
123 max_active = max(max_active, 16);
124 grp->max_active = max_active;
125
126 /* Reserve blocks for active and idle members */
127 active_pool = max_active * ADV_ACTIVE;
128 idle_pool = (mcnt - max_active) * ADV_IDLE;
129
130 /* Scale to bytes, considering worst-case truesize/msgsize ratio */
131 return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
132}
133
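As a concrete check of the sizing rule above, here is a standalone version of the calculation with hypothetical constants (the real ADV_UNIT is derived from MAX_MSG_SIZE, MAX_H_SIZE and FLOWCTL_BLK_SZ):

#include <stdio.h>

#define FLOWCTL_BLK_SZ	1024	/* hypothetical block size */
#define ADV_UNIT	18	/* hypothetical; see the #defines above */
#define ADV_IDLE	ADV_UNIT
#define ADV_ACTIVE	(ADV_UNIT * 12)

static int rcvbuf_limit(int member_cnt)
{
	int mcnt = member_cnt + 1;
	int max_active = mcnt / 8 < 64 ? mcnt / 8 : 64;

	if (max_active < 16)
		max_active = 16;
	return (max_active * ADV_ACTIVE + (mcnt - max_active) * ADV_IDLE)
		* FLOWCTL_BLK_SZ * 4;
}

int main(void)
{
	/* 100 members: mcnt = 101, max_active clamps up to 16, so the
	 * pool is 16 * 216 + 85 * 18 = 4986 blocks before byte scaling.
	 */
	printf("limit = %d bytes\n", rcvbuf_limit(100));
	return 0;
}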
134u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
135{
136 return grp->bc_snd_nxt;
137}
138
139static bool tipc_group_is_enabled(struct tipc_member *m)
140{
141 return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
142}
143
144static bool tipc_group_is_receiver(struct tipc_member *m)
145{
146 return m && m->state >= MBR_JOINED;
147}
148
149u32 tipc_group_exclude(struct tipc_group *grp)
150{
151 if (!grp->loopback)
152 return grp->portid;
153 return 0;
154}
155
156int tipc_group_size(struct tipc_group *grp)
157{
158 return grp->member_cnt;
159}
160
161struct tipc_group *tipc_group_create(struct net *net, u32 portid,
162 struct tipc_group_req *mreq)
163{
164 struct tipc_group *grp;
165 u32 type = mreq->type;
166
167 grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
168 if (!grp)
169 return NULL;
170 tipc_nlist_init(&grp->dests, tipc_own_addr(net));
171 INIT_LIST_HEAD(&grp->congested);
172 INIT_LIST_HEAD(&grp->active);
173 INIT_LIST_HEAD(&grp->pending);
174 INIT_LIST_HEAD(&grp->reclaiming);
175 grp->members = RB_ROOT;
176 grp->net = net;
177 grp->portid = portid;
178 grp->domain = addr_domain(net, mreq->scope);
179 grp->type = type;
180 grp->instance = mreq->instance;
181 grp->scope = mreq->scope;
182 grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
183 grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
184 if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
185 return grp;
186 kfree(grp);
187 return NULL;
188}
189
190void tipc_group_delete(struct net *net, struct tipc_group *grp)
191{
192 struct rb_root *tree = &grp->members;
193 struct tipc_member *m, *tmp;
194 struct sk_buff_head xmitq;
195
196 __skb_queue_head_init(&xmitq);
197
198 rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
199 tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
200 list_del(&m->list);
201 kfree(m);
202 }
203 tipc_node_distr_xmit(net, &xmitq);
204 tipc_nlist_purge(&grp->dests);
205 tipc_topsrv_kern_unsubscr(net, grp->subid);
206 kfree(grp);
207}
208
209struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
210 u32 node, u32 port)
211{
212 struct rb_node *n = grp->members.rb_node;
213 u64 nkey, key = (u64)node << 32 | port;
214 struct tipc_member *m;
215
216 while (n) {
217 m = container_of(n, struct tipc_member, tree_node);
218 nkey = (u64)m->node << 32 | m->port;
219 if (key < nkey)
220 n = n->rb_left;
221 else if (key > nkey)
222 n = n->rb_right;
223 else
224 return m;
225 }
226 return NULL;
227}
228
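The tree is ordered on a composite 64-bit key, node address in the high word and port in the low word, so two members compare first by node and then by port. A small self-contained check with made-up addresses:

#include <assert.h>
#include <stdint.h>

static uint64_t member_key(uint32_t node, uint32_t port)
{
	return (uint64_t)node << 32 | port;
}

int main(void)
{
	/* same node: the port decides the ordering */
	assert(member_key(0x1001001, 5) < member_key(0x1001001, 9));
	/* different nodes: the node dominates, whatever the ports */
	assert(member_key(0x1001001, 9) < member_key(0x1001002, 5));
	return 0;
}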
229static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
230 u32 node, u32 port)
231{
232 struct tipc_member *m;
233
234 m = tipc_group_find_member(grp, node, port);
235 if (m && tipc_group_is_enabled(m))
236 return m;
237 return NULL;
238}
239
240static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
241 u32 node)
242{
243 struct tipc_member *m;
244 struct rb_node *n;
245
246 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
247 m = container_of(n, struct tipc_member, tree_node);
248 if (m->node == node)
249 return m;
250 }
251 return NULL;
252}
253
254static void tipc_group_add_to_tree(struct tipc_group *grp,
255 struct tipc_member *m)
256{
257 u64 nkey, key = (u64)m->node << 32 | m->port;
258 struct rb_node **n, *parent = NULL;
259 struct tipc_member *tmp;
260
261 n = &grp->members.rb_node;
262 while (*n) {
 263 parent = *n;
 264 tmp = container_of(parent, struct tipc_member, tree_node);
266 nkey = (u64)tmp->node << 32 | tmp->port;
267 if (key < nkey)
268 n = &(*n)->rb_left;
269 else if (key > nkey)
270 n = &(*n)->rb_right;
271 else
272 return;
273 }
274 rb_link_node(&m->tree_node, parent, n);
275 rb_insert_color(&m->tree_node, &grp->members);
276}
277
278static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
279 u32 node, u32 port,
280 int state)
281{
282 struct tipc_member *m;
283
284 m = kzalloc(sizeof(*m), GFP_ATOMIC);
285 if (!m)
286 return NULL;
287 INIT_LIST_HEAD(&m->list);
288 INIT_LIST_HEAD(&m->congested);
289 __skb_queue_head_init(&m->deferredq);
290 m->group = grp;
291 m->node = node;
292 m->port = port;
293 m->bc_acked = grp->bc_snd_nxt - 1;
294 grp->member_cnt++;
295 tipc_group_add_to_tree(grp, m);
296 tipc_nlist_add(&grp->dests, m->node);
297 m->state = state;
298 return m;
299}
300
301void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
302{
303 tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
304}
305
306static void tipc_group_delete_member(struct tipc_group *grp,
307 struct tipc_member *m)
308{
309 rb_erase(&m->tree_node, &grp->members);
310 grp->member_cnt--;
311
312 /* Check if we were waiting for replicast ack from this member */
313 if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
314 grp->bc_ackers--;
315
316 list_del_init(&m->list);
317 list_del_init(&m->congested);
318 tipc_group_decr_active(grp, m);
319
320 /* If last member on a node, remove node from dest list */
321 if (!tipc_group_find_node(grp, m->node))
322 tipc_nlist_del(&grp->dests, m->node);
323
324 kfree(m);
325}
326
327struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
328{
329 return &grp->dests;
330}
331
332void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
333 int *scope)
334{
335 seq->type = grp->type;
336 seq->lower = grp->instance;
337 seq->upper = grp->instance;
338 *scope = grp->scope;
339}
340
341void tipc_group_update_member(struct tipc_member *m, int len)
342{
343 struct tipc_group *grp = m->group;
344 struct tipc_member *_m, *tmp;
345
346 if (!tipc_group_is_enabled(m))
347 return;
348
349 m->window -= len;
350
351 if (m->window >= ADV_IDLE)
352 return;
353
354 if (!list_empty(&m->congested))
355 return;
356
357 /* Sort member into congested members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
359 if (m->window > _m->window)
360 continue;
361 list_add_tail(&m->congested, &_m->congested);
362 return;
363 }
364 list_add_tail(&m->congested, &grp->congested);
365}
366
367void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
368{
369 u16 prev = grp->bc_snd_nxt - 1;
370 struct tipc_member *m;
371 struct rb_node *n;
372
373 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
374 m = container_of(n, struct tipc_member, tree_node);
375 if (tipc_group_is_enabled(m)) {
376 tipc_group_update_member(m, len);
377 m->bc_acked = prev;
378 }
379 }
380
 381 /* Mark number of acknowledgments to expect, if any */
382 if (ack)
383 grp->bc_ackers = grp->member_cnt;
384 grp->bc_snd_nxt++;
385}
386
387bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
388 int len, struct tipc_member **mbr)
389{
390 struct sk_buff_head xmitq;
391 struct tipc_member *m;
392 int adv, state;
393
394 m = tipc_group_find_dest(grp, dnode, dport);
395 *mbr = m;
396 if (!m)
397 return false;
398 if (m->usr_pending)
399 return true;
400 if (m->window >= len)
401 return false;
402 m->usr_pending = true;
403
404 /* If not fully advertised, do it now to prevent mutual blocking */
405 adv = m->advertised;
406 state = m->state;
407 if (state < MBR_JOINED)
408 return true;
409 if (state == MBR_JOINED && adv == ADV_IDLE)
410 return true;
411 if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
412 return true;
413 if (state == MBR_PENDING && adv == ADV_IDLE)
414 return true;
415 skb_queue_head_init(&xmitq);
416 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
417 tipc_node_distr_xmit(grp->net, &xmitq);
418 return true;
419}
420
421bool tipc_group_bc_cong(struct tipc_group *grp, int len)
422{
423 struct tipc_member *m = NULL;
424
425 /* If prev bcast was replicast, reject until all receivers have acked */
426 if (grp->bc_ackers)
427 return true;
428
429 if (list_empty(&grp->congested))
430 return false;
431
432 m = list_first_entry(&grp->congested, struct tipc_member, congested);
433 if (m->window >= len)
434 return false;
435
436 return tipc_group_cong(grp, m->node, m->port, len, &m);
437}
438
439/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number
440 */
441static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
442{
443 struct tipc_msg *_hdr, *hdr = buf_msg(skb);
444 u16 bc_seqno = msg_grp_bc_seqno(hdr);
445 struct sk_buff *_skb, *tmp;
446 int mtyp = msg_type(hdr);
447
448 /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
449 if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
450 skb_queue_walk_safe(defq, _skb, tmp) {
451 _hdr = buf_msg(_skb);
452 if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
453 continue;
454 __skb_queue_before(defq, _skb, skb);
455 return;
456 }
457 /* Bcast was not bypassed, - add to tail */
458 }
459 /* Unicasts are never bypassed, - always add to tail */
460 __skb_queue_tail(defq, skb);
461}
462
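The less()/more() tests used for the sort work on 16-bit sequence numbers that wrap, so they compare signed distance rather than raw magnitude. A sketch of that wraparound-safe comparison, equivalent in spirit to the kernel helper:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* a precedes b if the signed distance from b back to a is negative;
 * this stays correct across u16 wraparound.
 */
static bool seq_less(uint16_t a, uint16_t b)
{
	return (int16_t)(a - b) < 0;
}

int main(void)
{
	assert(seq_less(5, 9));		/* the easy case */
	assert(seq_less(65535, 3));	/* still correct across the wrap */
	return 0;
}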
463/* tipc_group_filter_msg() - determine if we should accept arriving message
464 */
465void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
466 struct sk_buff_head *xmitq)
467{
468 struct sk_buff *skb = __skb_dequeue(inputq);
469 bool ack, deliver, update, leave = false;
470 struct sk_buff_head *defq;
471 struct tipc_member *m;
472 struct tipc_msg *hdr;
473 u32 node, port;
474 int mtyp, blks;
475
476 if (!skb)
477 return;
478
479 hdr = buf_msg(skb);
480 node = msg_orignode(hdr);
481 port = msg_origport(hdr);
482
483 if (!msg_in_group(hdr))
484 goto drop;
485
486 m = tipc_group_find_member(grp, node, port);
487 if (!tipc_group_is_receiver(m))
488 goto drop;
489
490 if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
491 goto drop;
492
493 TIPC_SKB_CB(skb)->orig_member = m->instance;
494 defq = &m->deferredq;
495 tipc_group_sort_msg(skb, defq);
496
497 while ((skb = skb_peek(defq))) {
498 hdr = buf_msg(skb);
499 mtyp = msg_type(hdr);
500 deliver = true;
501 ack = false;
502 update = false;
503
504 if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
505 break;
506
507 /* Decide what to do with message */
508 switch (mtyp) {
509 case TIPC_GRP_MCAST_MSG:
510 if (msg_nameinst(hdr) != grp->instance) {
511 update = true;
512 deliver = false;
513 }
514 /* Fall thru */
515 case TIPC_GRP_BCAST_MSG:
516 m->bc_rcv_nxt++;
517 ack = msg_grp_bc_ack_req(hdr);
518 break;
519 case TIPC_GRP_UCAST_MSG:
520 break;
521 case TIPC_GRP_MEMBER_EVT:
522 if (m->state == MBR_LEAVING)
523 leave = true;
524 if (!grp->events)
525 deliver = false;
526 break;
527 default:
528 break;
529 }
530
531 /* Execute decisions */
532 __skb_dequeue(defq);
533 if (deliver)
534 __skb_queue_tail(inputq, skb);
535 else
536 kfree_skb(skb);
537
538 if (ack)
539 tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
540
541 if (leave) {
542 __skb_queue_purge(defq);
543 tipc_group_delete_member(grp, m);
544 break;
545 }
546 if (!update)
547 continue;
548
549 blks = msg_blocks(hdr);
550 tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
551 }
552 return;
553drop:
554 kfree_skb(skb);
555}
556
557void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
558 u32 port, struct sk_buff_head *xmitq)
559{
560 struct list_head *active = &grp->active;
561 int max_active = grp->max_active;
562 int reclaim_limit = max_active * 3 / 4;
563 int active_cnt = grp->active_cnt;
564 struct tipc_member *m, *rm;
565
566 m = tipc_group_find_member(grp, node, port);
567 if (!m)
568 return;
569
570 m->advertised -= blks;
571
572 switch (m->state) {
573 case MBR_JOINED:
574 /* Reclaim advertised space from least active member */
575 if (!list_empty(active) && active_cnt >= reclaim_limit) {
576 rm = list_first_entry(active, struct tipc_member, list);
577 rm->state = MBR_RECLAIMING;
578 list_move_tail(&rm->list, &grp->reclaiming);
579 tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
580 }
581 /* If max active, become pending and wait for reclaimed space */
582 if (active_cnt >= max_active) {
583 m->state = MBR_PENDING;
584 list_add_tail(&m->list, &grp->pending);
585 break;
586 }
587 /* Otherwise become active */
588 m->state = MBR_ACTIVE;
589 list_add_tail(&m->list, &grp->active);
590 grp->active_cnt++;
591 /* Fall through */
592 case MBR_ACTIVE:
593 if (!list_is_last(&m->list, &grp->active))
594 list_move_tail(&m->list, &grp->active);
595 if (m->advertised > (ADV_ACTIVE * 3 / 4))
596 break;
597 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
598 break;
599 case MBR_REMITTED:
600 if (m->advertised > ADV_IDLE)
601 break;
602 m->state = MBR_JOINED;
603 if (m->advertised < ADV_IDLE) {
604 pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
605 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
606 }
607 break;
608 case MBR_RECLAIMING:
609 case MBR_DISCOVERED:
610 case MBR_JOINING:
611 case MBR_LEAVING:
612 default:
613 break;
614 }
615}
616
617static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
618 int mtyp, struct sk_buff_head *xmitq)
619{
620 struct tipc_msg *hdr;
621 struct sk_buff *skb;
622 int adv = 0;
623
624 skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
625 m->node, tipc_own_addr(grp->net),
626 m->port, grp->portid, 0);
627 if (!skb)
628 return;
629
630 if (m->state == MBR_ACTIVE)
631 adv = ADV_ACTIVE - m->advertised;
632 else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
633 adv = ADV_IDLE - m->advertised;
634
635 hdr = buf_msg(skb);
636
637 if (mtyp == GRP_JOIN_MSG) {
638 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
639 msg_set_adv_win(hdr, adv);
640 m->advertised += adv;
641 } else if (mtyp == GRP_LEAVE_MSG) {
642 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
643 } else if (mtyp == GRP_ADV_MSG) {
644 msg_set_adv_win(hdr, adv);
645 m->advertised += adv;
646 } else if (mtyp == GRP_ACK_MSG) {
647 msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
648 } else if (mtyp == GRP_REMIT_MSG) {
649 msg_set_grp_remitted(hdr, m->window);
650 }
651 __skb_queue_tail(xmitq, skb);
652}
653
654void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
655 struct tipc_msg *hdr, struct sk_buff_head *inputq,
656 struct sk_buff_head *xmitq)
657{
658 u32 node = msg_orignode(hdr);
659 u32 port = msg_origport(hdr);
660 struct tipc_member *m, *pm;
661 struct tipc_msg *ehdr;
662 u16 remitted, in_flight;
663
664 if (!grp)
665 return;
666
667 m = tipc_group_find_member(grp, node, port);
668
669 switch (msg_type(hdr)) {
670 case GRP_JOIN_MSG:
671 if (!m)
672 m = tipc_group_create_member(grp, node, port,
673 MBR_QUARANTINED);
674 if (!m)
675 return;
676 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
677 m->bc_rcv_nxt = m->bc_syncpt;
678 m->window += msg_adv_win(hdr);
679
680 /* Wait until PUBLISH event is received */
681 if (m->state == MBR_DISCOVERED) {
682 m->state = MBR_JOINING;
683 } else if (m->state == MBR_PUBLISHED) {
684 m->state = MBR_JOINED;
685 *usr_wakeup = true;
686 m->usr_pending = false;
687 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
688 ehdr = buf_msg(m->event_msg);
689 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
690 __skb_queue_tail(inputq, m->event_msg);
691 }
692 if (m->window < ADV_IDLE)
693 tipc_group_update_member(m, 0);
694 else
695 list_del_init(&m->congested);
696 return;
697 case GRP_LEAVE_MSG:
698 if (!m)
699 return;
700 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
701
702 /* Wait until WITHDRAW event is received */
703 if (m->state != MBR_LEAVING) {
704 tipc_group_decr_active(grp, m);
705 m->state = MBR_LEAVING;
706 return;
707 }
708 /* Otherwise deliver already received WITHDRAW event */
709 ehdr = buf_msg(m->event_msg);
710 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
711 __skb_queue_tail(inputq, m->event_msg);
712 *usr_wakeup = true;
713 list_del_init(&m->congested);
714 return;
715 case GRP_ADV_MSG:
716 if (!m)
717 return;
718 m->window += msg_adv_win(hdr);
719 *usr_wakeup = m->usr_pending;
720 m->usr_pending = false;
721 list_del_init(&m->congested);
722 return;
723 case GRP_ACK_MSG:
724 if (!m)
725 return;
726 m->bc_acked = msg_grp_bc_acked(hdr);
727 if (--grp->bc_ackers)
728 break;
729 *usr_wakeup = true;
730 m->usr_pending = false;
731 return;
732 case GRP_RECLAIM_MSG:
733 if (!m)
734 return;
735 *usr_wakeup = m->usr_pending;
736 m->usr_pending = false;
737 tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
738 m->window = ADV_IDLE;
739 return;
740 case GRP_REMIT_MSG:
741 if (!m || m->state != MBR_RECLAIMING)
742 return;
743
744 list_del_init(&m->list);
745 grp->active_cnt--;
746 remitted = msg_grp_remitted(hdr);
747
748 /* Messages preceding the REMIT still in receive queue */
749 if (m->advertised > remitted) {
750 m->state = MBR_REMITTED;
751 in_flight = m->advertised - remitted;
752 }
753 /* All messages preceding the REMIT have been read */
754 if (m->advertised <= remitted) {
755 m->state = MBR_JOINED;
756 in_flight = 0;
757 }
758 /* ..and the REMIT overtaken by more messages => re-advertise */
759 if (m->advertised < remitted)
760 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
761
762 m->advertised = ADV_IDLE + in_flight;
763
764 /* Set oldest pending member to active and advertise */
765 if (list_empty(&grp->pending))
766 return;
767 pm = list_first_entry(&grp->pending, struct tipc_member, list);
768 pm->state = MBR_ACTIVE;
769 list_move_tail(&pm->list, &grp->active);
770 grp->active_cnt++;
771 if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
772 tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
773 return;
774 default:
775 pr_warn("Received unknown GROUP_PROTO message\n");
776 }
777}
778
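The REMIT arithmetic above shrinks a reclaimed member's advertised window to the idle allowance plus whatever credit was still in flight when the remit was computed. A worked example with made-up numbers:

#include <stdio.h>

#define ADV_IDLE 18	/* hypothetical, as in the sketch further up */

int main(void)
{
	unsigned int advertised = 216;	/* blocks granted to the member */
	unsigned int remitted = 200;	/* window reported in the REMIT */
	unsigned int in_flight = advertised > remitted ?
				 advertised - remitted : 0;

	/* 16 blocks were consumed after the REMIT was computed, so the
	 * member keeps credit for them on top of the idle allowance.
	 */
	printf("new advertised = %u blocks\n", ADV_IDLE + in_flight);	/* 34 */
	return 0;
}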
779/* tipc_group_member_evt() - receive and handle a member up/down event
780 */
781void tipc_group_member_evt(struct tipc_group *grp,
782 bool *usr_wakeup,
783 int *sk_rcvbuf,
784 struct sk_buff *skb,
785 struct sk_buff_head *inputq,
786 struct sk_buff_head *xmitq)
787{
788 struct tipc_msg *hdr = buf_msg(skb);
789 struct tipc_event *evt = (void *)msg_data(hdr);
790 u32 instance = evt->found_lower;
791 u32 node = evt->port.node;
792 u32 port = evt->port.ref;
793 int event = evt->event;
794 struct tipc_member *m;
795 struct net *net;
796 bool node_up;
797 u32 self;
798
799 if (!grp)
800 goto drop;
801
802 net = grp->net;
803 self = tipc_own_addr(net);
804 if (!grp->loopback && node == self && port == grp->portid)
805 goto drop;
806
807 /* Convert message before delivery to user */
808 msg_set_hdr_sz(hdr, GROUP_H_SIZE);
809 msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
810 msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
811 msg_set_origport(hdr, port);
812 msg_set_orignode(hdr, node);
813 msg_set_nametype(hdr, grp->type);
814 msg_set_grp_evt(hdr, event);
815
816 m = tipc_group_find_member(grp, node, port);
817
818 if (event == TIPC_PUBLISHED) {
819 if (!m)
820 m = tipc_group_create_member(grp, node, port,
821 MBR_DISCOVERED);
822 if (!m)
823 goto drop;
824
825 /* Hold back event if JOIN message not yet received */
826 if (m->state == MBR_DISCOVERED) {
827 m->event_msg = skb;
828 m->state = MBR_PUBLISHED;
829 } else {
830 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
831 __skb_queue_tail(inputq, skb);
832 m->state = MBR_JOINED;
833 *usr_wakeup = true;
834 m->usr_pending = false;
835 }
836 m->instance = instance;
837 TIPC_SKB_CB(skb)->orig_member = m->instance;
838 tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
839 if (m->window < ADV_IDLE)
840 tipc_group_update_member(m, 0);
841 else
842 list_del_init(&m->congested);
843 } else if (event == TIPC_WITHDRAWN) {
844 if (!m)
845 goto drop;
846
847 TIPC_SKB_CB(skb)->orig_member = m->instance;
848
849 *usr_wakeup = true;
850 m->usr_pending = false;
851 node_up = tipc_node_is_up(net, node);
852
853 /* Hold back event if more messages might be expected */
854 if (m->state != MBR_LEAVING && node_up) {
855 m->event_msg = skb;
856 tipc_group_decr_active(grp, m);
857 m->state = MBR_LEAVING;
858 } else {
859 if (node_up)
860 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
861 else
862 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
863 __skb_queue_tail(inputq, skb);
864 }
865 list_del_init(&m->congested);
866 }
867 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
868 return;
869drop:
870 kfree_skb(skb);
871}
diff --git a/net/tipc/group.h b/net/tipc/group.h
new file mode 100644
index 000000000000..d525e1cd7de5
--- /dev/null
+++ b/net/tipc/group.h
@@ -0,0 +1,73 @@
1/*
2 * net/tipc/group.h: Include file for TIPC group unicast/multicast functions
3 *
4 * Copyright (c) 2017, Ericsson AB
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#ifndef _TIPC_GROUP_H
37#define _TIPC_GROUP_H
38
39#include "core.h"
40
41struct tipc_group;
42struct tipc_member;
43struct tipc_msg;
44
45struct tipc_group *tipc_group_create(struct net *net, u32 portid,
46 struct tipc_group_req *mreq);
47void tipc_group_delete(struct net *net, struct tipc_group *grp);
48void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
49struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
50void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
51 int *scope);
52u32 tipc_group_exclude(struct tipc_group *grp);
53void tipc_group_filter_msg(struct tipc_group *grp,
54 struct sk_buff_head *inputq,
55 struct sk_buff_head *xmitq);
56void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
57 int *sk_rcvbuf, struct sk_buff *skb,
58 struct sk_buff_head *inputq,
59 struct sk_buff_head *xmitq);
60void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
61 struct tipc_msg *hdr,
62 struct sk_buff_head *inputq,
63 struct sk_buff_head *xmitq);
64void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
65bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
66 int len, struct tipc_member **m);
67bool tipc_group_bc_cong(struct tipc_group *grp, int len);
68void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
69 u32 port, struct sk_buff_head *xmitq);
70u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
71void tipc_group_update_member(struct tipc_member *m, int len);
72int tipc_group_size(struct tipc_group *grp);
73#endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ac0144f532aa..6bce0b1117bd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l)
239static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, 239static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
240 struct sk_buff_head *xmitq); 240 struct sk_buff_head *xmitq);
241static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, 241static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
242 u16 rcvgap, int tolerance, int priority, 242 bool probe_reply, u16 rcvgap,
243 int tolerance, int priority,
243 struct sk_buff_head *xmitq); 244 struct sk_buff_head *xmitq);
244static void link_print(struct tipc_link *l, const char *str); 245static void link_print(struct tipc_link *l, const char *str);
245static int tipc_link_build_nack_msg(struct tipc_link *l, 246static int tipc_link_build_nack_msg(struct tipc_link *l,
@@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
773 } 774 }
774 775
775 if (state || probe || setup) 776 if (state || probe || setup)
776 tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); 777 tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq);
777 778
778 return rc; 779 return rc;
779} 780}
@@ -1039,6 +1040,7 @@ int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
1039static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, 1040static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
1040 struct sk_buff_head *inputq) 1041 struct sk_buff_head *inputq)
1041{ 1042{
1043 struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq;
1042 struct tipc_msg *hdr = buf_msg(skb); 1044 struct tipc_msg *hdr = buf_msg(skb);
1043 1045
1044 switch (msg_user(hdr)) { 1046 switch (msg_user(hdr)) {
@@ -1046,13 +1048,16 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
1046 case TIPC_MEDIUM_IMPORTANCE: 1048 case TIPC_MEDIUM_IMPORTANCE:
1047 case TIPC_HIGH_IMPORTANCE: 1049 case TIPC_HIGH_IMPORTANCE:
1048 case TIPC_CRITICAL_IMPORTANCE: 1050 case TIPC_CRITICAL_IMPORTANCE:
1049 if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) { 1051 if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) {
1050 skb_queue_tail(l->bc_rcvlink->inputq, skb); 1052 skb_queue_tail(mc_inputq, skb);
1051 return true; 1053 return true;
1052 } 1054 }
1053 case CONN_MANAGER: 1055 case CONN_MANAGER:
1054 skb_queue_tail(inputq, skb); 1056 skb_queue_tail(inputq, skb);
1055 return true; 1057 return true;
1058 case GROUP_PROTOCOL:
1059 skb_queue_tail(mc_inputq, skb);
1060 return true;
1056 case NAME_DISTRIBUTOR: 1061 case NAME_DISTRIBUTOR:
1057 l->bc_rcvlink->state = LINK_ESTABLISHED; 1062 l->bc_rcvlink->state = LINK_ESTABLISHED;
1058 skb_queue_tail(l->namedq, skb); 1063 skb_queue_tail(l->namedq, skb);
@@ -1170,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
1170 /* Unicast ACK */ 1175 /* Unicast ACK */
1171 l->rcv_unacked = 0; 1176 l->rcv_unacked = 0;
1172 l->stats.sent_acks++; 1177 l->stats.sent_acks++;
1173 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); 1178 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
1174 return 0; 1179 return 0;
1175} 1180}
1176 1181
@@ -1184,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
1184 if (l->state == LINK_ESTABLISHING) 1189 if (l->state == LINK_ESTABLISHING)
1185 mtyp = ACTIVATE_MSG; 1190 mtyp = ACTIVATE_MSG;
1186 1191
1187 tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); 1192 tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq);
1188 1193
1189 /* Inform peer that this endpoint is going down if applicable */ 1194 /* Inform peer that this endpoint is going down if applicable */
1190 skb = skb_peek_tail(xmitq); 1195 skb = skb_peek_tail(xmitq);
@@ -1211,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
1211 } 1216 }
1212 1217
1213 if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) 1218 if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
1214 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); 1219 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
1215 return 0; 1220 return 0;
1216} 1221}
1217 1222
@@ -1285,7 +1290,8 @@ drop:
1285} 1290}
1286 1291
1287static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, 1292static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1288 u16 rcvgap, int tolerance, int priority, 1293 bool probe_reply, u16 rcvgap,
1294 int tolerance, int priority,
1289 struct sk_buff_head *xmitq) 1295 struct sk_buff_head *xmitq)
1290{ 1296{
1291 struct tipc_link *bcl = l->bc_rcvlink; 1297 struct tipc_link *bcl = l->bc_rcvlink;
@@ -1333,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1333 msg_set_seq_gap(hdr, rcvgap); 1339 msg_set_seq_gap(hdr, rcvgap);
1334 msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); 1340 msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
1335 msg_set_probe(hdr, probe); 1341 msg_set_probe(hdr, probe);
1342 msg_set_is_keepalive(hdr, probe || probe_reply);
1336 tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); 1343 tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
1337 msg_set_size(hdr, INT_H_SIZE + dlen); 1344 msg_set_size(hdr, INT_H_SIZE + dlen);
1338 skb_trim(skb, INT_H_SIZE + dlen); 1345 skb_trim(skb, INT_H_SIZE + dlen);
@@ -1438,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1438 u16 rcv_nxt = l->rcv_nxt; 1445 u16 rcv_nxt = l->rcv_nxt;
1439 u16 dlen = msg_data_sz(hdr); 1446 u16 dlen = msg_data_sz(hdr);
1440 int mtyp = msg_type(hdr); 1447 int mtyp = msg_type(hdr);
1448 bool reply = msg_probe(hdr);
1441 void *data; 1449 void *data;
1442 char *if_name; 1450 char *if_name;
1443 int rc = 0; 1451 int rc = 0;
@@ -1524,9 +1532,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1524 /* Send NACK if peer has sent pkts we haven't received yet */ 1532 /* Send NACK if peer has sent pkts we haven't received yet */
1525 if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) 1533 if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
1526 rcvgap = peers_snd_nxt - l->rcv_nxt; 1534 rcvgap = peers_snd_nxt - l->rcv_nxt;
1527 if (rcvgap || (msg_probe(hdr))) 1535 if (rcvgap || reply)
1528 tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, 1536 tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
1529 0, 0, xmitq); 1537 rcvgap, 0, 0, xmitq);
1530 tipc_link_release_pkts(l, ack); 1538 tipc_link_release_pkts(l, ack);
1531 1539
1532 /* If NACK, retransmit will now start at right position */ 1540 /* If NACK, retransmit will now start at right position */
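[Editorial note: the receive side of the same change. The probe bit is latched into a local `reply` before the header is processed, and a STATE_MSG goes back whenever there is either a gap to report or a probe to answer. Simplified control flow, with error and FSM handling omitted:]

    bool reply = msg_probe(hdr);    /* latch early: hdr may be consumed below */
    ...
    if (rcvgap || reply)            /* gap to report, or probe to answer */
            tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
                                      rcvgap, 0, 0, xmitq);
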
@@ -2118,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
2118 struct sk_buff_head *xmitq) 2126 struct sk_buff_head *xmitq)
2119{ 2127{
2120 l->tolerance = tol; 2128 l->tolerance = tol;
2121 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq); 2129 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
2122} 2130}
2123 2131
2124void tipc_link_set_prio(struct tipc_link *l, u32 prio, 2132void tipc_link_set_prio(struct tipc_link *l, u32 prio,
2125 struct sk_buff_head *xmitq) 2133 struct sk_buff_head *xmitq)
2126{ 2134{
2127 l->priority = prio; 2135 l->priority = prio;
2128 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq); 2136 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq);
2129} 2137}
2130 2138
2131void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) 2139void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 9e109bb1a207..8e884ed06d4b 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -530,8 +530,11 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen,
530 u16 gen = mon->dom_gen; 530 u16 gen = mon->dom_gen;
531 u16 len; 531 u16 len;
532 532
533 if (!tipc_mon_is_active(net, mon)) 533 /* Send invalid record if not active */
534 if (!tipc_mon_is_active(net, mon)) {
535 dom->len = 0;
534 return; 536 return;
537 }
535 538
536 /* Send only a dummy record with ack if peer has acked our last sent */ 539 /* Send only a dummy record with ack if peer has acked our last sent */
537 if (likely(state->acked_gen == gen)) { 540 if (likely(state->acked_gen == gen)) {
@@ -559,6 +562,12 @@ void tipc_mon_get_state(struct net *net, u32 addr,
559 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 562 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
560 struct tipc_peer *peer; 563 struct tipc_peer *peer;
561 564
565 if (!tipc_mon_is_active(net, mon)) {
566 state->probing = false;
567 state->monitoring = true;
568 return;
569 }
570
562 /* Used cached state if table has not changed */ 571 /* Used cached state if table has not changed */
563 if (!state->probing && 572 if (!state->probing &&
564 (state->list_gen == mon->list_gen) && 573 (state->list_gen == mon->list_gen) &&
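[Editorial note: both monitor entry points now handle the inactive case explicitly. tipc_mon_prep() emits a zero-length, hence invalid, domain record instead of returning with stale data, and tipc_mon_get_state() reports "not probing, monitoring covered" so the link timer falls back to its own keepalive cadence. A sketch of the caller-side contract; only the first two parameters of tipc_mon_get_state() are visible in the hunk header above, the rest follow the kernel's actual signature, and the struct type name is assumed:]

    struct tipc_mon_state state = { 0 };

    tipc_mon_get_state(net, peer_addr, &state, bearer_id);
    if (!state.probing && state.monitoring)
            ;   /* overlay monitoring (or an inactive monitor) covers
                 * this peer; no explicit probe needed this round */
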
@@ -578,9 +587,9 @@ void tipc_mon_get_state(struct net *net, u32 addr,
578 read_unlock_bh(&mon->lock); 587 read_unlock_bh(&mon->lock);
579} 588}
580 589
581static void mon_timeout(unsigned long m) 590static void mon_timeout(struct timer_list *t)
582{ 591{
583 struct tipc_monitor *mon = (void *)m; 592 struct tipc_monitor *mon = from_timer(mon, t, timer);
584 struct tipc_peer *self; 593 struct tipc_peer *self;
585 int best_member_cnt = dom_size(mon->peer_cnt) - 1; 594 int best_member_cnt = dom_size(mon->peer_cnt) - 1;
586 595
@@ -623,7 +632,7 @@ int tipc_mon_create(struct net *net, int bearer_id)
623 self->is_up = true; 632 self->is_up = true;
624 self->is_head = true; 633 self->is_head = true;
625 INIT_LIST_HEAD(&self->list); 634 INIT_LIST_HEAD(&self->list);
626 setup_timer(&mon->timer, mon_timeout, (unsigned long)mon); 635 timer_setup(&mon->timer, mon_timeout, 0);
627 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); 636 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
628 mod_timer(&mon->timer, jiffies + mon->timer_intv); 637 mod_timer(&mon->timer, jiffies + mon->timer_intv);
629 return 0; 638 return 0;
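[Editorial note: mon_timeout() is one of several conversions in this merge (tipc_node_timeout() and tipc_sk_timeout() below get the same treatment) from the old setup_timer()-plus-cast idiom to the type-safe timer_setup()/from_timer() API, where the callback recovers its containing object via container_of() instead of an unsigned long cast. The pattern, sketched on a hypothetical structure rather than TIPC's:]

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    struct my_obj {
            struct timer_list timer;
            unsigned long ticks;
    };

    static void my_timeout(struct timer_list *t)
    {
            /* from_timer() is container_of() keyed on the timer member */
            struct my_obj *obj = from_timer(obj, t, timer);

            obj->ticks++;
            mod_timer(&obj->timer, jiffies + HZ);   /* re-arm, 1s cadence */
    }

    /* init path:
     *      timer_setup(&obj->timer, my_timeout, 0);
     *      mod_timer(&obj->timer, jiffies + HZ);
     */
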
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 6ef379f004ac..b0d07b35909d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
174 174
175 if (fragid == LAST_FRAGMENT) { 175 if (fragid == LAST_FRAGMENT) {
176 TIPC_SKB_CB(head)->validated = false; 176 TIPC_SKB_CB(head)->validated = false;
177 if (unlikely(!tipc_msg_validate(head))) 177 if (unlikely(!tipc_msg_validate(&head)))
178 goto err; 178 goto err;
179 *buf = head; 179 *buf = head;
180 TIPC_SKB_CB(head)->tail = NULL; 180 TIPC_SKB_CB(head)->tail = NULL;
@@ -201,11 +201,21 @@ err:
201 * TIPC will ignore the excess, under the assumption that it is optional info 201 * TIPC will ignore the excess, under the assumption that it is optional info
202 * introduced by a later release of the protocol. 202 * introduced by a later release of the protocol.
203 */ 203 */
204bool tipc_msg_validate(struct sk_buff *skb) 204bool tipc_msg_validate(struct sk_buff **_skb)
205{ 205{
206 struct tipc_msg *msg; 206 struct sk_buff *skb = *_skb;
207 struct tipc_msg *hdr;
207 int msz, hsz; 208 int msz, hsz;
208 209
210 /* Ensure that flow control ratio condition is satisfied */
211 if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
212 skb = skb_copy(skb, GFP_ATOMIC);
213 if (!skb)
214 return false;
215 kfree_skb(*_skb);
216 *_skb = skb;
217 }
218
209 if (unlikely(TIPC_SKB_CB(skb)->validated)) 219 if (unlikely(TIPC_SKB_CB(skb)->validated))
210 return true; 220 return true;
211 if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) 221 if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
@@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb)
217 if (unlikely(!pskb_may_pull(skb, hsz))) 227 if (unlikely(!pskb_may_pull(skb, hsz)))
218 return false; 228 return false;
219 229
220 msg = buf_msg(skb); 230 hdr = buf_msg(skb);
221 if (unlikely(msg_version(msg) != TIPC_VERSION)) 231 if (unlikely(msg_version(hdr) != TIPC_VERSION))
222 return false; 232 return false;
223 233
224 msz = msg_size(msg); 234 msz = msg_size(hdr);
225 if (unlikely(msz < hsz)) 235 if (unlikely(msz < hsz))
226 return false; 236 return false;
227 if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) 237 if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
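[Editorial note: tipc_msg_validate() changing from struct sk_buff * to struct sk_buff ** is an ownership change, not a rename. When the buffer's truesize exceeds four times its rounded-up length it would break the socket flow-control assumption (see buf_roundup_len() in msg.h and the FLOWCTL comments in socket.c below), so the function replaces the caller's buffer with a compact skb_copy(). Callers must therefore pass the address of their pointer and hold no aliases across the call. Sketch of the caller-side contract:]

    struct sk_buff *skb = ...;

    if (!tipc_msg_validate(&skb)) {     /* may free and replace skb */
            kfree_skb(skb);             /* free whatever validate left behind */
            return;
    }
    hdr = buf_msg(skb);                 /* re-read: skb may be a new buffer */
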
@@ -411,7 +421,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
411 skb_pull(*iskb, offset); 421 skb_pull(*iskb, offset);
412 imsz = msg_size(buf_msg(*iskb)); 422 imsz = msg_size(buf_msg(*iskb));
413 skb_trim(*iskb, imsz); 423 skb_trim(*iskb, imsz);
414 if (unlikely(!tipc_msg_validate(*iskb))) 424 if (unlikely(!tipc_msg_validate(iskb)))
415 goto none; 425 goto none;
416 *pos += align(imsz); 426 *pos += align(imsz);
417 return true; 427 return true;
@@ -551,7 +561,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
551 return false; 561 return false;
552 if (msg_errcode(msg)) 562 if (msg_errcode(msg))
553 return false; 563 return false;
554 *err = -TIPC_ERR_NO_NAME; 564 *err = TIPC_ERR_NO_NAME;
555 if (skb_linearize(skb)) 565 if (skb_linearize(skb))
556 return false; 566 return false;
557 msg = buf_msg(skb); 567 msg = buf_msg(skb);
@@ -568,6 +578,14 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
568 msg_set_destnode(msg, dnode); 578 msg_set_destnode(msg, dnode);
569 msg_set_destport(msg, dport); 579 msg_set_destport(msg, dport);
570 *err = TIPC_OK; 580 *err = TIPC_OK;
581
582 if (!skb_cloned(skb))
583 return true;
584
585 /* Unclone buffer in case it was bundled */
586 if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
587 return false;
588
571 return true; 589 return true;
572} 590}
573 591
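[Editorial note: the added tail of tipc_msg_lookup_dest() applies copy-on-write before the re-addressed message is forwarded. A buffer extracted from a bundle may be a clone sharing its data area, so it is privatized with pskb_expand_head() using TIPC's standard reserves. The discipline, condensed:]

    /* Unclone before the buffer leaves this context (sketch): */
    if (skb_cloned(skb) &&
        pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
            return false;   /* could not privatize the data area */
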
@@ -658,3 +676,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
658 } 676 }
659 kfree_skb(skb); 677 kfree_skb(skb);
660} 678}
679
680void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
681 struct sk_buff_head *xmitq)
682{
683 if (tipc_msg_reverse(tipc_own_addr(net), &skb, err))
684 __skb_queue_tail(xmitq, skb);
685}
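[Editorial note: tipc_skb_reject() packages the recurring reverse-and-queue sequence: turn an undeliverable buffer around with tipc_msg_reverse() and, on success, park it on a caller-supplied transmit queue. Typical use, illustrative only; the error code is one that appears elsewhere in this diff:]

    struct sk_buff_head xmitq;

    __skb_queue_head_init(&xmitq);
    tipc_skb_reject(net, TIPC_ERR_NO_PORT, skb, &xmitq);
    /* drain xmitq through the normal send path afterwards */
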
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c843fd2bc48d..3e4384c222f7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/msg.h: Include file for TIPC message header routines 2 * net/tipc/msg.h: Include file for TIPC message header routines
3 * 3 *
4 * Copyright (c) 2000-2007, 2014-2015 Ericsson AB 4 * Copyright (c) 2000-2007, 2014-2017 Ericsson AB
5 * Copyright (c) 2005-2008, 2010-2011, Wind River Systems 5 * Copyright (c) 2005-2008, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -61,10 +61,14 @@ struct plist;
61/* 61/*
62 * Payload message types 62 * Payload message types
63 */ 63 */
64#define TIPC_CONN_MSG 0 64#define TIPC_CONN_MSG 0
65#define TIPC_MCAST_MSG 1 65#define TIPC_MCAST_MSG 1
66#define TIPC_NAMED_MSG 2 66#define TIPC_NAMED_MSG 2
67#define TIPC_DIRECT_MSG 3 67#define TIPC_DIRECT_MSG 3
68#define TIPC_GRP_MEMBER_EVT 4
69#define TIPC_GRP_BCAST_MSG 5
70#define TIPC_GRP_MCAST_MSG 6
71#define TIPC_GRP_UCAST_MSG 7
68 72
69/* 73/*
70 * Internal message users 74 * Internal message users
@@ -73,11 +77,13 @@ struct plist;
73#define MSG_BUNDLER 6 77#define MSG_BUNDLER 6
74#define LINK_PROTOCOL 7 78#define LINK_PROTOCOL 7
75#define CONN_MANAGER 8 79#define CONN_MANAGER 8
80#define GROUP_PROTOCOL 9
76#define TUNNEL_PROTOCOL 10 81#define TUNNEL_PROTOCOL 10
77#define NAME_DISTRIBUTOR 11 82#define NAME_DISTRIBUTOR 11
78#define MSG_FRAGMENTER 12 83#define MSG_FRAGMENTER 12
79#define LINK_CONFIG 13 84#define LINK_CONFIG 13
80#define SOCK_WAKEUP 14 /* pseudo user */ 85#define SOCK_WAKEUP 14 /* pseudo user */
86#define TOP_SRV 15 /* pseudo user */
81 87
82/* 88/*
83 * Message header sizes 89 * Message header sizes
@@ -86,6 +92,7 @@ struct plist;
86#define BASIC_H_SIZE 32 /* Basic payload message */ 92#define BASIC_H_SIZE 32 /* Basic payload message */
87#define NAMED_H_SIZE 40 /* Named payload message */ 93#define NAMED_H_SIZE 40 /* Named payload message */
88#define MCAST_H_SIZE 44 /* Multicast payload message */ 94#define MCAST_H_SIZE 44 /* Multicast payload message */
95#define GROUP_H_SIZE 44 /* Group payload message */
89#define INT_H_SIZE 40 /* Internal messages */ 96#define INT_H_SIZE 40 /* Internal messages */
90#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ 97#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */
91#define MAX_H_SIZE 60 /* Largest possible TIPC header size */ 98#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
@@ -96,6 +103,7 @@ struct plist;
96 103
97struct tipc_skb_cb { 104struct tipc_skb_cb {
98 u32 bytes_read; 105 u32 bytes_read;
106 u32 orig_member;
99 struct sk_buff *tail; 107 struct sk_buff *tail;
100 bool validated; 108 bool validated;
101 u16 chain_imp; 109 u16 chain_imp;
@@ -188,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m)
188 return msg_bits(m, 0, 0, 0x1ffff); 196 return msg_bits(m, 0, 0, 0x1ffff);
189} 197}
190 198
199static inline u32 msg_blocks(struct tipc_msg *m)
200{
201 return (msg_size(m) / 1024) + 1;
202}
203
191static inline u32 msg_data_sz(struct tipc_msg *m) 204static inline u32 msg_data_sz(struct tipc_msg *m)
192{ 205{
193 return msg_size(m) - msg_hdr_sz(m); 206 return msg_size(m) - msg_hdr_sz(m);
@@ -213,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d)
213 msg_set_bits(m, 0, 19, 1, d); 226 msg_set_bits(m, 0, 19, 1, d);
214} 227}
215 228
229static inline int msg_is_keepalive(struct tipc_msg *m)
230{
231 return msg_bits(m, 0, 19, 1);
232}
233
234static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d)
235{
236 msg_set_bits(m, 0, 19, 1, d);
237}
238
216static inline int msg_src_droppable(struct tipc_msg *m) 239static inline int msg_src_droppable(struct tipc_msg *m)
217{ 240{
218 return msg_bits(m, 0, 18, 1); 241 return msg_bits(m, 0, 18, 1);
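[Editorial note: the keepalive accessors follow the generic msg_bits()/msg_set_bits() word-offset-mask scheme used throughout this header. They address word 0, bit 19, the same position as the dest-droppable flag directly above; that can only be safe because the two flags belong to disjoint message classes (keepalive on LINK_PROTOCOL state messages, droppable on payload messages), which the shared position implies even though this diff does not show the senders. A self-contained illustration of the scheme:]

    #include <stdint.h>

    /* Mirrors msg_bits()/msg_set_bits() on one host-order word
     * (the real helpers also handle network byte order). */
    static inline uint32_t bits(uint32_t w, int pos, uint32_t mask)
    {
            return (w >> pos) & mask;
    }

    static inline uint32_t set_bits(uint32_t w, int pos, uint32_t mask,
                                    uint32_t val)
    {
            w &= ~(mask << pos);
            return w | ((val & mask) << pos);
    }

    /* keepalive: word 0, bit 19, 1-bit field
     *   w0 = set_bits(w0, 19, 0x1, 1);   bits(w0, 19, 0x1) -> 1
     */
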
@@ -251,6 +274,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n)
251 msg_set_bits(m, 1, 29, 0x7, n); 274 msg_set_bits(m, 1, 29, 0x7, n);
252} 275}
253 276
277static inline int msg_in_group(struct tipc_msg *m)
278{
279 int mtyp = msg_type(m);
280
281 return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG;
282}
283
284static inline bool msg_is_grp_evt(struct tipc_msg *m)
285{
286 return msg_type(m) == TIPC_GRP_MEMBER_EVT;
287}
288
254static inline u32 msg_named(struct tipc_msg *m) 289static inline u32 msg_named(struct tipc_msg *m)
255{ 290{
256 return msg_type(m) == TIPC_NAMED_MSG; 291 return msg_type(m) == TIPC_NAMED_MSG;
@@ -258,7 +293,10 @@ static inline u32 msg_named(struct tipc_msg *m)
258 293
259static inline u32 msg_mcast(struct tipc_msg *m) 294static inline u32 msg_mcast(struct tipc_msg *m)
260{ 295{
261 return msg_type(m) == TIPC_MCAST_MSG; 296 int mtyp = msg_type(m);
297
298 return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) ||
299 (mtyp == TIPC_GRP_MCAST_MSG));
262} 300}
263 301
264static inline u32 msg_connected(struct tipc_msg *m) 302static inline u32 msg_connected(struct tipc_msg *m)
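[Editorial note: msg_in_group() relies on the four group payload types being allocated as the contiguous range 4..7 (TIPC_GRP_MEMBER_EVT through TIPC_GRP_UCAST_MSG), so a single range check classifies them, while msg_mcast() widens to cover the two group multicast flavours. Sketch of the resulting classification:]

    int mtyp = msg_type(hdr);

    if (mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG)
            ;       /* group traffic: handled by the group layer */
    else if (mtyp == TIPC_MCAST_MSG)
            ;       /* classic multicast */
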
@@ -514,6 +552,16 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
514#define DSC_RESP_MSG 1 552#define DSC_RESP_MSG 1
515 553
516/* 554/*
555 * Group protocol message types
556 */
557#define GRP_JOIN_MSG 0
558#define GRP_LEAVE_MSG 1
559#define GRP_ADV_MSG 2
560#define GRP_ACK_MSG 3
561#define GRP_RECLAIM_MSG 4
562#define GRP_REMIT_MSG 5
563
564/*
517 * Word 1 565 * Word 1
518 */ 566 */
519static inline u32 msg_seq_gap(struct tipc_msg *m) 567static inline u32 msg_seq_gap(struct tipc_msg *m)
@@ -764,12 +812,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
764 msg_set_bits(m, 9, 16, 0xffff, n); 812 msg_set_bits(m, 9, 16, 0xffff, n);
765} 813}
766 814
767static inline u32 msg_adv_win(struct tipc_msg *m) 815static inline u16 msg_adv_win(struct tipc_msg *m)
768{ 816{
769 return msg_bits(m, 9, 0, 0xffff); 817 return msg_bits(m, 9, 0, 0xffff);
770} 818}
771 819
772static inline void msg_set_adv_win(struct tipc_msg *m, u32 n) 820static inline void msg_set_adv_win(struct tipc_msg *m, u16 n)
773{ 821{
774 msg_set_bits(m, 9, 0, 0xffff, n); 822 msg_set_bits(m, 9, 0, 0xffff, n);
775} 823}
@@ -794,6 +842,68 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
794 msg_set_bits(m, 9, 0, 0xffff, n); 842 msg_set_bits(m, 9, 0, 0xffff, n);
795} 843}
796 844
845static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m)
846{
847 return msg_bits(m, 9, 16, 0xffff);
848}
849
850static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n)
851{
852 msg_set_bits(m, 9, 16, 0xffff, n);
853}
854
855static inline u16 msg_grp_bc_acked(struct tipc_msg *m)
856{
857 return msg_bits(m, 9, 16, 0xffff);
858}
859
860static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
861{
862 msg_set_bits(m, 9, 16, 0xffff, n);
863}
864
865static inline u16 msg_grp_remitted(struct tipc_msg *m)
866{
867 return msg_bits(m, 9, 16, 0xffff);
868}
869
870static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n)
871{
872 msg_set_bits(m, 9, 16, 0xffff, n);
873}
874
875/* Word 10
876 */
877static inline u16 msg_grp_evt(struct tipc_msg *m)
878{
879 return msg_bits(m, 10, 0, 0x3);
880}
881
882static inline void msg_set_grp_evt(struct tipc_msg *m, int n)
883{
884 msg_set_bits(m, 10, 0, 0x3, n);
885}
886
887static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m)
888{
889 return msg_bits(m, 10, 0, 0x1);
890}
891
892static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n)
893{
894 msg_set_bits(m, 10, 0, 0x1, n);
895}
896
897static inline u16 msg_grp_bc_seqno(struct tipc_msg *m)
898{
899 return msg_bits(m, 10, 16, 0xffff);
900}
901
902static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n)
903{
904 msg_set_bits(m, 10, 16, 0xffff, n);
905}
906
797static inline bool msg_peer_link_is_up(struct tipc_msg *m) 907static inline bool msg_peer_link_is_up(struct tipc_msg *m)
798{ 908{
799 if (likely(msg_user(m) != LINK_PROTOCOL)) 909 if (likely(msg_user(m) != LINK_PROTOCOL))
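[Editorial note: the three new word-9 accessor pairs (grp_bc_syncpt, grp_bc_acked, grp_remitted) all map onto bits 16-31 of word 9. The field is shared, which is only consistent if each accessor is used with a different GRP_* message type; JOIN, ACK and REMIT respectively would be the natural pairing, though the senders are outside this diff. Word 10 is newly populated as well. Layout summary:]

    /* Word 9, bits 16-31: one shared field, meaning per message type:
     *     msg_grp_bc_syncpt()    broadcast synchronization point
     *     msg_grp_bc_acked()     last acknowledged broadcast seqno
     *     msg_grp_remitted()     remitted advertisement count
     *
     * Word 10:
     *     bits 0-1    group event type / bc-ack-request flag (also shared)
     *     bits 16-31  group broadcast seqno
     */
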
@@ -816,8 +926,10 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
816} 926}
817 927
818struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); 928struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
819bool tipc_msg_validate(struct sk_buff *skb); 929bool tipc_msg_validate(struct sk_buff **_skb);
820bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); 930bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
931void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
932 struct sk_buff_head *xmitq);
821void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, 933void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
822 u32 hsize, u32 destnode); 934 u32 hsize, u32 destnode);
823struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, 935struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
@@ -842,6 +954,11 @@ static inline u16 buf_seqno(struct sk_buff *skb)
842 return msg_seqno(buf_msg(skb)); 954 return msg_seqno(buf_msg(skb));
843} 955}
844 956
957static inline int buf_roundup_len(struct sk_buff *skb)
958{
959 return (skb->len / 1024 + 1) * 1024;
960}
961
845/* tipc_skb_peek(): peek and reserve first buffer in list 962/* tipc_skb_peek(): peek and reserve first buffer in list
846 * @list: list to be peeked in 963 * @list: list to be peeked in
847 * Returns pointer to first buffer in list, if any 964 * Returns pointer to first buffer in list, if any
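[Editorial note: buf_roundup_len() rounds a buffer's length up to the next whole KiB and is the denominator of the truesize check added to tipc_msg_validate() above; together they bound truesize at four times the rounded length, the same ratio the socket flow control assumes per advertised block. Note that an exact multiple still rounds up one more block (1024 -> 2048). A self-contained worked example:]

    #include <stdio.h>

    static int roundup_len(int len)
    {
            return (len / 1024 + 1) * 1024;     /* as buf_roundup_len() */
    }

    int main(void)
    {
            printf("%d\n", roundup_len(100));   /* 1024 */
            printf("%d\n", roundup_len(2000));  /* 2048 */
            printf("%d\n", roundup_len(1024));  /* 2048: exact multiple rounds up */

            /* 100 payload bytes in an 8 KiB-truesize skb:
             * 8192 / 1024 = 8 > 4 -> validate copies to a compact buffer */
            return 0;
    }
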
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bd0aac87b41a..b3829bcf63c7 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -43,6 +43,7 @@
43#include "bcast.h" 43#include "bcast.h"
44#include "addr.h" 44#include "addr.h"
45#include "node.h" 45#include "node.h"
46#include "group.h"
46#include <net/genetlink.h> 47#include <net/genetlink.h>
47 48
48#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ 49#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
@@ -596,18 +597,47 @@ not_found:
596 return ref; 597 return ref;
597} 598}
598 599
599/** 600bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
600 * tipc_nametbl_mc_translate - find multicast destinations 601 struct list_head *dsts, int *dstcnt, u32 exclude,
601 * 602 bool all)
602 * Creates list of all local ports that overlap the given multicast address; 603{
603 * also determines if any off-node ports overlap. 604 u32 self = tipc_own_addr(net);
604 * 605 struct publication *publ;
605 * Note: Publications with a scope narrower than 'limit' are ignored. 606 struct name_info *info;
606 * (i.e. local node-scope publications mustn't receive messages arriving 607 struct name_seq *seq;
607 * from another node, even if the multcast link brought it here) 608 struct sub_seq *sseq;
608 * 609
609 * Returns non-zero if any off-node ports overlap 610 if (!tipc_in_scope(domain, self))
610 */ 611 return false;
612
613 *dstcnt = 0;
614 rcu_read_lock();
615 seq = nametbl_find_seq(net, type);
616 if (unlikely(!seq))
617 goto exit;
618 spin_lock_bh(&seq->lock);
619 sseq = nameseq_find_subseq(seq, instance);
620 if (likely(sseq)) {
621 info = sseq->info;
622 list_for_each_entry(publ, &info->zone_list, zone_list) {
623 if (!tipc_in_scope(domain, publ->node))
624 continue;
625 if (publ->ref == exclude && publ->node == self)
626 continue;
627 tipc_dest_push(dsts, publ->node, publ->ref);
628 (*dstcnt)++;
629 if (all)
630 continue;
631 list_move_tail(&publ->zone_list, &info->zone_list);
632 break;
633 }
634 }
635 spin_unlock_bh(&seq->lock);
636exit:
637 rcu_read_unlock();
638 return !list_empty(dsts);
639}
640
611int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, 641int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
612 u32 limit, struct list_head *dports) 642 u32 limit, struct list_head *dports)
613{ 643{
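[Editorial note: tipc_nametbl_lookup() in the hunk above is the group layer's destination selector. With all=false it returns a single destination and rotates the chosen publication to the tail of zone_list, giving round-robin load sharing across members bound to the same name; exclude skips the caller's own local publication. Its use in the anycast path added to socket.c later in this diff looks like:]

    struct list_head dsts;
    u32 node, port;
    int dstcnt;

    INIT_LIST_HEAD(&dsts);
    if (!tipc_nametbl_lookup(net, type, inst, domain,
                             &dsts, &dstcnt, exclude, false))
            return -EHOSTUNREACH;           /* nobody (else) binds the name */
    tipc_dest_pop(&dsts, &node, &port);     /* the one selected destination */
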
@@ -634,7 +664,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
634 info = sseq->info; 664 info = sseq->info;
635 list_for_each_entry(publ, &info->node_list, node_list) { 665 list_for_each_entry(publ, &info->node_list, node_list) {
636 if (publ->scope <= limit) 666 if (publ->scope <= limit)
637 u32_push(dports, publ->ref); 667 tipc_dest_push(dports, 0, publ->ref);
638 } 668 }
639 669
640 if (info->cluster_list_size != info->node_list_size) 670 if (info->cluster_list_size != info->node_list_size)
@@ -667,7 +697,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
667 spin_lock_bh(&seq->lock); 697 spin_lock_bh(&seq->lock);
668 sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); 698 sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
669 stop = seq->sseqs + seq->first_free; 699 stop = seq->sseqs + seq->first_free;
670 for (; sseq->lower <= upper && sseq != stop; sseq++) { 700 for (; sseq != stop && sseq->lower <= upper; sseq++) {
671 info = sseq->info; 701 info = sseq->info;
672 list_for_each_entry(publ, &info->zone_list, zone_list) { 702 list_for_each_entry(publ, &info->zone_list, zone_list) {
673 if (tipc_in_scope(domain, publ->node)) 703 if (tipc_in_scope(domain, publ->node))
@@ -679,6 +709,37 @@ exit:
679 rcu_read_unlock(); 709 rcu_read_unlock();
680} 710}
681 711
712/* tipc_nametbl_build_group - build list of communication group members
713 */
714void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
715 u32 type, u32 domain)
716{
717 struct sub_seq *sseq, *stop;
718 struct name_info *info;
719 struct publication *p;
720 struct name_seq *seq;
721
722 rcu_read_lock();
723 seq = nametbl_find_seq(net, type);
724 if (!seq)
725 goto exit;
726
727 spin_lock_bh(&seq->lock);
728 sseq = seq->sseqs;
729 stop = seq->sseqs + seq->first_free;
730 for (; sseq != stop; sseq++) {
731 info = sseq->info;
732 list_for_each_entry(p, &info->zone_list, zone_list) {
733 if (!tipc_in_scope(domain, p->node))
734 continue;
735 tipc_group_add_member(grp, p->node, p->ref);
736 }
737 }
738 spin_unlock_bh(&seq->lock);
739exit:
740 rcu_read_unlock();
741}
742
682/* 743/*
683 * tipc_nametbl_publish - add name publication to network name tables 744 * tipc_nametbl_publish - add name publication to network name tables
684 */ 745 */
@@ -1057,78 +1118,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
1057 return skb->len; 1118 return skb->len;
1058} 1119}
1059 1120
1060bool u32_find(struct list_head *l, u32 value) 1121struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
1061{ 1122{
1062 struct u32_item *item; 1123 u64 value = (u64)node << 32 | port;
1124 struct tipc_dest *dst;
1063 1125
1064 list_for_each_entry(item, l, list) { 1126 list_for_each_entry(dst, l, list) {
1065 if (item->value == value) 1127 if (dst->value != value)
1066 return true; 1128 continue;
1129 return dst;
1067 } 1130 }
1068 return false; 1131 return NULL;
1069} 1132}
1070 1133
1071bool u32_push(struct list_head *l, u32 value) 1134bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
1072{ 1135{
1073 struct u32_item *item; 1136 u64 value = (u64)node << 32 | port;
1137 struct tipc_dest *dst;
1074 1138
1075 list_for_each_entry(item, l, list) { 1139 if (tipc_dest_find(l, node, port))
1076 if (item->value == value)
1077 return false;
1078 }
1079 item = kmalloc(sizeof(*item), GFP_ATOMIC);
1080 if (unlikely(!item))
1081 return false; 1140 return false;
1082 1141
1083 item->value = value; 1142 dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
1084 list_add(&item->list, l); 1143 if (unlikely(!dst))
1144 return false;
1145 dst->value = value;
1146 list_add(&dst->list, l);
1085 return true; 1147 return true;
1086} 1148}
1087 1149
1088u32 u32_pop(struct list_head *l) 1150bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port)
1089{ 1151{
1090 struct u32_item *item; 1152 struct tipc_dest *dst;
1091 u32 value = 0;
1092 1153
1093 if (list_empty(l)) 1154 if (list_empty(l))
1094 return 0; 1155 return false;
1095 item = list_first_entry(l, typeof(*item), list); 1156 dst = list_first_entry(l, typeof(*dst), list);
1096 value = item->value; 1157 if (port)
1097 list_del(&item->list); 1158 *port = dst->port;
1098 kfree(item); 1159 if (node)
1099 return value; 1160 *node = dst->node;
1161 list_del(&dst->list);
1162 kfree(dst);
1163 return true;
1100} 1164}
1101 1165
1102bool u32_del(struct list_head *l, u32 value) 1166bool tipc_dest_del(struct list_head *l, u32 node, u32 port)
1103{ 1167{
1104 struct u32_item *item, *tmp; 1168 struct tipc_dest *dst;
1105 1169
1106 list_for_each_entry_safe(item, tmp, l, list) { 1170 dst = tipc_dest_find(l, node, port);
1107 if (item->value != value) 1171 if (!dst)
1108 continue; 1172 return false;
1109 list_del(&item->list); 1173 list_del(&dst->list);
1110 kfree(item); 1174 kfree(dst);
1111 return true; 1175 return true;
1112 }
1113 return false;
1114} 1176}
1115 1177
1116void u32_list_purge(struct list_head *l) 1178void tipc_dest_list_purge(struct list_head *l)
1117{ 1179{
1118 struct u32_item *item, *tmp; 1180 struct tipc_dest *dst, *tmp;
1119 1181
1120 list_for_each_entry_safe(item, tmp, l, list) { 1182 list_for_each_entry_safe(dst, tmp, l, list) {
1121 list_del(&item->list); 1183 list_del(&dst->list);
1122 kfree(item); 1184 kfree(dst);
1123 } 1185 }
1124} 1186}
1125 1187
1126int u32_list_len(struct list_head *l) 1188int tipc_dest_list_len(struct list_head *l)
1127{ 1189{
1128 struct u32_item *item; 1190 struct tipc_dest *dst;
1129 int i = 0; 1191 int i = 0;
1130 1192
1131 list_for_each_entry(item, l, list) { 1193 list_for_each_entry(dst, l, list) {
1132 i++; 1194 i++;
1133 } 1195 }
1134 return i; 1196 return i;
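[Editorial note: the u32_* list helpers become the tipc_dest API: each entry now carries a (node, port) pair, and the u64 value overlay lets find and de-duplication compare both halves in a single operation (the port-then-node field order matches the (u64)node << 32 | port packing on little-endian machines). A round trip through the new API, for illustration:]

    LIST_HEAD(dsts);
    u32 node, port;

    tipc_dest_push(&dsts, onode, oport);    /* de-duplicating insert */
    if (tipc_dest_find(&dsts, onode, oport))
            ;                               /* present exactly once */
    while (tipc_dest_pop(&dsts, &node, &port))
            ;                               /* drain; newest entry first */
    tipc_dest_list_purge(&dsts);            /* safe on an empty list */
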
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 6ebdeb1d84a5..71926e429446 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -40,6 +40,7 @@
40struct tipc_subscription; 40struct tipc_subscription;
41struct tipc_plist; 41struct tipc_plist;
42struct tipc_nlist; 42struct tipc_nlist;
43struct tipc_group;
43 44
44/* 45/*
45 * TIPC name types reserved for internal TIPC use (both current and planned) 46 * TIPC name types reserved for internal TIPC use (both current and planned)
@@ -101,9 +102,14 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
101u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); 102u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
102int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, 103int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
103 u32 limit, struct list_head *dports); 104 u32 limit, struct list_head *dports);
105void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
106 u32 type, u32 domain);
104void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, 107void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
105 u32 upper, u32 domain, 108 u32 upper, u32 domain,
106 struct tipc_nlist *nodes); 109 struct tipc_nlist *nodes);
110bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
111 struct list_head *dsts, int *dstcnt, u32 exclude,
112 bool all);
107struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, 113struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
108 u32 upper, u32 scope, u32 port_ref, 114 u32 upper, u32 scope, u32 port_ref,
109 u32 key); 115 u32 key);
@@ -120,16 +126,22 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
120int tipc_nametbl_init(struct net *net); 126int tipc_nametbl_init(struct net *net);
121void tipc_nametbl_stop(struct net *net); 127void tipc_nametbl_stop(struct net *net);
122 128
123struct u32_item { 129struct tipc_dest {
124 struct list_head list; 130 struct list_head list;
125 u32 value; 131 union {
132 struct {
133 u32 port;
134 u32 node;
135 };
136 u64 value;
137 };
126}; 138};
127 139
128bool u32_push(struct list_head *l, u32 value); 140struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
129u32 u32_pop(struct list_head *l); 141bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
130bool u32_find(struct list_head *l, u32 value); 142bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
131bool u32_del(struct list_head *l, u32 value); 143bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
132void u32_list_purge(struct list_head *l); 144void tipc_dest_list_purge(struct list_head *l);
133int u32_list_len(struct list_head *l); 145int tipc_dest_list_len(struct list_head *l);
134 146
135#endif 147#endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 198dbc7adbe1..507017fe0f1b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -153,11 +153,11 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
153 bool delete); 153 bool delete);
154static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); 154static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
155static void tipc_node_delete(struct tipc_node *node); 155static void tipc_node_delete(struct tipc_node *node);
156static void tipc_node_timeout(unsigned long data); 156static void tipc_node_timeout(struct timer_list *t);
157static void tipc_node_fsm_evt(struct tipc_node *n, int evt); 157static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
158static struct tipc_node *tipc_node_find(struct net *net, u32 addr); 158static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
159static void tipc_node_put(struct tipc_node *node); 159static void tipc_node_put(struct tipc_node *node);
160static bool tipc_node_is_up(struct tipc_node *n); 160static bool node_is_up(struct tipc_node *n);
161 161
162struct tipc_sock_conn { 162struct tipc_sock_conn {
163 u32 port; 163 u32 port;
@@ -361,7 +361,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
361 goto exit; 361 goto exit;
362 } 362 }
363 tipc_node_get(n); 363 tipc_node_get(n);
364 setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); 364 timer_setup(&n->timer, tipc_node_timeout, 0);
365 n->keepalive_intv = U32_MAX; 365 n->keepalive_intv = U32_MAX;
366 hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); 366 hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
367 list_for_each_entry_rcu(temp_node, &tn->node_list, list) { 367 list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -500,9 +500,9 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
500 500
501/* tipc_node_timeout - handle expiration of node timer 501/* tipc_node_timeout - handle expiration of node timer
502 */ 502 */
503static void tipc_node_timeout(unsigned long data) 503static void tipc_node_timeout(struct timer_list *t)
504{ 504{
505 struct tipc_node *n = (struct tipc_node *)data; 505 struct tipc_node *n = from_timer(n, t, timer);
506 struct tipc_link_entry *le; 506 struct tipc_link_entry *le;
507 struct sk_buff_head xmitq; 507 struct sk_buff_head xmitq;
508 int bearer_id; 508 int bearer_id;
@@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
657 *slot1 = i; 657 *slot1 = i;
658 } 658 }
659 659
660 if (!tipc_node_is_up(n)) { 660 if (!node_is_up(n)) {
661 if (tipc_link_peer_is_down(l)) 661 if (tipc_link_peer_is_down(l))
662 tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); 662 tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
663 tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); 663 tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
@@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
717 tipc_sk_rcv(n->net, &le->inputq); 717 tipc_sk_rcv(n->net, &le->inputq);
718} 718}
719 719
720static bool tipc_node_is_up(struct tipc_node *n) 720static bool node_is_up(struct tipc_node *n)
721{ 721{
722 return n->active_links[0] != INVALID_BEARER_ID; 722 return n->active_links[0] != INVALID_BEARER_ID;
723} 723}
724 724
725bool tipc_node_is_up(struct net *net, u32 addr)
726{
727 struct tipc_node *n;
728 bool retval = false;
729
730 if (in_own_node(net, addr))
731 return true;
732
733 n = tipc_node_find(net, addr);
734 if (!n)
735 return false;
736 retval = node_is_up(n);
737 tipc_node_put(n);
738 return retval;
739}
740
725void tipc_node_check_dest(struct net *net, u32 onode, 741void tipc_node_check_dest(struct net *net, u32 onode,
726 struct tipc_bearer *b, 742 struct tipc_bearer *b,
727 u16 capabilities, u32 signature, 743 u16 capabilities, u32 signature,
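[Editorial note: node_is_up() stays as the lock-free internal predicate; the new public tipc_node_is_up() adds the reference discipline external callers need: the own address is trivially up, otherwise find (which takes a node reference), test, put. From a caller's point of view:]

    /* External callers can now test liveness by address alone: */
    if (tipc_node_is_up(net, peer_addr)) {
            /* own node, or at least one active link to the peer */
    }
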
@@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
1149 1165
1150 if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) 1166 if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
1151 goto attr_msg_full; 1167 goto attr_msg_full;
1152 if (tipc_node_is_up(node)) 1168 if (node_is_up(node))
1153 if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) 1169 if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
1154 goto attr_msg_full; 1170 goto attr_msg_full;
1155 1171
@@ -1238,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
1238 return 0; 1254 return 0;
1239} 1255}
1240 1256
1257/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations
1258 * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected
1259 */
1260int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
1261{
1262 struct sk_buff *skb;
1263 u32 selector, dnode;
1264
1265 while ((skb = __skb_dequeue(xmitq))) {
1266 selector = msg_origport(buf_msg(skb));
1267 dnode = msg_destnode(buf_msg(skb));
1268 tipc_node_xmit_skb(net, skb, dnode, selector);
1269 }
1270 return 0;
1271}
1272
1241void tipc_node_broadcast(struct net *net, struct sk_buff *skb) 1273void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
1242{ 1274{
1243 struct sk_buff *txskb; 1275 struct sk_buff *txskb;
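[Editorial note: tipc_node_distr_xmit() drains a queue of single-buffer messages, routing each by the destination node and origin port already stamped into its own header; per the comment it is reserved for SYSTEM_IMPORTANCE traffic that cannot be rejected. It pairs naturally with tipc_skb_reject() from msg.c, sketched here with an unspecified err:]

    struct sk_buff_head xmitq;

    __skb_queue_head_init(&xmitq);
    tipc_skb_reject(net, err, skb, &xmitq); /* reverse into local queue */
    tipc_node_distr_xmit(net, &xmitq);      /* fan the rejects back out */
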
@@ -1249,7 +1281,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
1249 dst = n->addr; 1281 dst = n->addr;
1250 if (in_own_node(net, dst)) 1282 if (in_own_node(net, dst))
1251 continue; 1283 continue;
1252 if (!tipc_node_is_up(n)) 1284 if (!node_is_up(n))
1253 continue; 1285 continue;
1254 txskb = pskb_copy(skb, GFP_ATOMIC); 1286 txskb = pskb_copy(skb, GFP_ATOMIC);
1255 if (!txskb) 1287 if (!txskb)
@@ -1507,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1507 __skb_queue_head_init(&xmitq); 1539 __skb_queue_head_init(&xmitq);
1508 1540
1509 /* Ensure message is well-formed before touching the header */ 1541 /* Ensure message is well-formed before touching the header */
1510 if (unlikely(!tipc_msg_validate(skb))) 1542 if (unlikely(!tipc_msg_validate(&skb)))
1511 goto discard; 1543 goto discard;
1512 hdr = buf_msg(skb); 1544 hdr = buf_msg(skb);
1513 usr = msg_user(hdr); 1545 usr = msg_user(hdr);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 898c22916984..acd58d23a70e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -48,7 +48,8 @@ enum {
48 TIPC_BCAST_SYNCH = (1 << 1), 48 TIPC_BCAST_SYNCH = (1 << 1),
49 TIPC_BCAST_STATE_NACK = (1 << 2), 49 TIPC_BCAST_STATE_NACK = (1 << 2),
50 TIPC_BLOCK_FLOWCTL = (1 << 3), 50 TIPC_BLOCK_FLOWCTL = (1 << 3),
51 TIPC_BCAST_RCAST = (1 << 4) 51 TIPC_BCAST_RCAST = (1 << 4),
52 TIPC_MCAST_GROUPS = (1 << 5)
52}; 53};
53 54
54#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ 55#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
@@ -68,6 +69,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
68 char *linkname, size_t len); 69 char *linkname, size_t len);
69int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, 70int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
70 int selector); 71 int selector);
72int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list);
71int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, 73int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
72 u32 selector); 74 u32 selector);
73void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); 75void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
@@ -76,6 +78,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
76int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); 78int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
77void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); 79void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
78int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); 80int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
81bool tipc_node_is_up(struct net *net, u32 addr);
79u16 tipc_node_get_capabilities(struct net *net, u32 addr); 82u16 tipc_node_get_capabilities(struct net *net, u32 addr);
80int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); 83int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
81int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); 84int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 3cd6402e812c..acaef80fb88c 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -36,6 +36,8 @@
36#include "server.h" 36#include "server.h"
37#include "core.h" 37#include "core.h"
38#include "socket.h" 38#include "socket.h"
39#include "addr.h"
40#include "msg.h"
39#include <net/sock.h> 41#include <net/sock.h>
40#include <linux/module.h> 42#include <linux/module.h>
41 43
@@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref)
105 kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); 107 kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
106 sock_release(sock); 108 sock_release(sock);
107 con->sock = NULL; 109 con->sock = NULL;
108
109 spin_lock_bh(&s->idr_lock);
110 idr_remove(&s->conn_idr, con->conid);
111 s->idr_in_use--;
112 spin_unlock_bh(&s->idr_lock);
113 } 110 }
114 111 spin_lock_bh(&s->idr_lock);
112 idr_remove(&s->conn_idr, con->conid);
113 s->idr_in_use--;
114 spin_unlock_bh(&s->idr_lock);
115 tipc_clean_outqueues(con); 115 tipc_clean_outqueues(con);
116 kfree(con); 116 kfree(con);
117} 117}
@@ -197,7 +197,8 @@ static void tipc_close_conn(struct tipc_conn *con)
197 struct tipc_server *s = con->server; 197 struct tipc_server *s = con->server;
198 198
199 if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { 199 if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
200 tipc_unregister_callbacks(con); 200 if (con->sock)
201 tipc_unregister_callbacks(con);
201 202
202 if (con->conid) 203 if (con->conid)
203 s->tipc_conn_release(con->conid, con->usr_data); 204 s->tipc_conn_release(con->conid, con->usr_data);
@@ -207,8 +208,8 @@ static void tipc_close_conn(struct tipc_conn *con)
207 * are harmless for us here as we have already deleted this 208 * are harmless for us here as we have already deleted this
208 * connection from server connection list. 209 * connection from server connection list.
209 */ 210 */
210 kernel_sock_shutdown(con->sock, SHUT_RDWR); 211 if (con->sock)
211 212 kernel_sock_shutdown(con->sock, SHUT_RDWR);
212 conn_put(con); 213 conn_put(con);
213 } 214 }
214} 215}
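[Editorial note: these guards exist because, with the kernel-side subscriptions introduced below, a tipc_conn may legitimately have no socket (con->sock == NULL). Teardown must skip the socket operations, and the idr removal moves out of the socket-only branch so socketless connections are unregistered too. The rule, condensed:]

    /* Every sock dereference on the teardown path is now guarded: */
    if (con->sock)
            kernel_sock_shutdown(con->sock, SHUT_RDWR);
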
@@ -487,38 +488,104 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
487 } 488 }
488} 489}
489 490
491bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
492 u32 lower, u32 upper, int *conid)
493{
494 struct tipc_subscriber *scbr;
495 struct tipc_subscr sub;
496 struct tipc_server *s;
497 struct tipc_conn *con;
498
499 sub.seq.type = type;
500 sub.seq.lower = lower;
501 sub.seq.upper = upper;
502 sub.timeout = TIPC_WAIT_FOREVER;
503 sub.filter = TIPC_SUB_PORTS;
504 *(u32 *)&sub.usr_handle = port;
505
506 con = tipc_alloc_conn(tipc_topsrv(net));
507 if (IS_ERR(con))
508 return false;
509
510 *conid = con->conid;
511 s = con->server;
512 scbr = s->tipc_conn_new(*conid);
513 if (!scbr) {
514 tipc_close_conn(con);
515 return false;
516 }
517
518 con->usr_data = scbr;
519 con->sock = NULL;
520 s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
521 return true;
522}
523
524void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
525{
526 struct tipc_conn *con;
527
528 con = tipc_conn_lookup(tipc_topsrv(net), conid);
529 if (!con)
530 return;
531 tipc_close_conn(con);
532 conn_put(con);
533}
534
535static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
536{
537 u32 port = *(u32 *)&evt->s.usr_handle;
538 u32 self = tipc_own_addr(net);
539 struct sk_buff_head evtq;
540 struct sk_buff *skb;
541
542 skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
543 self, self, port, port, 0);
544 if (!skb)
545 return;
546 msg_set_dest_droppable(buf_msg(skb), true);
547 memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
548 skb_queue_head_init(&evtq);
549 __skb_queue_tail(&evtq, skb);
550 tipc_sk_rcv(net, &evtq);
551}
552
490static void tipc_send_to_sock(struct tipc_conn *con) 553static void tipc_send_to_sock(struct tipc_conn *con)
491{ 554{
492 int count = 0;
493 struct tipc_server *s = con->server; 555 struct tipc_server *s = con->server;
494 struct outqueue_entry *e; 556 struct outqueue_entry *e;
557 struct tipc_event *evt;
495 struct msghdr msg; 558 struct msghdr msg;
559 int count = 0;
496 int ret; 560 int ret;
497 561
498 spin_lock_bh(&con->outqueue_lock); 562 spin_lock_bh(&con->outqueue_lock);
499 while (test_bit(CF_CONNECTED, &con->flags)) { 563 while (test_bit(CF_CONNECTED, &con->flags)) {
500 e = list_entry(con->outqueue.next, struct outqueue_entry, 564 e = list_entry(con->outqueue.next, struct outqueue_entry, list);
501 list);
502 if ((struct list_head *) e == &con->outqueue) 565 if ((struct list_head *) e == &con->outqueue)
503 break; 566 break;
504 spin_unlock_bh(&con->outqueue_lock);
505 567
506 memset(&msg, 0, sizeof(msg)); 568 spin_unlock_bh(&con->outqueue_lock);
507 msg.msg_flags = MSG_DONTWAIT;
508 569
509 if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { 570 if (con->sock) {
510 msg.msg_name = &e->dest; 571 memset(&msg, 0, sizeof(msg));
511 msg.msg_namelen = sizeof(struct sockaddr_tipc); 572 msg.msg_flags = MSG_DONTWAIT;
512 } 573 if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
513 ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, 574 msg.msg_name = &e->dest;
514 e->iov.iov_len); 575 msg.msg_namelen = sizeof(struct sockaddr_tipc);
515 if (ret == -EWOULDBLOCK || ret == 0) { 576 }
516 cond_resched(); 577 ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
517 goto out; 578 e->iov.iov_len);
518 } else if (ret < 0) { 579 if (ret == -EWOULDBLOCK || ret == 0) {
519 goto send_err; 580 cond_resched();
581 goto out;
582 } else if (ret < 0) {
583 goto send_err;
584 }
585 } else {
586 evt = e->iov.iov_base;
587 tipc_send_kern_top_evt(s->net, evt);
520 } 588 }
521
522 /* Don't starve users filling buffers */ 589 /* Don't starve users filling buffers */
523 if (++count >= MAX_SEND_MSG_COUNT) { 590 if (++count >= MAX_SEND_MSG_COUNT) {
524 cond_resched(); 591 cond_resched();
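[Editorial note: the new entry points give in-kernel users (the group layer, later in this series) a socketless topology subscription. tipc_topsrv_kern_subscr() allocates a connection with con->sock = NULL and feeds a struct tipc_subscr straight into the receive callback, and tipc_send_to_sock() now branches on con->sock, delivering events for socketless connections via tipc_send_kern_top_evt() as TOP_SRV messages injected into the local socket receive path. Lifecycle from a kernel caller's point of view; the error mapping is editorial:]

    int conid;

    if (!tipc_topsrv_kern_subscr(net, portid, type, lower, upper, &conid))
            return -ENOMEM;         /* editorial choice of error code */

    /* ... topology events for the name range now arrive on portid ... */

    tipc_topsrv_kern_unsubscr(net, conid);
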
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 34f8055afa3b..2113c9192633 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -83,13 +83,16 @@ struct tipc_server {
83int tipc_conn_sendmsg(struct tipc_server *s, int conid, 83int tipc_conn_sendmsg(struct tipc_server *s, int conid,
84 struct sockaddr_tipc *addr, void *data, size_t len); 84 struct sockaddr_tipc *addr, void *data, size_t len);
85 85
86bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
87 u32 lower, u32 upper, int *conid);
88void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
89
86/** 90/**
87 * tipc_conn_terminate - terminate connection with server 91 * tipc_conn_terminate - terminate connection with server
88 * 92 *
89 * Note: Must call it in process context since it might sleep 93 * Note: Must call it in process context since it might sleep
90 */ 94 */
91void tipc_conn_terminate(struct tipc_server *s, int conid); 95void tipc_conn_terminate(struct tipc_server *s, int conid);
92
93int tipc_server_start(struct tipc_server *s); 96int tipc_server_start(struct tipc_server *s);
94 97
95void tipc_server_stop(struct tipc_server *s); 98void tipc_server_stop(struct tipc_server *s);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index d50edd6e0019..5d18c0caa92b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/socket.c: TIPC socket API 2 * net/tipc/socket.c: TIPC socket API
3 * 3 *
4 * Copyright (c) 2001-2007, 2012-2016, Ericsson AB 4 * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems 5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -45,9 +45,10 @@
45#include "socket.h" 45#include "socket.h"
46#include "bcast.h" 46#include "bcast.h"
47#include "netlink.h" 47#include "netlink.h"
48#include "group.h"
48 49
49#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ 50#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
50#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ 51#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
51#define TIPC_FWD_MSG 1 52#define TIPC_FWD_MSG 1
52#define TIPC_MAX_PORT 0xffffffff 53#define TIPC_MAX_PORT 0xffffffff
53#define TIPC_MIN_PORT 1 54#define TIPC_MIN_PORT 1
@@ -61,6 +62,11 @@ enum {
61 TIPC_CONNECTING = TCP_SYN_SENT, 62 TIPC_CONNECTING = TCP_SYN_SENT,
62}; 63};
63 64
65struct sockaddr_pair {
66 struct sockaddr_tipc sock;
67 struct sockaddr_tipc member;
68};
69
64/** 70/**
65 * struct tipc_sock - TIPC socket structure 71 * struct tipc_sock - TIPC socket structure
66 * @sk: socket - interacts with 'port' and with user via the socket API 72 * @sk: socket - interacts with 'port' and with user via the socket API
@@ -78,7 +84,7 @@ enum {
78 * @conn_timeout: the time we can wait for an unresponded setup request 84 * @conn_timeout: the time we can wait for an unresponded setup request
79 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue 85 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
80 * @cong_link_cnt: number of congested links 86 * @cong_link_cnt: number of congested links
81 * @sent_unacked: # messages sent by socket, and not yet acked by peer 87 * @snt_unacked: # messages sent by socket, and not yet acked by peer
82 * @rcv_unacked: # messages read by user, but not yet acked back to peer 88 * @rcv_unacked: # messages read by user, but not yet acked back to peer
83 * @peer: 'connected' peer for dgram/rdm 89 * @peer: 'connected' peer for dgram/rdm
84 * @node: hash table node 90 * @node: hash table node
@@ -109,20 +115,22 @@ struct tipc_sock {
109 struct rhash_head node; 115 struct rhash_head node;
110 struct tipc_mc_method mc_method; 116 struct tipc_mc_method mc_method;
111 struct rcu_head rcu; 117 struct rcu_head rcu;
118 struct tipc_group *group;
112}; 119};
113 120
114static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); 121static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
115static void tipc_data_ready(struct sock *sk); 122static void tipc_data_ready(struct sock *sk);
116static void tipc_write_space(struct sock *sk); 123static void tipc_write_space(struct sock *sk);
117static void tipc_sock_destruct(struct sock *sk); 124static void tipc_sock_destruct(struct sock *sk);
118static int tipc_release(struct socket *sock); 125static int tipc_release(struct socket *sock);
119static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, 126static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
120 bool kern); 127 bool kern);
121static void tipc_sk_timeout(unsigned long data); 128static void tipc_sk_timeout(struct timer_list *t);
122static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, 129static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
123 struct tipc_name_seq const *seq); 130 struct tipc_name_seq const *seq);
124static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, 131static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
125 struct tipc_name_seq const *seq); 132 struct tipc_name_seq const *seq);
133static int tipc_sk_leave(struct tipc_sock *tsk);
126static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); 134static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
127static int tipc_sk_insert(struct tipc_sock *tsk); 135static int tipc_sk_insert(struct tipc_sock *tsk);
128static void tipc_sk_remove(struct tipc_sock *tsk); 136static void tipc_sk_remove(struct tipc_sock *tsk);
@@ -193,6 +201,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk)
193 return tsk->snt_unacked > tsk->snd_win; 201 return tsk->snt_unacked > tsk->snd_win;
194} 202}
195 203
204static u16 tsk_blocks(int len)
205{
206 return ((len / FLOWCTL_BLK_SZ) + 1);
207}
208
196/* tsk_blocks(): translate a buffer size in bytes to number of 209/* tsk_blocks(): translate a buffer size in bytes to number of
197 * advertisable blocks, taking into account the ratio truesize(len)/len 210 * advertisable blocks, taking into account the ratio truesize(len)/len
198 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ 211 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
@@ -451,9 +464,9 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
451 NAMED_H_SIZE, 0); 464 NAMED_H_SIZE, 0);
452 465
453 msg_set_origport(msg, tsk->portid); 466 msg_set_origport(msg, tsk->portid);
454 setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); 467 timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
455 sk->sk_shutdown = 0; 468 sk->sk_shutdown = 0;
456 sk->sk_backlog_rcv = tipc_backlog_rcv; 469 sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
457 sk->sk_rcvbuf = sysctl_tipc_rmem[1]; 470 sk->sk_rcvbuf = sysctl_tipc_rmem[1];
458 sk->sk_data_ready = tipc_data_ready; 471 sk->sk_data_ready = tipc_data_ready;
459 sk->sk_write_space = tipc_write_space; 472 sk->sk_write_space = tipc_write_space;
@@ -559,13 +572,14 @@ static int tipc_release(struct socket *sock)
559 572
560 __tipc_shutdown(sock, TIPC_ERR_NO_PORT); 573 __tipc_shutdown(sock, TIPC_ERR_NO_PORT);
561 sk->sk_shutdown = SHUTDOWN_MASK; 574 sk->sk_shutdown = SHUTDOWN_MASK;
575 tipc_sk_leave(tsk);
562 tipc_sk_withdraw(tsk, 0, NULL); 576 tipc_sk_withdraw(tsk, 0, NULL);
563 sk_stop_timer(sk, &sk->sk_timer); 577 sk_stop_timer(sk, &sk->sk_timer);
564 tipc_sk_remove(tsk); 578 tipc_sk_remove(tsk);
565 579
566 /* Reject any messages that accumulated in backlog queue */ 580 /* Reject any messages that accumulated in backlog queue */
567 release_sock(sk); 581 release_sock(sk);
568 u32_list_purge(&tsk->cong_links); 582 tipc_dest_list_purge(&tsk->cong_links);
569 tsk->cong_link_cnt = 0; 583 tsk->cong_link_cnt = 0;
570 call_rcu(&tsk->rcu, tipc_sk_callback); 584 call_rcu(&tsk->rcu, tipc_sk_callback);
571 sock->sk = NULL; 585 sock->sk = NULL;
@@ -601,7 +615,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
601 res = tipc_sk_withdraw(tsk, 0, NULL); 615 res = tipc_sk_withdraw(tsk, 0, NULL);
602 goto exit; 616 goto exit;
603 } 617 }
604 618 if (tsk->group) {
619 res = -EACCES;
620 goto exit;
621 }
605 if (uaddr_len < sizeof(struct sockaddr_tipc)) { 622 if (uaddr_len < sizeof(struct sockaddr_tipc)) {
606 res = -EINVAL; 623 res = -EINVAL;
607 goto exit; 624 goto exit;
@@ -698,38 +715,41 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
698{ 715{
699 struct sock *sk = sock->sk; 716 struct sock *sk = sock->sk;
700 struct tipc_sock *tsk = tipc_sk(sk); 717 struct tipc_sock *tsk = tipc_sk(sk);
701 u32 mask = 0; 718 struct tipc_group *grp = tsk->group;
719 u32 revents = 0;
702 720
703 sock_poll_wait(file, sk_sleep(sk), wait); 721 sock_poll_wait(file, sk_sleep(sk), wait);
704 722
705 if (sk->sk_shutdown & RCV_SHUTDOWN) 723 if (sk->sk_shutdown & RCV_SHUTDOWN)
706 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 724 revents |= POLLRDHUP | POLLIN | POLLRDNORM;
707 if (sk->sk_shutdown == SHUTDOWN_MASK) 725 if (sk->sk_shutdown == SHUTDOWN_MASK)
708 mask |= POLLHUP; 726 revents |= POLLHUP;
709 727
710 switch (sk->sk_state) { 728 switch (sk->sk_state) {
711 case TIPC_ESTABLISHED: 729 case TIPC_ESTABLISHED:
712 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) 730 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
713 mask |= POLLOUT; 731 revents |= POLLOUT;
714 /* fall thru' */ 732 /* fall thru' */
715 case TIPC_LISTEN: 733 case TIPC_LISTEN:
716 case TIPC_CONNECTING: 734 case TIPC_CONNECTING:
717 if (!skb_queue_empty(&sk->sk_receive_queue)) 735 if (!skb_queue_empty(&sk->sk_receive_queue))
718 mask |= (POLLIN | POLLRDNORM); 736 revents |= POLLIN | POLLRDNORM;
719 break; 737 break;
720 case TIPC_OPEN: 738 case TIPC_OPEN:
721 if (!tsk->cong_link_cnt) 739 if (!grp || tipc_group_size(grp))
722 mask |= POLLOUT; 740 if (!tsk->cong_link_cnt)
723 if (tipc_sk_type_connectionless(sk) && 741 revents |= POLLOUT;
724 (!skb_queue_empty(&sk->sk_receive_queue))) 742 if (!tipc_sk_type_connectionless(sk))
725 mask |= (POLLIN | POLLRDNORM); 743 break;
744 if (skb_queue_empty(&sk->sk_receive_queue))
745 break;
746 revents |= POLLIN | POLLRDNORM;
726 break; 747 break;
727 case TIPC_DISCONNECTING: 748 case TIPC_DISCONNECTING:
728 mask = (POLLIN | POLLRDNORM | POLLHUP); 749 revents = POLLIN | POLLRDNORM | POLLHUP;
729 break; 750 break;
730 } 751 }
731 752 return revents;
732 return mask;
733} 753}
734 754
735/** 755/**
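[Editorial note: besides the mask -> revents rename, the poll rewrite changes one behaviour: in TIPC_OPEN state a socket that has joined a group (grp != NULL) only reports POLLOUT once the group actually has members, so group senders wait in poll instead of racing an empty membership. The reworked branch, condensed into equivalent form:]

    case TIPC_OPEN:
            if (!grp || tipc_group_size(grp))   /* no group, or non-empty */
                    if (!tsk->cong_link_cnt)    /* and no congested links */
                            revents |= POLLOUT;
            if (tipc_sk_type_connectionless(sk) &&
                !skb_queue_empty(&sk->sk_receive_queue))
                    revents |= POLLIN | POLLRDNORM;
            break;
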
@@ -757,6 +777,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
757 struct tipc_nlist dsts; 777 struct tipc_nlist dsts;
758 int rc; 778 int rc;
759 779
780 if (tsk->group)
781 return -EACCES;
782
760 /* Block or return if any destination link is congested */ 783 /* Block or return if any destination link is congested */
761 rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); 784 rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
762 if (unlikely(rc)) 785 if (unlikely(rc))
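[Editorial note: the roughly 300-line hunk that follows adds the group send paths (unicast, anycast, and broadcast within this section). They share one admission pattern: price the message in advertised-window blocks with tsk_blocks(GROUP_H_SIZE + dlen), then sleep in tipc_wait_for_cond() until both the destination link and the destination member have window available. The shared skeleton, extracted for readability with names as in the added code:]

    int blks = tsk_blocks(GROUP_H_SIZE + dlen);     /* window cost in blocks */

    rc = tipc_wait_for_cond(sock, &timeout,
                            !tipc_dest_find(&tsk->cong_links, node, 0) &&
                            !tipc_group_cong(grp, node, port, blks, &mb));
    if (unlikely(rc))
            return rc;                              /* timeout or signal */
    /* transmit, then tipc_group_update_member(mb, blks) charges the window */
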
@@ -794,6 +817,296 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
794} 817}
795 818
796/** 819/**
820 * tipc_send_group_msg - send a message to a member in the group
821 * @net: network namespace
822 * @m: message to send
823 * @mb: group member
824 * @dnode: destination node
825 * @dport: destination port
826 * @dlen: total length of message data
827 */
828static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
829 struct msghdr *m, struct tipc_member *mb,
830 u32 dnode, u32 dport, int dlen)
831{
832 u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
833 struct tipc_mc_method *method = &tsk->mc_method;
834 int blks = tsk_blocks(GROUP_H_SIZE + dlen);
835 struct tipc_msg *hdr = &tsk->phdr;
836 struct sk_buff_head pkts;
837 int mtu, rc;
838
839 /* Complete message header */
840 msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
841 msg_set_hdr_sz(hdr, GROUP_H_SIZE);
842 msg_set_destport(hdr, dport);
843 msg_set_destnode(hdr, dnode);
844 msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
845
846 /* Build message as chain of buffers */
847 skb_queue_head_init(&pkts);
848 mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
849 rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
850 if (unlikely(rc != dlen))
851 return rc;
852
853 /* Send message */
854 rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
855 if (unlikely(rc == -ELINKCONG)) {
856 tipc_dest_push(&tsk->cong_links, dnode, 0);
857 tsk->cong_link_cnt++;
858 }
859
860 /* Update send window */
861 tipc_group_update_member(mb, blks);
862
863 /* A broadcast sent within next EXPIRE period must follow same path */
864 method->rcast = true;
865 method->mandatory = true;
866 return dlen;
867}
868
869/**
870 * tipc_send_group_unicast - send message to a member in the group
871 * @sock: socket structure
872 * @m: message to send
873 * @dlen: total length of message data
874 * @timeout: timeout to wait for wakeup
875 *
876 * Called from function tipc_sendmsg(), which has done all sanity checks
877 * Returns the number of bytes sent on success, or errno
878 */
879static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
880 int dlen, long timeout)
881{
882 struct sock *sk = sock->sk;
883 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
884 int blks = tsk_blocks(GROUP_H_SIZE + dlen);
885 struct tipc_sock *tsk = tipc_sk(sk);
886 struct tipc_group *grp = tsk->group;
887 struct net *net = sock_net(sk);
888 struct tipc_member *mb = NULL;
889 u32 node, port;
890 int rc;
891
892 node = dest->addr.id.node;
893 port = dest->addr.id.ref;
894 if (!port && !node)
895 return -EHOSTUNREACH;
896
897 /* Block or return if destination link or member is congested */
898 rc = tipc_wait_for_cond(sock, &timeout,
899 !tipc_dest_find(&tsk->cong_links, node, 0) &&
900 !tipc_group_cong(grp, node, port, blks, &mb));
901 if (unlikely(rc))
902 return rc;
903
904 if (unlikely(!mb))
905 return -EHOSTUNREACH;
906
907 rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);
908
909 return rc ? rc : dlen;
910}
911
912/**
913 * tipc_send_group_anycast - send message to any member with given identity
914 * @sock: socket structure
915 * @m: message to send
916 * @dlen: total length of message data
917 * @timeout: timeout to wait for wakeup
918 *
919 * Called from function tipc_sendmsg(), which has done all sanity checks
920 * Returns the number of bytes sent on success, or a negative errno otherwise
921 */
922static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
923 int dlen, long timeout)
924{
925 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
926 struct sock *sk = sock->sk;
927 struct tipc_sock *tsk = tipc_sk(sk);
928 struct list_head *cong_links = &tsk->cong_links;
929 int blks = tsk_blocks(GROUP_H_SIZE + dlen);
930 struct tipc_group *grp = tsk->group;
931 struct tipc_member *first = NULL;
932 struct tipc_member *mbr = NULL;
933 struct net *net = sock_net(sk);
934 u32 node, port, exclude;
935 u32 type, inst, domain;
936 struct list_head dsts;
937 int lookups = 0;
938 int dstcnt, rc;
939 bool cong;
940
941 INIT_LIST_HEAD(&dsts);
942
943 type = dest->addr.name.name.type;
944 inst = dest->addr.name.name.instance;
945 domain = addr_domain(net, dest->scope);
946 exclude = tipc_group_exclude(grp);
947
948 while (++lookups < 4) {
949 first = NULL;
950
951 /* Look for a non-congested destination member, if any */
952 while (1) {
953 if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
954 &dstcnt, exclude, false))
955 return -EHOSTUNREACH;
956 tipc_dest_pop(&dsts, &node, &port);
957 cong = tipc_group_cong(grp, node, port, blks, &mbr);
958 if (!cong)
959 break;
960 if (mbr == first)
961 break;
962 if (!first)
963 first = mbr;
964 }
965
966 /* Start over if destination was not in member list */
967 if (unlikely(!mbr))
968 continue;
969
970 if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
971 break;
972
973 /* Block or return if destination link or member is congested */
974 rc = tipc_wait_for_cond(sock, &timeout,
975 !tipc_dest_find(cong_links, node, 0) &&
976 !tipc_group_cong(grp, node, port,
977 blks, &mbr));
978 if (unlikely(rc))
979 return rc;
980
981 /* Send, unless destination disappeared while waiting */
982 if (likely(mbr))
983 break;
984 }
985
986 if (unlikely(lookups >= 4))
987 return -EHOSTUNREACH;
988
989 rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);
990
991 return rc ? rc : dlen;
992}
993
994/**
995 * tipc_send_group_bcast - send message to all members in communication group
996 * @sk: socket structure
997 * @m: message to send
998 * @dlen: total length of message data
999 * @timeout: timeout to wait for wakeup
1000 *
1001 * Called from function tipc_sendmsg(), which has done all sanity checks
1002 * Returns the number of bytes sent on success, or a negative errno otherwise
1003 */
1004static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
1005 int dlen, long timeout)
1006{
1007 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
1008 struct sock *sk = sock->sk;
1009 struct net *net = sock_net(sk);
1010 struct tipc_sock *tsk = tipc_sk(sk);
1011 struct tipc_group *grp = tsk->group;
1012 struct tipc_nlist *dsts = tipc_group_dests(grp);
1013 struct tipc_mc_method *method = &tsk->mc_method;
1014 bool ack = method->mandatory && method->rcast;
1015 int blks = tsk_blocks(MCAST_H_SIZE + dlen);
1016 struct tipc_msg *hdr = &tsk->phdr;
1017 int mtu = tipc_bcast_get_mtu(net);
1018 struct sk_buff_head pkts;
1019 int rc = -EHOSTUNREACH;
1020
1021 if (!dsts->local && !dsts->remote)
1022 return -EHOSTUNREACH;
1023
1024 /* Block or return if any destination link or member is congested */
1025 rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt &&
1026 !tipc_group_bc_cong(grp, blks));
1027 if (unlikely(rc))
1028 return rc;
1029
1030 /* Complete message header */
1031 if (dest) {
1032 msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
1033 msg_set_nameinst(hdr, dest->addr.name.name.instance);
1034 } else {
1035 msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
1036 msg_set_nameinst(hdr, 0);
1037 }
1038 msg_set_hdr_sz(hdr, GROUP_H_SIZE);
1039 msg_set_destport(hdr, 0);
1040 msg_set_destnode(hdr, 0);
1041 msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
1042
1043 /* Avoid getting stuck with repeated forced replicasts */
1044 msg_set_grp_bc_ack_req(hdr, ack);
1045
1046 /* Build message as chain of buffers */
1047 skb_queue_head_init(&pkts);
1048 rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
1049 if (unlikely(rc != dlen))
1050 return rc;
1051
1052 /* Send message */
1053 rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
1054 if (unlikely(rc))
1055 return rc;
1056
1057 /* Update broadcast sequence number and send windows */
1058 tipc_group_update_bc_members(tsk->group, blks, ack);
1059
1060 /* Broadcast link is now free to choose method for next broadcast */
1061 method->mandatory = false;
1062 method->expires = jiffies;
1063
1064 return dlen;
1065}
1066
1067/**
1068 * tipc_send_group_mcast - send message to all members with given identity
1069 * @sock: socket structure
1070 * @m: message to send
1071 * @dlen: total length of message data
1072 * @timeout: timeout to wait for wakeup
1073 *
1074 * Called from function tipc_sendmsg(), which has done all sanity checks
1075 * Returns the number of bytes sent on success, or a negative errno otherwise
1076 */
1077static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
1078 int dlen, long timeout)
1079{
1080 struct sock *sk = sock->sk;
1081 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
1082 struct tipc_name_seq *seq = &dest->addr.nameseq;
1083 struct tipc_sock *tsk = tipc_sk(sk);
1084 struct tipc_group *grp = tsk->group;
1085 struct net *net = sock_net(sk);
1086 u32 domain, exclude, dstcnt;
1087 struct list_head dsts;
1088
1089 INIT_LIST_HEAD(&dsts);
1090
1091 if (seq->lower != seq->upper)
1092 return -ENOTSUPP;
1093
1094 domain = addr_domain(net, dest->scope);
1095 exclude = tipc_group_exclude(grp);
1096 if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
1097 &dsts, &dstcnt, exclude, true))
1098 return -EHOSTUNREACH;
1099
1100 if (dstcnt == 1) {
1101 tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
1102 return tipc_send_group_unicast(sock, m, dlen, timeout);
1103 }
1104
1105 tipc_dest_list_purge(&dsts);
1106 return tipc_send_group_bcast(sock, m, dlen, timeout);
1107}
1108
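The five helpers above implement the four user-visible group send modes; which one runs is decided purely by the destination address handed to sendmsg(), as the dispatch added to __tipc_sendmsg() further down shows. A minimal userspace sketch of the four forms, assuming a socket that has already joined a group (sd, buf, len, node and port are assumed set up; the 4711/17 values are arbitrary):

    struct sockaddr_tipc dst = {0};

    dst.family = AF_TIPC;
    dst.scope = TIPC_CLUSTER_SCOPE;

    /* broadcast to every group member: no destination address at all */
    send(sd, buf, len, 0);

    /* anycast to any one member bound to instance 17 */
    dst.addrtype = TIPC_ADDR_NAME;
    dst.addr.name.name.type = 4711;         /* must equal the group type */
    dst.addr.name.name.instance = 17;
    sendto(sd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));

    /* unicast to one specific member socket */
    dst.addrtype = TIPC_ADDR_ID;
    dst.addr.id.node = node;
    dst.addr.id.ref = port;
    sendto(sd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));

    /* multicast to all members bound to instance 17; note that
     * tipc_send_group_mcast() above insists on lower == upper
     */
    dst.addrtype = TIPC_ADDR_MCAST;
    dst.addr.nameseq.type = 4711;
    dst.addr.nameseq.lower = 17;
    dst.addr.nameseq.upper = 17;
    sendto(sd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));
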
1109/**
797 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets 1110 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
798 * @arrvq: queue with arriving messages, to be cloned after destination lookup 1111 * @arrvq: queue with arriving messages, to be cloned after destination lookup
799 * @inputq: queue with cloned messages, delivered to socket after dest lookup 1112 * @inputq: queue with cloned messages, delivered to socket after dest lookup
@@ -803,13 +1116,15 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
803void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, 1116void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
804 struct sk_buff_head *inputq) 1117 struct sk_buff_head *inputq)
805{ 1118{
806 struct tipc_msg *msg;
807 struct list_head dports;
808 u32 portid;
809 u32 scope = TIPC_CLUSTER_SCOPE; 1119 u32 scope = TIPC_CLUSTER_SCOPE;
810 struct sk_buff_head tmpq; 1120 u32 self = tipc_own_addr(net);
811 uint hsz;
812 struct sk_buff *skb, *_skb; 1121 struct sk_buff *skb, *_skb;
1122 u32 lower = 0, upper = ~0;
1123 struct sk_buff_head tmpq;
1124 u32 portid, oport, onode;
1125 struct list_head dports;
1126 struct tipc_msg *msg;
1127 int user, mtyp, hsz;
813 1128
814 __skb_queue_head_init(&tmpq); 1129 __skb_queue_head_init(&tmpq);
815 INIT_LIST_HEAD(&dports); 1130 INIT_LIST_HEAD(&dports);
@@ -817,17 +1132,32 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
817 skb = tipc_skb_peek(arrvq, &inputq->lock); 1132 skb = tipc_skb_peek(arrvq, &inputq->lock);
818 for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { 1133 for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
819 msg = buf_msg(skb); 1134 msg = buf_msg(skb);
1135 user = msg_user(msg);
1136 mtyp = msg_type(msg);
1137 if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
1138 spin_lock_bh(&inputq->lock);
1139 if (skb_peek(arrvq) == skb) {
1140 __skb_dequeue(arrvq);
1141 __skb_queue_tail(inputq, skb);
1142 }
1143 refcount_dec(&skb->users);
1144 spin_unlock_bh(&inputq->lock);
1145 continue;
1146 }
820 hsz = skb_headroom(skb) + msg_hdr_sz(msg); 1147 hsz = skb_headroom(skb) + msg_hdr_sz(msg);
821 1148 oport = msg_origport(msg);
822 if (in_own_node(net, msg_orignode(msg))) 1149 onode = msg_orignode(msg);
1150 if (onode == self)
823 scope = TIPC_NODE_SCOPE; 1151 scope = TIPC_NODE_SCOPE;
824 1152
825 /* Create destination port list and message clones: */ 1153 /* Create destination port list and message clones: */
826 tipc_nametbl_mc_translate(net, 1154 if (!msg_in_group(msg)) {
827 msg_nametype(msg), msg_namelower(msg), 1155 lower = msg_namelower(msg);
828 msg_nameupper(msg), scope, &dports); 1156 upper = msg_nameupper(msg);
829 portid = u32_pop(&dports); 1157 }
830 for (; portid; portid = u32_pop(&dports)) { 1158 tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
1159 scope, &dports);
1160 while (tipc_dest_pop(&dports, NULL, &portid)) {
831 _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); 1161 _skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
832 if (_skb) { 1162 if (_skb) {
833 msg_set_destport(buf_msg(_skb), portid); 1163 msg_set_destport(buf_msg(_skb), portid);
@@ -850,16 +1180,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
850} 1180}
851 1181
852/** 1182/**
853 * tipc_sk_proto_rcv - receive a connection management protocol message 1183 * tipc_sk_conn_proto_rcv - receive a connection management protocol message
854 * @tsk: receiving socket 1184 * @tsk: receiving socket
855 * @skb: pointer to message buffer. 1185 * @skb: pointer to message buffer.
856 */ 1186 */
857static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, 1187static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
858 struct sk_buff_head *xmitq) 1188 struct sk_buff_head *xmitq)
859{ 1189{
860 struct sock *sk = &tsk->sk;
861 u32 onode = tsk_own_node(tsk);
862 struct tipc_msg *hdr = buf_msg(skb); 1190 struct tipc_msg *hdr = buf_msg(skb);
1191 u32 onode = tsk_own_node(tsk);
1192 struct sock *sk = &tsk->sk;
863 int mtyp = msg_type(hdr); 1193 int mtyp = msg_type(hdr);
864 bool conn_cong; 1194 bool conn_cong;
865 1195
@@ -931,6 +1261,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
931 long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); 1261 long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
932 struct list_head *clinks = &tsk->cong_links; 1262 struct list_head *clinks = &tsk->cong_links;
933 bool syn = !tipc_sk_type_connectionless(sk); 1263 bool syn = !tipc_sk_type_connectionless(sk);
1264 struct tipc_group *grp = tsk->group;
934 struct tipc_msg *hdr = &tsk->phdr; 1265 struct tipc_msg *hdr = &tsk->phdr;
935 struct tipc_name_seq *seq; 1266 struct tipc_name_seq *seq;
936 struct sk_buff_head pkts; 1267 struct sk_buff_head pkts;
@@ -941,18 +1272,31 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
941 if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) 1272 if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
942 return -EMSGSIZE; 1273 return -EMSGSIZE;
943 1274
1275 if (likely(dest)) {
1276 if (unlikely(m->msg_namelen < sizeof(*dest)))
1277 return -EINVAL;
1278 if (unlikely(dest->family != AF_TIPC))
1279 return -EINVAL;
1280 }
1281
1282 if (grp) {
1283 if (!dest)
1284 return tipc_send_group_bcast(sock, m, dlen, timeout);
1285 if (dest->addrtype == TIPC_ADDR_NAME)
1286 return tipc_send_group_anycast(sock, m, dlen, timeout);
1287 if (dest->addrtype == TIPC_ADDR_ID)
1288 return tipc_send_group_unicast(sock, m, dlen, timeout);
1289 if (dest->addrtype == TIPC_ADDR_MCAST)
1290 return tipc_send_group_mcast(sock, m, dlen, timeout);
1291 return -EINVAL;
1292 }
1293
944 if (unlikely(!dest)) { 1294 if (unlikely(!dest)) {
945 dest = &tsk->peer; 1295 dest = &tsk->peer;
946 if (!syn || dest->family != AF_TIPC) 1296 if (!syn || dest->family != AF_TIPC)
947 return -EDESTADDRREQ; 1297 return -EDESTADDRREQ;
948 } 1298 }
949 1299
950 if (unlikely(m->msg_namelen < sizeof(*dest)))
951 return -EINVAL;
952
953 if (unlikely(dest->family != AF_TIPC))
954 return -EINVAL;
955
956 if (unlikely(syn)) { 1300 if (unlikely(syn)) {
957 if (sk->sk_state == TIPC_LISTEN) 1301 if (sk->sk_state == TIPC_LISTEN)
958 return -EPIPE; 1302 return -EPIPE;
@@ -985,7 +1329,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
985 msg_set_destport(hdr, dport); 1329 msg_set_destport(hdr, dport);
986 if (unlikely(!dport && !dnode)) 1330 if (unlikely(!dport && !dnode))
987 return -EHOSTUNREACH; 1331 return -EHOSTUNREACH;
988
989 } else if (dest->addrtype == TIPC_ADDR_ID) { 1332 } else if (dest->addrtype == TIPC_ADDR_ID) {
990 dnode = dest->addr.id.node; 1333 dnode = dest->addr.id.node;
991 msg_set_type(hdr, TIPC_DIRECT_MSG); 1334 msg_set_type(hdr, TIPC_DIRECT_MSG);
@@ -996,7 +1339,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
996 } 1339 }
997 1340
998 /* Block or return if destination link is congested */ 1341 /* Block or return if destination link is congested */
999 rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode)); 1342 rc = tipc_wait_for_cond(sock, &timeout,
1343 !tipc_dest_find(clinks, dnode, 0));
1000 if (unlikely(rc)) 1344 if (unlikely(rc))
1001 return rc; 1345 return rc;
1002 1346
@@ -1008,7 +1352,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
1008 1352
1009 rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); 1353 rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
1010 if (unlikely(rc == -ELINKCONG)) { 1354 if (unlikely(rc == -ELINKCONG)) {
1011 u32_push(clinks, dnode); 1355 tipc_dest_push(clinks, dnode, 0);
1012 tsk->cong_link_cnt++; 1356 tsk->cong_link_cnt++;
1013 rc = 0; 1357 rc = 0;
1014 } 1358 }
@@ -1128,7 +1472,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1128 msg_set_lookup_scope(msg, 0); 1472 msg_set_lookup_scope(msg, 0);
1129 msg_set_hdr_sz(msg, SHORT_H_SIZE); 1473 msg_set_hdr_sz(msg, SHORT_H_SIZE);
1130 1474
1131 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); 1475 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
1132 tipc_set_sk_state(sk, TIPC_ESTABLISHED); 1476 tipc_set_sk_state(sk, TIPC_ESTABLISHED);
1133 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); 1477 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
1134 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); 1478 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
@@ -1142,26 +1486,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1142} 1486}
1143 1487
1144/** 1488/**
1145 * set_orig_addr - capture sender's address for received message 1489 * tipc_sk_set_orig_addr - capture sender's address for received message
1146 * @m: descriptor for message info 1490 * @m: descriptor for message info
1147 * @msg: received message header 1491 * @skb: received message buffer
1148 * 1492 *
1149 * Note: Address is not captured if not requested by receiver. 1493 * Note: Address is not captured if not requested by receiver.
1150 */ 1494 */
1151static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) 1495static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
1152{ 1496{
1153 DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name); 1497 DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
1498 struct tipc_msg *hdr = buf_msg(skb);
1154 1499
1155 if (addr) { 1500 if (!srcaddr)
1156 addr->family = AF_TIPC; 1501 return;
1157 addr->addrtype = TIPC_ADDR_ID; 1502
1158 memset(&addr->addr, 0, sizeof(addr->addr)); 1503 srcaddr->sock.family = AF_TIPC;
1159 addr->addr.id.ref = msg_origport(msg); 1504 srcaddr->sock.addrtype = TIPC_ADDR_ID;
1160 addr->addr.id.node = msg_orignode(msg); 1505 srcaddr->sock.addr.id.ref = msg_origport(hdr);
1161 addr->addr.name.domain = 0; /* could leave uninitialized */ 1506 srcaddr->sock.addr.id.node = msg_orignode(hdr);
1162 addr->scope = 0; /* could leave uninitialized */ 1507 srcaddr->sock.addr.name.domain = 0;
1163 m->msg_namelen = sizeof(struct sockaddr_tipc); 1508 srcaddr->sock.scope = 0;
1164 } 1509 m->msg_namelen = sizeof(struct sockaddr_tipc);
1510
1511 if (!msg_in_group(hdr))
1512 return;
1513
1514 	/* Group message users may also want to know the sending member's id */
1515 srcaddr->member.family = AF_TIPC;
1516 srcaddr->member.addrtype = TIPC_ADDR_NAME;
1517 srcaddr->member.addr.name.name.type = msg_nametype(hdr);
1518 srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
1519 srcaddr->member.addr.name.domain = 0;
1520 m->msg_namelen = sizeof(*srcaddr);
1165} 1521}
1166 1522
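On the receive side, a consumer that supplies a msg_name buffer now gets a sockaddr_pair back for group traffic: the first sockaddr_tipc identifies the sending socket, the second the sending member. The returned msg_namelen tells the two cases apart. A hedged userspace sketch, assuming the sockaddr_pair layout used above (a sock plus a member sockaddr_tipc):

    struct sockaddr_pair src;
    char buf[1024];
    struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
    struct msghdr m = {0};
    ssize_t n;

    m.msg_name = &src;
    m.msg_namelen = sizeof(src);
    m.msg_iov = &iov;
    m.msg_iovlen = 1;

    n = recvmsg(sd, &m, 0);
    if (n >= 0 && m.msg_namelen == sizeof(src))     /* group message */
        printf("from %x:%u, member %u\n",
               src.sock.addr.id.node, src.sock.addr.id.ref,
               src.member.addr.name.name.instance);
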
1167/** 1523/**
@@ -1318,11 +1674,13 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
1318 size_t buflen, int flags) 1674 size_t buflen, int flags)
1319{ 1675{
1320 struct sock *sk = sock->sk; 1676 struct sock *sk = sock->sk;
1321 struct tipc_sock *tsk = tipc_sk(sk);
1322 struct sk_buff *skb;
1323 struct tipc_msg *hdr;
1324 bool connected = !tipc_sk_type_connectionless(sk); 1677 bool connected = !tipc_sk_type_connectionless(sk);
1678 struct tipc_sock *tsk = tipc_sk(sk);
1325 int rc, err, hlen, dlen, copy; 1679 int rc, err, hlen, dlen, copy;
1680 struct sk_buff_head xmitq;
1681 struct tipc_msg *hdr;
1682 struct sk_buff *skb;
1683 bool grp_evt;
1326 long timeout; 1684 long timeout;
1327 1685
1328 /* Catch invalid receive requests */ 1686 /* Catch invalid receive requests */
@@ -1336,8 +1694,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
1336 } 1694 }
1337 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1695 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1338 1696
1697 /* Step rcv queue to first msg with data or error; wait if necessary */
1339 do { 1698 do {
1340 /* Look at first msg in receive queue; wait if necessary */
1341 rc = tipc_wait_for_rcvmsg(sock, &timeout); 1699 rc = tipc_wait_for_rcvmsg(sock, &timeout);
1342 if (unlikely(rc)) 1700 if (unlikely(rc))
1343 goto exit; 1701 goto exit;
@@ -1346,13 +1704,14 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
1346 dlen = msg_data_sz(hdr); 1704 dlen = msg_data_sz(hdr);
1347 hlen = msg_hdr_sz(hdr); 1705 hlen = msg_hdr_sz(hdr);
1348 err = msg_errcode(hdr); 1706 err = msg_errcode(hdr);
1707 grp_evt = msg_is_grp_evt(hdr);
1349 if (likely(dlen || err)) 1708 if (likely(dlen || err))
1350 break; 1709 break;
1351 tsk_advance_rx_queue(sk); 1710 tsk_advance_rx_queue(sk);
1352 } while (1); 1711 } while (1);
1353 1712
1354 /* Collect msg meta data, including error code and rejected data */ 1713 /* Collect msg meta data, including error code and rejected data */
1355 set_orig_addr(m, hdr); 1714 tipc_sk_set_orig_addr(m, skb);
1356 rc = tipc_sk_anc_data_recv(m, hdr, tsk); 1715 rc = tipc_sk_anc_data_recv(m, hdr, tsk);
1357 if (unlikely(rc)) 1716 if (unlikely(rc))
1358 goto exit; 1717 goto exit;
@@ -1372,15 +1731,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
1372 if (unlikely(rc)) 1731 if (unlikely(rc))
1373 goto exit; 1732 goto exit;
1374 1733
1734 /* Mark message as group event if applicable */
1735 if (unlikely(grp_evt)) {
1736 if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
1737 m->msg_flags |= MSG_EOR;
1738 m->msg_flags |= MSG_OOB;
1739 copy = 0;
1740 }
1741
1375 	/* Capture of data or error code/rejected data was successful */ 1742 	/* Capture of data or error code/rejected data was successful */
1376 if (unlikely(flags & MSG_PEEK)) 1743 if (unlikely(flags & MSG_PEEK))
1377 goto exit; 1744 goto exit;
1378 1745
1746 /* Send group flow control advertisement when applicable */
1747 if (tsk->group && msg_in_group(hdr) && !grp_evt) {
1748 skb_queue_head_init(&xmitq);
1749 tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
1750 msg_orignode(hdr), msg_origport(hdr),
1751 &xmitq);
1752 tipc_node_distr_xmit(sock_net(sk), &xmitq);
1753 }
1754
1379 tsk_advance_rx_queue(sk); 1755 tsk_advance_rx_queue(sk);
1756
1380 if (likely(!connected)) 1757 if (likely(!connected))
1381 goto exit; 1758 goto exit;
1382 1759
1383 /* Send connection flow control ack when applicable */ 1760 /* Send connection flow control advertisement when applicable */
1384 tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); 1761 tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
1385 if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) 1762 if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
1386 tipc_sk_send_ack(tsk); 1763 tipc_sk_send_ack(tsk);
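Group membership events thus surface through the ordinary receive path: a joined peer arrives as a zero-length message flagged MSG_OOB, a withdrawn peer additionally carries MSG_EOR, and the member's identity comes via the sockaddr_pair filled in by tipc_sk_set_orig_addr() above. A sketch of the resulting receive loop, reusing m, src and buf from the previous sketch (the two handlers are hypothetical):

    for (;;) {
        m.msg_namelen = sizeof(src);
        n = recvmsg(sd, &m, 0);
        if (n < 0)
            break;
        if (m.msg_flags & MSG_OOB) {            /* membership event */
            if (m.msg_flags & MSG_EOR)
                member_down(&src);              /* peer withdrew or was lost */
            else
                member_up(&src);                /* peer joined */
            continue;
        }
        consume(buf, n);                        /* ordinary group data */
    }

The flow-control advertisement sent back in the same hunk needs no userspace involvement.
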
@@ -1446,7 +1823,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
1446 1823
1447 /* Collect msg meta data, incl. error code and rejected data */ 1824 /* Collect msg meta data, incl. error code and rejected data */
1448 if (!copied) { 1825 if (!copied) {
1449 set_orig_addr(m, hdr); 1826 tipc_sk_set_orig_addr(m, skb);
1450 rc = tipc_sk_anc_data_recv(m, hdr, tsk); 1827 rc = tipc_sk_anc_data_recv(m, hdr, tsk);
1451 if (rc) 1828 if (rc)
1452 break; 1829 break;
@@ -1532,14 +1909,51 @@ static void tipc_sock_destruct(struct sock *sk)
1532 __skb_queue_purge(&sk->sk_receive_queue); 1909 __skb_queue_purge(&sk->sk_receive_queue);
1533} 1910}
1534 1911
1912static void tipc_sk_proto_rcv(struct sock *sk,
1913 struct sk_buff_head *inputq,
1914 struct sk_buff_head *xmitq)
1915{
1916 struct sk_buff *skb = __skb_dequeue(inputq);
1917 struct tipc_sock *tsk = tipc_sk(sk);
1918 struct tipc_msg *hdr = buf_msg(skb);
1919 struct tipc_group *grp = tsk->group;
1920 bool wakeup = false;
1921
1922 switch (msg_user(hdr)) {
1923 case CONN_MANAGER:
1924 tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
1925 return;
1926 case SOCK_WAKEUP:
1927 tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
1928 tsk->cong_link_cnt--;
1929 wakeup = true;
1930 break;
1931 case GROUP_PROTOCOL:
1932 tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
1933 break;
1934 case TOP_SRV:
1935 tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
1936 skb, inputq, xmitq);
1937 skb = NULL;
1938 break;
1939 default:
1940 break;
1941 }
1942
1943 if (wakeup)
1944 sk->sk_write_space(sk);
1945
1946 kfree_skb(skb);
1947}
1948
1535/** 1949/**
1536 * filter_connect - Handle all incoming messages for a connection-based socket 1950 * tipc_sk_filter_connect - Handle incoming message for a connection-based socket
1537 * @tsk: TIPC socket 1951 * @tsk: TIPC socket
1538 * @skb: pointer to message buffer. Set to NULL if buffer is consumed 1952 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
1539 * 1953 *
1540 * Returns true if everything is OK, false otherwise 1954 * Returns true if everything is OK, false otherwise
1541 */ 1955 */
1542static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) 1956static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1543{ 1957{
1544 struct sock *sk = &tsk->sk; 1958 struct sock *sk = &tsk->sk;
1545 struct net *net = sock_net(sk); 1959 struct net *net = sock_net(sk);
@@ -1643,6 +2057,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1643 struct tipc_sock *tsk = tipc_sk(sk); 2057 struct tipc_sock *tsk = tipc_sk(sk);
1644 struct tipc_msg *hdr = buf_msg(skb); 2058 struct tipc_msg *hdr = buf_msg(skb);
1645 2059
2060 if (unlikely(msg_in_group(hdr)))
2061 return sk->sk_rcvbuf;
2062
1646 if (unlikely(!msg_connected(hdr))) 2063 if (unlikely(!msg_connected(hdr)))
1647 return sk->sk_rcvbuf << msg_importance(hdr); 2064 return sk->sk_rcvbuf << msg_importance(hdr);
1648 2065
@@ -1653,7 +2070,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1653} 2070}
1654 2071
1655/** 2072/**
1656 * filter_rcv - validate incoming message 2073 * tipc_sk_filter_rcv - validate incoming message
1657 * @sk: socket 2074 * @sk: socket
1658 * @skb: pointer to message. 2075 * @skb: pointer to message.
1659 * 2076 *
@@ -1662,99 +2079,71 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1662 * 2079 *
1663 * Called with socket lock already taken 2080 * Called with socket lock already taken
1664 * 2081 *
1665 * Returns true if message was added to socket receive queue, otherwise false
1666 */ 2082 */
1667static bool filter_rcv(struct sock *sk, struct sk_buff *skb, 2083static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
1668 struct sk_buff_head *xmitq) 2084 struct sk_buff_head *xmitq)
1669{ 2085{
2086 bool sk_conn = !tipc_sk_type_connectionless(sk);
1670 struct tipc_sock *tsk = tipc_sk(sk); 2087 struct tipc_sock *tsk = tipc_sk(sk);
2088 struct tipc_group *grp = tsk->group;
1671 struct tipc_msg *hdr = buf_msg(skb); 2089 struct tipc_msg *hdr = buf_msg(skb);
1672 unsigned int limit = rcvbuf_limit(sk, skb); 2090 struct net *net = sock_net(sk);
1673 int err = TIPC_OK; 2091 struct sk_buff_head inputq;
1674 int usr = msg_user(hdr); 2092 int limit, err = TIPC_OK;
1675 u32 onode;
1676 2093
1677 if (unlikely(msg_user(hdr) == CONN_MANAGER)) { 2094 TIPC_SKB_CB(skb)->bytes_read = 0;
1678 tipc_sk_proto_rcv(tsk, skb, xmitq); 2095 __skb_queue_head_init(&inputq);
1679 return false; 2096 __skb_queue_tail(&inputq, skb);
1680 }
1681 2097
1682 if (unlikely(usr == SOCK_WAKEUP)) { 2098 if (unlikely(!msg_isdata(hdr)))
1683 onode = msg_orignode(hdr); 2099 tipc_sk_proto_rcv(sk, &inputq, xmitq);
1684 kfree_skb(skb);
1685 u32_del(&tsk->cong_links, onode);
1686 tsk->cong_link_cnt--;
1687 sk->sk_write_space(sk);
1688 return false;
1689 }
1690 2100
1691 /* Drop if illegal message type */ 2101 if (unlikely(grp))
1692 if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) { 2102 tipc_group_filter_msg(grp, &inputq, xmitq);
1693 kfree_skb(skb);
1694 return false;
1695 }
1696 2103
1697 /* Reject if wrong message type for current socket state */ 2104 /* Validate and add to receive buffer if there is space */
1698 if (tipc_sk_type_connectionless(sk)) { 2105 while ((skb = __skb_dequeue(&inputq))) {
1699 if (msg_connected(hdr)) { 2106 hdr = buf_msg(skb);
2107 limit = rcvbuf_limit(sk, skb);
2108 if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
2109 (!sk_conn && msg_connected(hdr)) ||
2110 (!grp && msg_in_group(hdr)))
1700 err = TIPC_ERR_NO_PORT; 2111 err = TIPC_ERR_NO_PORT;
1701 goto reject; 2112 else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
1702 } 2113 err = TIPC_ERR_OVERLOAD;
1703 } else if (unlikely(!filter_connect(tsk, skb))) {
1704 err = TIPC_ERR_NO_PORT;
1705 goto reject;
1706 }
1707 2114
1708 /* Reject message if there isn't room to queue it */ 2115 if (unlikely(err)) {
1709 if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) { 2116 tipc_skb_reject(net, err, skb, xmitq);
1710 err = TIPC_ERR_OVERLOAD; 2117 err = TIPC_OK;
1711 goto reject; 2118 continue;
2119 }
2120 __skb_queue_tail(&sk->sk_receive_queue, skb);
2121 skb_set_owner_r(skb, sk);
2122 sk->sk_data_ready(sk);
1712 } 2123 }
1713
1714 /* Enqueue message */
1715 TIPC_SKB_CB(skb)->bytes_read = 0;
1716 __skb_queue_tail(&sk->sk_receive_queue, skb);
1717 skb_set_owner_r(skb, sk);
1718
1719 sk->sk_data_ready(sk);
1720 return true;
1721
1722reject:
1723 if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
1724 __skb_queue_tail(xmitq, skb);
1725 return false;
1726} 2124}
1727 2125
1728/** 2126/**
1729 * tipc_backlog_rcv - handle incoming message from backlog queue 2127 * tipc_sk_backlog_rcv - handle incoming message from backlog queue
1730 * @sk: socket 2128 * @sk: socket
1731 * @skb: message 2129 * @skb: message
1732 * 2130 *
1733 * Caller must hold socket lock 2131 * Caller must hold socket lock
1734 *
1735 * Returns 0
1736 */ 2132 */
1737static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) 2133static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
1738{ 2134{
1739 unsigned int truesize = skb->truesize; 2135 unsigned int before = sk_rmem_alloc_get(sk);
1740 struct sk_buff_head xmitq; 2136 struct sk_buff_head xmitq;
1741 u32 dnode, selector; 2137 unsigned int added;
1742 2138
1743 __skb_queue_head_init(&xmitq); 2139 __skb_queue_head_init(&xmitq);
1744 2140
1745 if (likely(filter_rcv(sk, skb, &xmitq))) { 2141 tipc_sk_filter_rcv(sk, skb, &xmitq);
1746 atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); 2142 added = sk_rmem_alloc_get(sk) - before;
1747 return 0; 2143 atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
1748 }
1749 2144
1750 if (skb_queue_empty(&xmitq)) 2145 /* Send pending response/rejected messages, if any */
1751 return 0; 2146 tipc_node_distr_xmit(sock_net(sk), &xmitq);
1752
1753 /* Send response/rejected message */
1754 skb = __skb_dequeue(&xmitq);
1755 dnode = msg_destnode(buf_msg(skb));
1756 selector = msg_origport(buf_msg(skb));
1757 tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
1758 return 0; 2147 return 0;
1759} 2148}
1760 2149
@@ -1786,7 +2175,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
1786 2175
1787 /* Add message directly to receive queue if possible */ 2176 /* Add message directly to receive queue if possible */
1788 if (!sock_owned_by_user(sk)) { 2177 if (!sock_owned_by_user(sk)) {
1789 filter_rcv(sk, skb, xmitq); 2178 tipc_sk_filter_rcv(sk, skb, xmitq);
1790 continue; 2179 continue;
1791 } 2180 }
1792 2181
@@ -1833,14 +2222,10 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
1833 spin_unlock_bh(&sk->sk_lock.slock); 2222 spin_unlock_bh(&sk->sk_lock.slock);
1834 } 2223 }
1835 /* Send pending response/rejected messages, if any */ 2224 /* Send pending response/rejected messages, if any */
1836 while ((skb = __skb_dequeue(&xmitq))) { 2225 tipc_node_distr_xmit(sock_net(sk), &xmitq);
1837 dnode = msg_destnode(buf_msg(skb));
1838 tipc_node_xmit_skb(net, skb, dnode, dport);
1839 }
1840 sock_put(sk); 2226 sock_put(sk);
1841 continue; 2227 continue;
1842 } 2228 }
1843
1844 /* No destination socket => dequeue skb if still there */ 2229 /* No destination socket => dequeue skb if still there */
1845 skb = tipc_skb_dequeue(inputq, dport); 2230 skb = tipc_skb_dequeue(inputq, dport);
1846 if (!skb) 2231 if (!skb)
@@ -1903,28 +2288,32 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
1903 int previous; 2288 int previous;
1904 int res = 0; 2289 int res = 0;
1905 2290
2291 if (destlen != sizeof(struct sockaddr_tipc))
2292 return -EINVAL;
2293
1906 lock_sock(sk); 2294 lock_sock(sk);
1907 2295
1908 /* DGRAM/RDM connect(), just save the destaddr */ 2296 if (tsk->group) {
1909 if (tipc_sk_type_connectionless(sk)) { 2297 res = -EINVAL;
1910 if (dst->family == AF_UNSPEC) {
1911 memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
1912 } else if (destlen != sizeof(struct sockaddr_tipc)) {
1913 res = -EINVAL;
1914 } else {
1915 memcpy(&tsk->peer, dest, destlen);
1916 }
1917 goto exit; 2298 goto exit;
1918 } 2299 }
1919 2300
1920 /* 2301 if (dst->family == AF_UNSPEC) {
1921 * Reject connection attempt using multicast address 2302 memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
1922 * 2303 if (!tipc_sk_type_connectionless(sk))
1923 * Note: send_msg() validates the rest of the address fields, 2304 res = -EINVAL;
1924 * so there's no need to do it here 2305 goto exit;
1925 */ 2306 } else if (dst->family != AF_TIPC) {
1926 if (dst->addrtype == TIPC_ADDR_MCAST) {
1927 res = -EINVAL; 2307 res = -EINVAL;
2308 }
2309 if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
2310 res = -EINVAL;
2311 if (res)
2312 goto exit;
2313
2314 /* DGRAM/RDM connect(), just save the destaddr */
2315 if (tipc_sk_type_connectionless(sk)) {
2316 memcpy(&tsk->peer, dest, destlen);
1928 goto exit; 2317 goto exit;
1929 } 2318 }
1930 2319
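For connectionless (DGRAM/RDM) sockets the reordered logic keeps the established semantics: connect() merely records a default peer for subsequent send() calls, and an AF_UNSPEC address clears it again, while group member sockets and short address lengths are now rejected up front. Illustratively (node and port are placeholders):

    struct sockaddr_tipc peer = {0};

    peer.family = AF_TIPC;
    peer.addrtype = TIPC_ADDR_ID;
    peer.addr.id.node = node;
    peer.addr.id.ref = port;

    connect(sd, (struct sockaddr *)&peer, sizeof(peer));    /* set default peer */
    send(sd, buf, len, 0);                                  /* goes to node:port */

    peer.family = AF_UNSPEC;
    connect(sd, (struct sockaddr *)&peer, sizeof(peer));    /* forget it again */
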
@@ -2141,46 +2530,43 @@ static int tipc_shutdown(struct socket *sock, int how)
2141 return res; 2530 return res;
2142} 2531}
2143 2532
2144static void tipc_sk_timeout(unsigned long data) 2533static void tipc_sk_timeout(struct timer_list *t)
2145{ 2534{
2146 struct tipc_sock *tsk = (struct tipc_sock *)data; 2535 struct sock *sk = from_timer(sk, t, sk_timer);
2147 struct sock *sk = &tsk->sk; 2536 struct tipc_sock *tsk = tipc_sk(sk);
2148 struct sk_buff *skb = NULL; 2537 u32 peer_port = tsk_peer_port(tsk);
2149 u32 peer_port, peer_node; 2538 u32 peer_node = tsk_peer_node(tsk);
2150 u32 own_node = tsk_own_node(tsk); 2539 u32 own_node = tsk_own_node(tsk);
2540 u32 own_port = tsk->portid;
2541 struct net *net = sock_net(sk);
2542 struct sk_buff *skb = NULL;
2151 2543
2152 bh_lock_sock(sk); 2544 bh_lock_sock(sk);
2153 if (!tipc_sk_connected(sk)) { 2545 if (!tipc_sk_connected(sk))
2154 bh_unlock_sock(sk); 2546 goto exit;
2547
2548 /* Try again later if socket is busy */
2549 if (sock_owned_by_user(sk)) {
2550 sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
2155 goto exit; 2551 goto exit;
2156 } 2552 }
2157 peer_port = tsk_peer_port(tsk);
2158 peer_node = tsk_peer_node(tsk);
2159 2553
2160 if (tsk->probe_unacked) { 2554 if (tsk->probe_unacked) {
2161 if (!sock_owned_by_user(sk)) { 2555 tipc_set_sk_state(sk, TIPC_DISCONNECTING);
2162 tipc_set_sk_state(sk, TIPC_DISCONNECTING); 2556 tipc_node_remove_conn(net, peer_node, peer_port);
2163 tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), 2557 sk->sk_state_change(sk);
2164 tsk_peer_port(tsk));
2165 sk->sk_state_change(sk);
2166 } else {
2167 /* Try again later */
2168 sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
2169 }
2170
2171 bh_unlock_sock(sk);
2172 goto exit; 2558 goto exit;
2173 } 2559 }
2174 2560 /* Send new probe */
2175 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, 2561 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
2176 INT_H_SIZE, 0, peer_node, own_node, 2562 peer_node, own_node, peer_port, own_port,
2177 peer_port, tsk->portid, TIPC_OK); 2563 TIPC_OK);
2178 tsk->probe_unacked = true; 2564 tsk->probe_unacked = true;
2179 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); 2565 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
2566exit:
2180 bh_unlock_sock(sk); 2567 bh_unlock_sock(sk);
2181 if (skb) 2568 if (skb)
2182 tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); 2569 tipc_node_xmit_skb(net, skb, peer_node, own_port);
2183exit:
2184 sock_put(sk); 2570 sock_put(sk);
2185} 2571}
2186 2572
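This is the tree-wide timer API conversion: the callback now receives the struct timer_list pointer itself, and from_timer(), a container_of() wrapper, recovers the enclosing object, replacing the old cast of an unsigned long data cookie. Here sk_timer is embedded in struct sock, hence from_timer(sk, t, sk_timer). The pattern in isolation, sketched for a made-up structure (assumes <linux/timer.h>):

    struct foo {
        struct timer_list timer;        /* embedded, no data cookie */
        int value;
    };

    static void foo_timeout(struct timer_list *t)
    {
        struct foo *f = from_timer(f, t, timer);    /* container_of() */

        pr_info("foo %d timed out\n", f->value);
    }

    /* setup and arming; the old form passed (unsigned long)f to setup_timer() */
    timer_setup(&f->timer, foo_timeout, 0);
    mod_timer(&f->timer, jiffies + HZ);
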
@@ -2345,6 +2731,58 @@ void tipc_sk_rht_destroy(struct net *net)
2345 rhashtable_destroy(&tn->sk_rht); 2731 rhashtable_destroy(&tn->sk_rht);
2346} 2732}
2347 2733
2734static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
2735{
2736 struct net *net = sock_net(&tsk->sk);
2737 u32 domain = addr_domain(net, mreq->scope);
2738 struct tipc_group *grp = tsk->group;
2739 struct tipc_msg *hdr = &tsk->phdr;
2740 struct tipc_name_seq seq;
2741 int rc;
2742
2743 if (mreq->type < TIPC_RESERVED_TYPES)
2744 return -EACCES;
2745 if (grp)
2746 return -EACCES;
2747 grp = tipc_group_create(net, tsk->portid, mreq);
2748 if (!grp)
2749 return -ENOMEM;
2750 tsk->group = grp;
2751 msg_set_lookup_scope(hdr, mreq->scope);
2752 msg_set_nametype(hdr, mreq->type);
2753 msg_set_dest_droppable(hdr, true);
2754 seq.type = mreq->type;
2755 seq.lower = mreq->instance;
2756 seq.upper = seq.lower;
2757 tipc_nametbl_build_group(net, grp, mreq->type, domain);
2758 rc = tipc_sk_publish(tsk, mreq->scope, &seq);
2759 if (rc) {
2760 tipc_group_delete(net, grp);
2761 tsk->group = NULL;
2762 }
2763
2764 /* Eliminate any risk that a broadcast overtakes the sent JOIN */
2765 tsk->mc_method.rcast = true;
2766 tsk->mc_method.mandatory = true;
2767 return rc;
2768}
2769
2770static int tipc_sk_leave(struct tipc_sock *tsk)
2771{
2772 struct net *net = sock_net(&tsk->sk);
2773 struct tipc_group *grp = tsk->group;
2774 struct tipc_name_seq seq;
2775 int scope;
2776
2777 if (!grp)
2778 return -EINVAL;
2779 tipc_group_self(grp, &seq, &scope);
2780 tipc_group_delete(net, grp);
2781 tsk->group = NULL;
2782 tipc_sk_withdraw(tsk, scope, &seq);
2783 return 0;
2784}
2785
2348/** 2786/**
2349 * tipc_setsockopt - set socket option 2787 * tipc_setsockopt - set socket option
2350 * @sock: socket structure 2788 * @sock: socket structure
@@ -2363,6 +2801,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
2363{ 2801{
2364 struct sock *sk = sock->sk; 2802 struct sock *sk = sock->sk;
2365 struct tipc_sock *tsk = tipc_sk(sk); 2803 struct tipc_sock *tsk = tipc_sk(sk);
2804 struct tipc_group_req mreq;
2366 u32 value = 0; 2805 u32 value = 0;
2367 int res = 0; 2806 int res = 0;
2368 2807
@@ -2378,9 +2817,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
2378 case TIPC_CONN_TIMEOUT: 2817 case TIPC_CONN_TIMEOUT:
2379 if (ol < sizeof(value)) 2818 if (ol < sizeof(value))
2380 return -EINVAL; 2819 return -EINVAL;
2381 res = get_user(value, (u32 __user *)ov); 2820 if (get_user(value, (u32 __user *)ov))
2382 if (res) 2821 return -EFAULT;
2383 return res; 2822 break;
2823 case TIPC_GROUP_JOIN:
2824 if (ol < sizeof(mreq))
2825 return -EINVAL;
2826 if (copy_from_user(&mreq, ov, sizeof(mreq)))
2827 return -EFAULT;
2384 break; 2828 break;
2385 default: 2829 default:
2386 if (ov || ol) 2830 if (ov || ol)
@@ -2413,6 +2857,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
2413 tsk->mc_method.rcast = true; 2857 tsk->mc_method.rcast = true;
2414 tsk->mc_method.mandatory = true; 2858 tsk->mc_method.mandatory = true;
2415 break; 2859 break;
2860 case TIPC_GROUP_JOIN:
2861 res = tipc_sk_join(tsk, &mreq);
2862 break;
2863 case TIPC_GROUP_LEAVE:
2864 res = tipc_sk_leave(tsk);
2865 break;
2416 default: 2866 default:
2417 res = -EINVAL; 2867 res = -EINVAL;
2418 } 2868 }
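Seen from userspace, joining and leaving a group is a plain socket-option pair, typically on a SOCK_RDM socket. A hedged sketch using only the three tipc_group_req fields this patch reads (type, instance and scope); the 4711/17 values are arbitrary:

    struct tipc_group_req req = {0};
    int sd = socket(AF_TIPC, SOCK_RDM, 0);

    req.type = 4711;                    /* group id, >= TIPC_RESERVED_TYPES */
    req.instance = 17;                  /* this member's id within the group */
    req.scope = TIPC_CLUSTER_SCOPE;

    if (setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &req, sizeof(req)) < 0)
        perror("TIPC_GROUP_JOIN");

    /* ... group traffic as in the send/receive sketches above ... */

    setsockopt(sd, SOL_TIPC, TIPC_GROUP_LEAVE, NULL, 0);

The NULL/0 arguments for TIPC_GROUP_LEAVE match the default branch of the first switch above, which insists on an empty option value.
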
@@ -2440,7 +2890,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
2440{ 2890{
2441 struct sock *sk = sock->sk; 2891 struct sock *sk = sock->sk;
2442 struct tipc_sock *tsk = tipc_sk(sk); 2892 struct tipc_sock *tsk = tipc_sk(sk);
2443 int len; 2893 struct tipc_name_seq seq;
2894 int len, scope;
2444 u32 value; 2895 u32 value;
2445 int res; 2896 int res;
2446 2897
@@ -2474,6 +2925,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
2474 case TIPC_SOCK_RECVQ_DEPTH: 2925 case TIPC_SOCK_RECVQ_DEPTH:
2475 value = skb_queue_len(&sk->sk_receive_queue); 2926 value = skb_queue_len(&sk->sk_receive_queue);
2476 break; 2927 break;
2928 case TIPC_GROUP_JOIN:
2929 seq.type = 0;
2930 if (tsk->group)
2931 tipc_group_self(tsk->group, &seq, &scope);
2932 value = seq.type;
2933 break;
2477 default: 2934 default:
2478 res = -EINVAL; 2935 res = -EINVAL;
2479 } 2936 }
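The getsockopt() counterpart lets an application or a debugging tool ask which group a socket belongs to: it returns the group type, or 0 when the socket is not a member. Sketch:

    __u32 gtype = 0;
    socklen_t len = sizeof(gtype);

    if (!getsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &gtype, &len) && gtype)
        printf("member of group %u\n", gtype);
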
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index be3d9e3183dc..251065dfd8df 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -133,9 +133,9 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
133 node); 133 node);
134} 134}
135 135
136static void tipc_subscrp_timeout(unsigned long data) 136static void tipc_subscrp_timeout(struct timer_list *t)
137{ 137{
138 struct tipc_subscription *sub = (struct tipc_subscription *)data; 138 struct tipc_subscription *sub = from_timer(sub, t, timer);
139 struct tipc_subscriber *subscriber = sub->subscriber; 139 struct tipc_subscriber *subscriber = sub->subscriber;
140 140
141 spin_lock_bh(&subscriber->lock); 141 spin_lock_bh(&subscriber->lock);
@@ -303,7 +303,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
303 tipc_subscrb_get(subscriber); 303 tipc_subscrb_get(subscriber);
304 spin_unlock_bh(&subscriber->lock); 304 spin_unlock_bh(&subscriber->lock);
305 305
306 setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); 306 timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
307 timeout = htohl(sub->evt.s.timeout, swap); 307 timeout = htohl(sub->evt.s.timeout, swap);
308 308
309 if (timeout != TIPC_WAIT_FOREVER) 309 if (timeout != TIPC_WAIT_FOREVER)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 60aff60e30ad..e07ee3ae0023 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,8 +45,18 @@ MODULE_AUTHOR("Mellanox Technologies");
45MODULE_DESCRIPTION("Transport Layer Security Support"); 45MODULE_DESCRIPTION("Transport Layer Security Support");
46MODULE_LICENSE("Dual BSD/GPL"); 46MODULE_LICENSE("Dual BSD/GPL");
47 47
48static struct proto tls_base_prot; 48enum {
49static struct proto tls_sw_prot; 49 TLS_BASE_TX,
50 TLS_SW_TX,
51 TLS_NUM_CONFIG,
52};
53
54static struct proto tls_prots[TLS_NUM_CONFIG];
55
56static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
57{
58 sk->sk_prot = &tls_prots[ctx->tx_conf];
59}
50 60
51int wait_on_pending_writer(struct sock *sk, long *timeo) 61int wait_on_pending_writer(struct sock *sk, long *timeo)
52{ 62{
@@ -216,6 +226,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
216 void (*sk_proto_close)(struct sock *sk, long timeout); 226 void (*sk_proto_close)(struct sock *sk, long timeout);
217 227
218 lock_sock(sk); 228 lock_sock(sk);
229 sk_proto_close = ctx->sk_proto_close;
230
231 if (ctx->tx_conf == TLS_BASE_TX) {
232 kfree(ctx);
233 goto skip_tx_cleanup;
234 }
219 235
220 if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) 236 if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
221 tls_handle_open_record(sk, 0); 237 tls_handle_open_record(sk, 0);
@@ -232,13 +248,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
232 sg++; 248 sg++;
233 } 249 }
234 } 250 }
235 ctx->free_resources(sk); 251
236 kfree(ctx->rec_seq); 252 kfree(ctx->rec_seq);
237 kfree(ctx->iv); 253 kfree(ctx->iv);
238 254
239 sk_proto_close = ctx->sk_proto_close; 255 if (ctx->tx_conf == TLS_SW_TX)
240 kfree(ctx); 256 tls_sw_free_tx_resources(sk);
241 257
258skip_tx_cleanup:
242 release_sock(sk); 259 release_sock(sk);
243 sk_proto_close(sk, timeout); 260 sk_proto_close(sk, timeout);
244} 261}
@@ -338,46 +355,41 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
338static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, 355static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
339 unsigned int optlen) 356 unsigned int optlen)
340{ 357{
341 struct tls_crypto_info *crypto_info, tmp_crypto_info; 358 struct tls_crypto_info *crypto_info;
342 struct tls_context *ctx = tls_get_ctx(sk); 359 struct tls_context *ctx = tls_get_ctx(sk);
343 struct proto *prot = NULL;
344 int rc = 0; 360 int rc = 0;
361 int tx_conf;
345 362
346 if (!optval || (optlen < sizeof(*crypto_info))) { 363 if (!optval || (optlen < sizeof(*crypto_info))) {
347 rc = -EINVAL; 364 rc = -EINVAL;
348 goto out; 365 goto out;
349 } 366 }
350 367
351 rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info)); 368 crypto_info = &ctx->crypto_send;
369 	/* Currently we don't support setting crypto info more than one time */
370 if (TLS_CRYPTO_INFO_READY(crypto_info))
371 goto out;
372
373 rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
352 if (rc) { 374 if (rc) {
353 rc = -EFAULT; 375 rc = -EFAULT;
354 goto out; 376 goto out;
355 } 377 }
356 378
357 /* check version */ 379 /* check version */
358 if (tmp_crypto_info.version != TLS_1_2_VERSION) { 380 if (crypto_info->version != TLS_1_2_VERSION) {
359 rc = -ENOTSUPP; 381 rc = -ENOTSUPP;
360 goto out; 382 goto err_crypto_info;
361 } 383 }
362 384
363 /* get user crypto info */ 385 switch (crypto_info->cipher_type) {
364 crypto_info = &ctx->crypto_send;
365
366 /* Currently we don't support set crypto info more than one time */
367 if (TLS_CRYPTO_INFO_READY(crypto_info))
368 goto out;
369
370 switch (tmp_crypto_info.cipher_type) {
371 case TLS_CIPHER_AES_GCM_128: { 386 case TLS_CIPHER_AES_GCM_128: {
372 if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { 387 if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
373 rc = -EINVAL; 388 rc = -EINVAL;
374 goto out; 389 goto out;
375 } 390 }
376 rc = copy_from_user( 391 rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
377 crypto_info, 392 optlen - sizeof(*crypto_info));
378 optval,
379 sizeof(struct tls12_crypto_info_aes_gcm_128));
380
381 if (rc) { 393 if (rc) {
382 rc = -EFAULT; 394 rc = -EFAULT;
383 goto err_crypto_info; 395 goto err_crypto_info;
@@ -389,18 +401,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
389 goto out; 401 goto out;
390 } 402 }
391 403
392 ctx->sk_write_space = sk->sk_write_space;
393 sk->sk_write_space = tls_write_space;
394
395 ctx->sk_proto_close = sk->sk_prot->close;
396
397 /* currently SW is default, we will have ethtool in future */ 404 /* currently SW is default, we will have ethtool in future */
398 rc = tls_set_sw_offload(sk, ctx); 405 rc = tls_set_sw_offload(sk, ctx);
399 prot = &tls_sw_prot; 406 tx_conf = TLS_SW_TX;
400 if (rc) 407 if (rc)
401 goto err_crypto_info; 408 goto err_crypto_info;
402 409
403 sk->sk_prot = prot; 410 ctx->tx_conf = tx_conf;
411 update_sk_prot(sk, ctx);
412 ctx->sk_write_space = sk->sk_write_space;
413 sk->sk_write_space = tls_write_space;
404 goto out; 414 goto out;
405 415
406err_crypto_info: 416err_crypto_info:
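For reference, the userspace sequence this path serves is: attach the ULP, then hand over the negotiated crypto parameters, after which plain send() on the TCP socket emits TLS records. A hedged sketch for the one supported cipher; key, iv, salt and seq are placeholders for material taken from the TLS handshake, and SOL_TLS may need defining by hand if the libc headers lack it:

    struct tls12_crypto_info_aes_gcm_128 ci = {0};

    ci.info.version = TLS_1_2_VERSION;
    ci.info.cipher_type = TLS_CIPHER_AES_GCM_128;
    memcpy(ci.key, key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
    memcpy(ci.iv, iv, TLS_CIPHER_AES_GCM_128_IV_SIZE);
    memcpy(ci.salt, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
    memcpy(ci.rec_seq, seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);

    setsockopt(fd, SOL_TCP, TCP_ULP, "tls", sizeof("tls"));    /* tls_init() */
    setsockopt(fd, SOL_TLS, TLS_TX, &ci, sizeof(ci));          /* this function */

    send(fd, buf, len, 0);    /* now produces TLS records */
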
@@ -453,7 +463,10 @@ static int tls_init(struct sock *sk)
453 icsk->icsk_ulp_data = ctx; 463 icsk->icsk_ulp_data = ctx;
454 ctx->setsockopt = sk->sk_prot->setsockopt; 464 ctx->setsockopt = sk->sk_prot->setsockopt;
455 ctx->getsockopt = sk->sk_prot->getsockopt; 465 ctx->getsockopt = sk->sk_prot->getsockopt;
456 sk->sk_prot = &tls_base_prot; 466 ctx->sk_proto_close = sk->sk_prot->close;
467
468 ctx->tx_conf = TLS_BASE_TX;
469 update_sk_prot(sk, ctx);
457out: 470out:
458 return rc; 471 return rc;
459} 472}
@@ -464,16 +477,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
464 .init = tls_init, 477 .init = tls_init,
465}; 478};
466 479
480static void build_protos(struct proto *prot, struct proto *base)
481{
482 prot[TLS_BASE_TX] = *base;
483 prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
484 prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
485 prot[TLS_BASE_TX].close = tls_sk_proto_close;
486
487 prot[TLS_SW_TX] = prot[TLS_BASE_TX];
488 prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
489 prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
490}
491
467static int __init tls_register(void) 492static int __init tls_register(void)
468{ 493{
469 tls_base_prot = tcp_prot; 494 build_protos(tls_prots, &tcp_prot);
470 tls_base_prot.setsockopt = tls_setsockopt;
471 tls_base_prot.getsockopt = tls_getsockopt;
472
473 tls_sw_prot = tls_base_prot;
474 tls_sw_prot.sendmsg = tls_sw_sendmsg;
475 tls_sw_prot.sendpage = tls_sw_sendpage;
476 tls_sw_prot.close = tls_sk_proto_close;
477 495
478 tcp_register_ulp(&tcp_tls_ulp_ops); 496 tcp_register_ulp(&tcp_tls_ulp_ops);
479 497
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7d80040a37b6..73d19210dd49 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -39,22 +39,6 @@
39 39
40#include <net/tls.h> 40#include <net/tls.h>
41 41
42static inline void tls_make_aad(int recv,
43 char *buf,
44 size_t size,
45 char *record_sequence,
46 int record_sequence_size,
47 unsigned char record_type)
48{
49 memcpy(buf, record_sequence, record_sequence_size);
50
51 buf[8] = record_type;
52 buf[9] = TLS_1_2_VERSION_MAJOR;
53 buf[10] = TLS_1_2_VERSION_MINOR;
54 buf[11] = size >> 8;
55 buf[12] = size & 0xFF;
56}
57
58static void trim_sg(struct sock *sk, struct scatterlist *sg, 42static void trim_sg(struct sock *sk, struct scatterlist *sg,
59 int *sg_num_elem, unsigned int *sg_size, int target_size) 43 int *sg_num_elem, unsigned int *sg_size, int target_size)
60{ 44{
@@ -219,7 +203,7 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
219 struct aead_request *aead_req; 203 struct aead_request *aead_req;
220 int rc; 204 int rc;
221 205
222 aead_req = kmalloc(req_size, flags); 206 aead_req = kzalloc(req_size, flags);
223 if (!aead_req) 207 if (!aead_req)
224 return -ENOMEM; 208 return -ENOMEM;
225 209
@@ -249,7 +233,7 @@ static int tls_push_record(struct sock *sk, int flags,
249 sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1); 233 sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
250 sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1); 234 sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
251 235
252 tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size, 236 tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
253 tls_ctx->rec_seq, tls_ctx->rec_seq_size, 237 tls_ctx->rec_seq, tls_ctx->rec_seq_size,
254 record_type); 238 record_type);
255 239
@@ -639,7 +623,7 @@ sendpage_end:
639 return ret; 623 return ret;
640} 624}
641 625
642static void tls_sw_free_resources(struct sock *sk) 626void tls_sw_free_tx_resources(struct sock *sk)
643{ 627{
644 struct tls_context *tls_ctx = tls_get_ctx(sk); 628 struct tls_context *tls_ctx = tls_get_ctx(sk);
645 struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); 629 struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
@@ -650,6 +634,7 @@ static void tls_sw_free_resources(struct sock *sk)
650 tls_free_both_sg(sk); 634 tls_free_both_sg(sk);
651 635
652 kfree(ctx); 636 kfree(ctx);
637 kfree(tls_ctx);
653} 638}
654 639
655int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) 640int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
@@ -679,7 +664,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
679 } 664 }
680 665
681 ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; 666 ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
682 ctx->free_resources = tls_sw_free_resources;
683 667
684 crypto_info = &ctx->crypto_send; 668 crypto_info = &ctx->crypto_send;
685 switch (crypto_info->cipher_type) { 669 switch (crypto_info->cipher_type) {
diff --git a/net/unix/Makefile b/net/unix/Makefile
index b663c607b1c6..ffd0a275c3a7 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux unix domain socket layer. 3# Makefile for the Linux unix domain socket layer.
3# 4#
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7f46bab4ce5c..a9ee634f3c42 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
814 */ 814 */
815 case SOCK_RAW: 815 case SOCK_RAW:
816 sock->type = SOCK_DGRAM; 816 sock->type = SOCK_DGRAM;
817 /* fall through */
817 case SOCK_DGRAM: 818 case SOCK_DGRAM:
818 sock->ops = &unix_dgram_ops; 819 sock->ops = &unix_dgram_ops;
819 break; 820 break;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 4d9679701a6d..384c84e83462 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
257 err = -ENOENT; 257 err = -ENOENT;
258 if (sk == NULL) 258 if (sk == NULL)
259 goto out_nosk; 259 goto out_nosk;
260 if (!net_eq(sock_net(sk), net))
261 goto out;
260 262
261 err = sock_diag_check_cookie(sk, req->udiag_cookie); 263 err = sock_diag_check_cookie(sk, req->udiag_cookie);
262 if (err) 264 if (err)
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index a24369d175fd..970f96489fe7 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -15,6 +15,16 @@ config VSOCKETS
15 To compile this driver as a module, choose M here: the module 15 To compile this driver as a module, choose M here: the module
16 will be called vsock. If unsure, say N. 16 will be called vsock. If unsure, say N.
17 17
18config VSOCKETS_DIAG
19 tristate "Virtual Sockets monitoring interface"
20 depends on VSOCKETS
21 default y
22 help
23 	  Support for the PF_VSOCK sockets monitoring interface used by the ss tool.
24 If unsure, say Y.
25
26 Enable this module so userspace applications can query open sockets.
27
18config VMWARE_VMCI_VSOCKETS 28config VMWARE_VMCI_VSOCKETS
19 tristate "VMware VMCI transport for Virtual Sockets" 29 tristate "VMware VMCI transport for Virtual Sockets"
20 depends on VSOCKETS && VMWARE_VMCI 30 depends on VSOCKETS && VMWARE_VMCI
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index e63d574234a9..7c6f9a0b67b0 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,4 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-$(CONFIG_VSOCKETS) += vsock.o 2obj-$(CONFIG_VSOCKETS) += vsock.o
3obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o
2obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o 4obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
3obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o 5obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
4obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o 6obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
@@ -6,6 +8,8 @@ obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
6 8
7vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o 9vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
8 10
11vsock_diag-y += diag.o
12
9vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ 13vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
10 vmci_transport_notify_qstate.o 14 vmci_transport_notify_qstate.o
11 15
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dfc8c51e4d74..5d28abf87fbf 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,7 +36,7 @@
  * not support simultaneous connects (two "client" sockets connecting).
  *
  * - "Server" sockets are referred to as listener sockets throughout this
- * implementation because they are in the VSOCK_SS_LISTEN state. When a
+ * implementation because they are in the TCP_LISTEN state. When a
  * connection request is received (the second kind of socket mentioned above),
  * we create a new socket and refer to it as a pending socket. These pending
  * sockets are placed on the pending connection list of the listener socket.
@@ -82,6 +82,15 @@
  * argument, we must ensure the reference count is increased to ensure the
  * socket isn't freed before the function is run; the deferred function will
  * then drop the reference.
+ *
+ * - sk->sk_state uses the TCP state constants because they are widely used by
+ *   other address families and exposed to userspace tools like ss(8):
+ *
+ *   TCP_CLOSE - unconnected
+ *   TCP_SYN_SENT - connecting
+ *   TCP_ESTABLISHED - connected
+ *   TCP_CLOSING - disconnecting
+ *   TCP_LISTEN - listening
  */
 
 #include <linux/types.h>
@@ -153,7 +162,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
  * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function
  * mods with VSOCK_HASH_SIZE to ensure this.
  */
-#define VSOCK_HASH_SIZE		251
 #define MAX_PORT_RETRIES	24
 
 #define VSOCK_HASH(addr)	((addr)->svm_port % VSOCK_HASH_SIZE)
@@ -168,9 +176,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
 #define vsock_connected_sockets_vsk(vsk) \
 	vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
 
-static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
-static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
-static DEFINE_SPINLOCK(vsock_table_lock);
+struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+EXPORT_SYMBOL_GPL(vsock_bind_table);
+struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+EXPORT_SYMBOL_GPL(vsock_connected_table);
+DEFINE_SPINLOCK(vsock_table_lock);
+EXPORT_SYMBOL_GPL(vsock_table_lock);
 
 /* Autobind this socket to the local address if necessary. */
 static int vsock_auto_bind(struct vsock_sock *vsk)
@@ -184,7 +195,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk)
 	return __vsock_bind(sk, &local_addr);
 }
 
-static void vsock_init_tables(void)
+static int __init vsock_init_tables(void)
 {
 	int i;
 
@@ -193,6 +204,7 @@ static void vsock_init_tables(void)
 
 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
 		INIT_LIST_HEAD(&vsock_connected_table[i]);
+	return 0;
 }
 
 static void __vsock_insert_bound(struct list_head *list,
@@ -248,16 +260,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
 	return NULL;
 }
 
-static bool __vsock_in_bound_table(struct vsock_sock *vsk)
-{
-	return !list_empty(&vsk->bound_table);
-}
-
-static bool __vsock_in_connected_table(struct vsock_sock *vsk)
-{
-	return !list_empty(&vsk->connected_table);
-}
-
 static void vsock_insert_unbound(struct vsock_sock *vsk)
 {
 	spin_lock_bh(&vsock_table_lock);
@@ -485,7 +487,7 @@ void vsock_pending_work(struct work_struct *work)
 	if (vsock_in_connected_table(vsk))
 		vsock_remove_connected(vsk);
 
-	sk->sk_state = SS_FREE;
+	sk->sk_state = TCP_CLOSE;
 
 out:
 	release_sock(sk);
@@ -625,7 +627,6 @@ struct sock *__vsock_create(struct net *net,
 
 	sk->sk_destruct = vsock_sk_destruct;
 	sk->sk_backlog_rcv = vsock_queue_rcv_skb;
-	sk->sk_state = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 
 	INIT_LIST_HEAD(&vsk->bound_table);
@@ -899,7 +900,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
 		/* Listening sockets that have connections in their accept
 		 * queue can be read.
 		 */
-		if (sk->sk_state == VSOCK_SS_LISTEN
+		if (sk->sk_state == TCP_LISTEN
 		    && !vsock_is_accept_queue_empty(sk))
 			mask |= POLLIN | POLLRDNORM;
 
@@ -928,7 +929,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
 		}
 
 		/* Connected sockets that can produce data can be written. */
-		if (sk->sk_state == SS_CONNECTED) {
+		if (sk->sk_state == TCP_ESTABLISHED) {
 			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
 				bool space_avail_now = false;
 				int ret = transport->notify_poll_out(
@@ -950,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
 		 * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
 		 * but local send is not shutdown.
 		 */
-		if (sk->sk_state == SS_UNCONNECTED) {
+		if (sk->sk_state == TCP_CLOSE) {
 			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
 				mask |= POLLOUT | POLLWRNORM;
 
@@ -1120,9 +1121,9 @@ static void vsock_connect_timeout(struct work_struct *work)
 	sk = sk_vsock(vsk);
 
 	lock_sock(sk);
-	if (sk->sk_state == SS_CONNECTING &&
+	if (sk->sk_state == TCP_SYN_SENT &&
 	    (sk->sk_shutdown != SHUTDOWN_MASK)) {
-		sk->sk_state = SS_UNCONNECTED;
+		sk->sk_state = TCP_CLOSE;
 		sk->sk_err = ETIMEDOUT;
 		sk->sk_error_report(sk);
 		cancel = 1;
@@ -1168,7 +1169,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 		err = -EALREADY;
 		break;
 	default:
-		if ((sk->sk_state == VSOCK_SS_LISTEN) ||
+		if ((sk->sk_state == TCP_LISTEN) ||
 		    vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
 			err = -EINVAL;
 			goto out;
@@ -1191,7 +1192,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			goto out;
 
-		sk->sk_state = SS_CONNECTING;
+		sk->sk_state = TCP_SYN_SENT;
 
 		err = transport->connect(vsk);
 		if (err < 0)
@@ -1211,7 +1212,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 	timeout = vsk->connect_timeout;
 	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
-	while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+	while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
 		if (flags & O_NONBLOCK) {
 			/* If we're not going to block, we schedule a timeout
 			 * function to generate a timeout on the connection
@@ -1234,13 +1235,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 
 		if (signal_pending(current)) {
 			err = sock_intr_errno(timeout);
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 			sock->state = SS_UNCONNECTED;
 			vsock_transport_cancel_pkt(vsk);
 			goto out_wait;
 		} else if (timeout == 0) {
 			err = -ETIMEDOUT;
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 			sock->state = SS_UNCONNECTED;
 			vsock_transport_cancel_pkt(vsk);
 			goto out_wait;
@@ -1251,7 +1252,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 
 	if (sk->sk_err) {
 		err = -sk->sk_err;
-		sk->sk_state = SS_UNCONNECTED;
+		sk->sk_state = TCP_CLOSE;
 		sock->state = SS_UNCONNECTED;
 	} else {
 		err = 0;
@@ -1284,7 +1285,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
 		goto out;
 	}
 
-	if (listener->sk_state != VSOCK_SS_LISTEN) {
+	if (listener->sk_state != TCP_LISTEN) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -1374,7 +1375,7 @@ static int vsock_listen(struct socket *sock, int backlog)
 	}
 
 	sk->sk_max_ack_backlog = backlog;
-	sk->sk_state = VSOCK_SS_LISTEN;
+	sk->sk_state = TCP_LISTEN;
 
 	err = 0;
 
@@ -1554,7 +1555,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
 	/* Callers should not provide a destination with stream sockets. */
 	if (msg->msg_namelen) {
-		err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
 		goto out;
 	}
 
@@ -1565,7 +1566,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 		goto out;
 	}
 
-	if (sk->sk_state != SS_CONNECTED ||
+	if (sk->sk_state != TCP_ESTABLISHED ||
 	    !vsock_addr_bound(&vsk->local_addr)) {
 		err = -ENOTCONN;
 		goto out;
@@ -1689,7 +1690,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 	lock_sock(sk);
 
-	if (sk->sk_state != SS_CONNECTED) {
+	if (sk->sk_state != TCP_ESTABLISHED) {
 		/* Recvmsg is supposed to return 0 if a peer performs an
 		 * orderly shutdown. Differentiate between that case and when a
 		 * peer has not connected or a local shutdown occurred with the
@@ -1957,8 +1958,6 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
 	vsock_proto.owner = owner;
 	transport = t;
 
-	vsock_init_tables();
-
 	vsock_device.minor = MISC_DYNAMIC_MINOR;
 	err = misc_register(&vsock_device);
 	if (err) {
@@ -2019,6 +2018,8 @@ const struct vsock_transport *vsock_core_get_transport(void)
 }
 EXPORT_SYMBOL_GPL(vsock_core_get_transport);
 
+module_init(vsock_init_tables);
+
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Socket Family");
 MODULE_VERSION("1.0.2.0-k");
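
The comment block added at the top of af_vsock.c is the key to the whole series: every SS_*/VSOCK_SS_* value stored in sk->sk_state is replaced by the <net/tcp_states.h> constant with the same meaning, so generic socket tooling can interpret the field. A minimal sketch of that mapping as a debugging helper (vsock_state_str is a hypothetical name, not part of the patch):

#include <net/tcp_states.h>

/* Hypothetical helper: the five TCP states vsock actually uses. */
static const char *vsock_state_str(int state)
{
	switch (state) {
	case TCP_CLOSE:		return "unconnected";
	case TCP_SYN_SENT:	return "connecting";
	case TCP_ESTABLISHED:	return "connected";
	case TCP_CLOSING:	return "disconnecting";
	case TCP_LISTEN:	return "listening";
	default:		return "not used by vsock";
	}
}
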
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
new file mode 100644
index 000000000000..31b567652250
--- /dev/null
+++ b/net/vmw_vsock/diag.c
@@ -0,0 +1,186 @@
+/*
+ * vsock sock_diag(7) module
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/vm_sockets_diag.h>
+#include <net/af_vsock.h>
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+			u32 portid, u32 seq, u32 flags)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	struct vsock_diag_msg *rep;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+			flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	rep = nlmsg_data(nlh);
+	rep->vdiag_family = AF_VSOCK;
+
+	/* Lock order dictates that sk_lock is acquired before
+	 * vsock_table_lock, so we cannot lock here. Simply don't take
+	 * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is
+	 * held.
+	 */
+	rep->vdiag_type = sk->sk_type;
+	rep->vdiag_state = sk->sk_state;
+	rep->vdiag_shutdown = sk->sk_shutdown;
+	rep->vdiag_src_cid = vsk->local_addr.svm_cid;
+	rep->vdiag_src_port = vsk->local_addr.svm_port;
+	rep->vdiag_dst_cid = vsk->remote_addr.svm_cid;
+	rep->vdiag_dst_port = vsk->remote_addr.svm_port;
+	rep->vdiag_ino = sock_i_ino(sk);
+
+	sock_diag_save_cookie(sk, rep->vdiag_cookie);
+
+	return 0;
+}
+
+static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct vsock_diag_req *req;
+	struct vsock_sock *vsk;
+	unsigned int bucket;
+	unsigned int last_i;
+	unsigned int table;
+	struct net *net;
+	unsigned int i;
+
+	req = nlmsg_data(cb->nlh);
+	net = sock_net(skb->sk);
+
+	/* State saved between calls: */
+	table = cb->args[0];
+	bucket = cb->args[1];
+	i = last_i = cb->args[2];
+
+	/* TODO VMCI pending sockets? */
+
+	spin_lock_bh(&vsock_table_lock);
+
+	/* Bind table (locally created sockets) */
+	if (table == 0) {
+		while (bucket < ARRAY_SIZE(vsock_bind_table)) {
+			struct list_head *head = &vsock_bind_table[bucket];
+
+			i = 0;
+			list_for_each_entry(vsk, head, bound_table) {
+				struct sock *sk = sk_vsock(vsk);
+
+				if (!net_eq(sock_net(sk), net))
+					continue;
+				if (i < last_i)
+					goto next_bind;
+				if (!(req->vdiag_states & (1 << sk->sk_state)))
+					goto next_bind;
+				if (sk_diag_fill(sk, skb,
+						 NETLINK_CB(cb->skb).portid,
+						 cb->nlh->nlmsg_seq,
+						 NLM_F_MULTI) < 0)
+					goto done;
+next_bind:
+				i++;
+			}
+			last_i = 0;
+			bucket++;
+		}
+
+		table++;
+		bucket = 0;
+	}
+
+	/* Connected table (accepted connections) */
+	while (bucket < ARRAY_SIZE(vsock_connected_table)) {
+		struct list_head *head = &vsock_connected_table[bucket];
+
+		i = 0;
+		list_for_each_entry(vsk, head, connected_table) {
+			struct sock *sk = sk_vsock(vsk);
+
+			/* Skip sockets we've already seen above */
+			if (__vsock_in_bound_table(vsk))
+				continue;
+
+			if (!net_eq(sock_net(sk), net))
+				continue;
+			if (i < last_i)
+				goto next_connected;
+			if (!(req->vdiag_states & (1 << sk->sk_state)))
+				goto next_connected;
+			if (sk_diag_fill(sk, skb,
+					 NETLINK_CB(cb->skb).portid,
+					 cb->nlh->nlmsg_seq,
+					 NLM_F_MULTI) < 0)
+				goto done;
+next_connected:
+			i++;
+		}
+		last_i = 0;
+		bucket++;
+	}
+
+done:
+	spin_unlock_bh(&vsock_table_lock);
+
+	cb->args[0] = table;
+	cb->args[1] = bucket;
+	cb->args[2] = i;
+
+	return skb->len;
+}
+
+static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct vsock_diag_req);
+	struct net *net = sock_net(skb->sk);
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = vsock_diag_dump,
+		};
+		return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler vsock_diag_handler = {
+	.family = AF_VSOCK,
+	.dump = vsock_diag_handler_dump,
+};
+
+static int __init vsock_diag_init(void)
+{
+	return sock_diag_register(&vsock_diag_handler);
+}
+
+static void __exit vsock_diag_exit(void)
+{
+	sock_diag_unregister(&vsock_diag_handler);
+}
+
+module_init(vsock_diag_init);
+module_exit(vsock_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
+			       40 /* AF_VSOCK */);
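
Once this module is loaded, a dump can be driven from userspace the same way ss(8) queries inet_diag. A hedged sketch of a minimal dumper (error and NLMSG_ERROR handling trimmed; struct vsock_diag_req and struct vsock_diag_msg come from the linux/vm_sockets_diag.h uapi header this series adds, and 40 is AF_VSOCK as in the MODULE_ALIAS above):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/vm_sockets_diag.h>

int main(void)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct vsock_diag_req req;
	} msg = {
		.nlh = {
			.nlmsg_len = sizeof(msg),
			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family = 40,	/* AF_VSOCK */
			.vdiag_states = ~0U,	/* all states */
		},
	};
	char buf[8192];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

	if (fd < 0 || sendto(fd, &msg, sizeof(msg), 0,
			     (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0)
		return 1;

	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);
		struct nlmsghdr *h = (struct nlmsghdr *)buf;

		if (n <= 0)
			break;
		for (; NLMSG_OK(h, n); h = NLMSG_NEXT(h, n)) {
			struct vsock_diag_msg *m = NLMSG_DATA(h);

			if (h->nlmsg_type == NLMSG_DONE) {
				close(fd);
				return 0;
			}
			printf("cid %u port %u -> cid %u port %u state %u\n",
			       m->vdiag_src_cid, m->vdiag_src_port,
			       m->vdiag_dst_cid, m->vdiag_dst_port,
			       m->vdiag_state);
		}
	}
	close(fd);
	return 0;
}
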
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 14ed5a344cdf..5583df708b8c 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -310,11 +310,15 @@ static void hvs_close_connection(struct vmbus_channel *chan)
 	struct sock *sk = get_per_channel_state(chan);
 	struct vsock_sock *vsk = vsock_sk(sk);
 
-	sk->sk_state = SS_UNCONNECTED;
+	lock_sock(sk);
+
+	sk->sk_state = TCP_CLOSE;
 	sock_set_flag(sk, SOCK_DONE);
 	vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
 
 	sk->sk_state_change(sk);
+
+	release_sock(sk);
 }
 
 static void hvs_open_connection(struct vmbus_channel *chan)
@@ -344,8 +348,9 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 	if (!sk)
 		return;
 
-	if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) ||
-	    (!conn_from_host && sk->sk_state != SS_CONNECTING))
+	lock_sock(sk);
+	if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
+	    (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
 		goto out;
 
 	if (conn_from_host) {
@@ -357,7 +362,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 		if (!new)
 			goto out;
 
-		new->sk_state = SS_CONNECTING;
+		new->sk_state = TCP_SYN_SENT;
 		vnew = vsock_sk(new);
 		hvs_new = vnew->trans;
 		hvs_new->chan = chan;
@@ -384,7 +389,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 	vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
 
 	if (conn_from_host) {
-		new->sk_state = SS_CONNECTED;
+		new->sk_state = TCP_ESTABLISHED;
 		sk->sk_ack_backlog++;
 
 		hvs_addr_init(&vnew->local_addr, if_type);
@@ -395,11 +400,9 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 
 		vsock_insert_connected(vnew);
 
-		lock_sock(sk);
 		vsock_enqueue_accept(sk, new);
-		release_sock(sk);
 	} else {
-		sk->sk_state = SS_CONNECTED;
+		sk->sk_state = TCP_ESTABLISHED;
 		sk->sk_socket->state = SS_CONNECTED;
 
 		vsock_insert_connected(vsock_sk(sk));
@@ -410,6 +413,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 out:
 	/* Release refcnt obtained when we called vsock_find_bound_socket() */
 	sock_put(sk);
+
+	release_sock(sk);
 }
 
 static u32 hvs_get_local_cid(void)
@@ -476,13 +481,21 @@ out:
 
 static void hvs_release(struct vsock_sock *vsk)
 {
+	struct sock *sk = sk_vsock(vsk);
 	struct hvsock *hvs = vsk->trans;
-	struct vmbus_channel *chan = hvs->chan;
+	struct vmbus_channel *chan;
+
+	lock_sock(sk);
+
+	sk->sk_state = TCP_CLOSING;
+	vsock_remove_sock(vsk);
+
+	release_sock(sk);
 
+	chan = hvs->chan;
 	if (chan)
 		hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
 
-	vsock_remove_sock(vsk);
 }
 
 static void hvs_destruct(struct vsock_sock *vsk)
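
Both hyperv changes above serve one rule: sk_state is only read or written with the socket lock held, and lock_sock() is taken before anything that grabs vsock_table_lock, matching the lock-order comment in the new diag.c. An illustrative sketch of the pattern (example_state_update is a hypothetical function, not part of the patch):

/* Illustrative only: vsock_insert_connected() and vsock_enqueue_accept()
 * take vsock_table_lock internally, so the per-socket lock must already
 * be held around them and around any sk_state update. */
static void example_state_update(struct sock *sk, struct vsock_sock *vsk)
{
	lock_sock(sk);			/* 1: sk_lock first */
	sk->sk_state = TCP_ESTABLISHED;	/* 2: state changes under sk_lock */
	vsock_insert_connected(vsk);	/* 3: vsock_table_lock nests inside */
	sk->sk_state_change(sk);
	release_sock(sk);
}
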
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 403d86e80162..8e03bd3f3668 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
 static void virtio_vsock_reset_sock(struct sock *sk)
 {
 	lock_sock(sk);
-	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = ECONNRESET;
 	sk->sk_error_report(sk);
 	release_sock(sk);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index edba7ab97563..3ae3a33da70b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
 	sock_set_flag(sk, SOCK_DONE);
 	vsk->peer_shutdown = SHUTDOWN_MASK;
 	if (vsock_stream_has_data(vsk) <= 0)
-		sk->sk_state = SS_DISCONNECTING;
+		sk->sk_state = TCP_CLOSING;
 	sk->sk_state_change(sk);
 
 	if (vsk->close_work_scheduled &&
@@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
 {
 	struct sock *sk = &vsk->sk;
 
-	if (!(sk->sk_state == SS_CONNECTED ||
-	      sk->sk_state == SS_DISCONNECTING))
+	if (!(sk->sk_state == TCP_ESTABLISHED ||
+	      sk->sk_state == TCP_CLOSING))
 		return true;
 
 	/* Already received SHUTDOWN from peer, reply with RST */
@@ -801,7 +801,7 @@
 
 	switch (le16_to_cpu(pkt->hdr.op)) {
 	case VIRTIO_VSOCK_OP_RESPONSE:
-		sk->sk_state = SS_CONNECTED;
+		sk->sk_state = TCP_ESTABLISHED;
 		sk->sk_socket->state = SS_CONNECTED;
 		vsock_insert_connected(vsk);
 		sk->sk_state_change(sk);
@@ -821,7 +821,7 @@
 
 destroy:
 	virtio_transport_reset(vsk, pkt);
-	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = skerr;
 	sk->sk_error_report(sk);
 	return err;
@@ -857,7 +857,7 @@
 		vsk->peer_shutdown |= SEND_SHUTDOWN;
 		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
 		    vsock_stream_has_data(vsk) <= 0)
-			sk->sk_state = SS_DISCONNECTING;
+			sk->sk_state = TCP_CLOSING;
 		if (le32_to_cpu(pkt->hdr.flags))
 			sk->sk_state_change(sk);
 		break;
@@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
 
 	lock_sock_nested(child, SINGLE_DEPTH_NESTING);
 
-	child->sk_state = SS_CONNECTED;
+	child->sk_state = TCP_ESTABLISHED;
 
 	vchild = vsock_sk(child);
 	vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
@@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
 	sk->sk_write_space(sk);
 
 	switch (sk->sk_state) {
-	case VSOCK_SS_LISTEN:
+	case TCP_LISTEN:
 		virtio_transport_recv_listen(sk, pkt);
 		virtio_transport_free_pkt(pkt);
 		break;
-	case SS_CONNECTING:
+	case TCP_SYN_SENT:
 		virtio_transport_recv_connecting(sk, pkt);
 		virtio_transport_free_pkt(pkt);
 		break;
-	case SS_CONNECTED:
+	case TCP_ESTABLISHED:
 		virtio_transport_recv_connected(sk, pkt);
 		break;
-	case SS_DISCONNECTING:
+	case TCP_CLOSING:
 		virtio_transport_recv_disconnecting(sk, pkt);
 		virtio_transport_free_pkt(pkt);
 		break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 10ae7823a19d..391775e3575c 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -21,7 +21,6 @@
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/list.h>
-#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/net.h>
@@ -743,7 +742,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
 	/* The local context ID may be out of date, update it. */
 	vsk->local_addr.svm_cid = dst.svm_cid;
 
-	if (sk->sk_state == SS_CONNECTED)
+	if (sk->sk_state == TCP_ESTABLISHED)
 		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
 				sk, pkt, true, &dst, &src,
 				&bh_process_pkt);
@@ -801,7 +800,7 @@ static void vmci_transport_handle_detach(struct sock *sk)
 		 * left in our consume queue.
 		 */
 		if (vsock_stream_has_data(vsk) <= 0) {
-			if (sk->sk_state == SS_CONNECTING) {
+			if (sk->sk_state == TCP_SYN_SENT) {
 				/* The peer may detach from a queue pair while
 				 * we are still in the connecting state, i.e.,
 				 * if the peer VM is killed after attaching to
@@ -810,12 +809,12 @@ static void vmci_transport_handle_detach(struct sock *sk)
 				 * event like a reset.
 				 */
 
-				sk->sk_state = SS_UNCONNECTED;
+				sk->sk_state = TCP_CLOSE;
 				sk->sk_err = ECONNRESET;
 				sk->sk_error_report(sk);
 				return;
 			}
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 		}
 		sk->sk_state_change(sk);
 	}
@@ -883,17 +882,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
 	vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
 
 	switch (sk->sk_state) {
-	case VSOCK_SS_LISTEN:
+	case TCP_LISTEN:
 		vmci_transport_recv_listen(sk, pkt);
 		break;
-	case SS_CONNECTING:
+	case TCP_SYN_SENT:
 		/* Processing of pending connections for servers goes through
 		 * the listening socket, so see vmci_transport_recv_listen()
 		 * for that path.
 		 */
 		vmci_transport_recv_connecting_client(sk, pkt);
 		break;
-	case SS_CONNECTED:
+	case TCP_ESTABLISHED:
 		vmci_transport_recv_connected(sk, pkt);
 		break;
 	default:
@@ -942,7 +941,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 	vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
 
 	switch (pending->sk_state) {
-	case SS_CONNECTING:
+	case TCP_SYN_SENT:
 		err = vmci_transport_recv_connecting_server(sk,
 							    pending,
 							    pkt);
@@ -1072,7 +1071,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 	vsock_add_pending(sk, pending);
 	sk->sk_ack_backlog++;
 
-	pending->sk_state = SS_CONNECTING;
+	pending->sk_state = TCP_SYN_SENT;
 	vmci_trans(vpending)->produce_size =
 		vmci_trans(vpending)->consume_size = qp_size;
 	vmci_trans(vpending)->queue_pair_size = qp_size;
@@ -1197,11 +1196,11 @@ vmci_transport_recv_connecting_server(struct sock *listener,
 	 * the socket will be valid until it is removed from the queue.
 	 *
 	 * If we fail sending the attach below, we remove the socket from the
-	 * connected list and move the socket to SS_UNCONNECTED before
+	 * connected list and move the socket to TCP_CLOSE before
 	 * releasing the lock, so a pending slow path processing of an incoming
 	 * packet will not see the socket in the connected state in that case.
 	 */
-	pending->sk_state = SS_CONNECTED;
+	pending->sk_state = TCP_ESTABLISHED;
 
 	vsock_insert_connected(vpending);
 
@@ -1232,7 +1231,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
 
 destroy:
 	pending->sk_err = skerr;
-	pending->sk_state = SS_UNCONNECTED;
+	pending->sk_state = TCP_CLOSE;
 	/* As long as we drop our reference, all necessary cleanup will handle
 	 * when the cleanup function drops its reference and our destruct
 	 * implementation is called. Note that since the listen handler will
@@ -1270,7 +1269,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
 	 * accounting (it can already be found since it's in the bound
 	 * table).
 	 */
-	sk->sk_state = SS_CONNECTED;
+	sk->sk_state = TCP_ESTABLISHED;
 	sk->sk_socket->state = SS_CONNECTED;
 	vsock_insert_connected(vsk);
 	sk->sk_state_change(sk);
@@ -1338,7 +1337,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
 destroy:
 	vmci_transport_send_reset(sk, pkt);
 
-	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = skerr;
 	sk->sk_error_report(sk);
 	return err;
@@ -1526,7 +1525,7 @@ static int vmci_transport_recv_connected(struct sock *sk,
 		sock_set_flag(sk, SOCK_DONE);
 		vsk->peer_shutdown = SHUTDOWN_MASK;
 		if (vsock_stream_has_data(vsk) <= 0)
-			sk->sk_state = SS_DISCONNECTING;
+			sk->sk_state = TCP_CLOSING;
 
 		sk->sk_state_change(sk);
 		break;
@@ -1790,7 +1789,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
 		err = vmci_transport_send_conn_request(
 				sk, vmci_trans(vsk)->queue_pair_size);
 		if (err < 0) {
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 			return err;
 		}
 	} else {
@@ -1800,7 +1799,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
 			sk, vmci_trans(vsk)->queue_pair_size,
 			supported_proto_versions);
 		if (err < 0) {
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 			return err;
 		}
 
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 1406db4d97d1..41fb427f150a 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
 	 * queue. Ask for notifications when there is something to
 	 * read.
 	 */
-	if (sk->sk_state == SS_CONNECTED) {
+	if (sk->sk_state == TCP_ESTABLISHED) {
 		if (!send_waiting_read(sk, 1))
 			return -1;
 
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index f3a0afc46208..0cc84f2bb05e 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
 	 * queue. Ask for notifications when there is something to
 	 * read.
 	 */
-	if (sk->sk_state == SS_CONNECTED)
+	if (sk->sk_state == TCP_ESTABLISHED)
 		vsock_block_update_write_window(sk);
 	*data_ready_now = false;
 }
diff --git a/net/wimax/Makefile b/net/wimax/Makefile
index 8f1510d0cc2b..eb2db0d3b880 100644
--- a/net/wimax/Makefile
+++ b/net/wimax/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_WIMAX) += wimax.o
 
diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore
index c33451b896d9..61cbc304a3d3 100644
--- a/net/wireless/.gitignore
+++ b/net/wireless/.gitignore
@@ -1 +1,2 @@
-regdb.c
+shipped-certs.c
+extra-certs.c
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6c606120abfe..da91bb547db3 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -19,6 +19,7 @@ config WEXT_PRIV
 config CFG80211
 	tristate "cfg80211 - wireless configuration API"
 	depends on RFKILL || !RFKILL
+	select FW_LOADER
 	---help---
 	  cfg80211 is the Linux wireless LAN (802.11) configuration API.
 	  Enable this if you have a wireless device.
@@ -82,6 +83,36 @@ config CFG80211_CERTIFICATION_ONUS
 	  you are a wireless researcher and are working in a controlled
 	  and approved environment by your local regulatory agency.
 
+config CFG80211_REQUIRE_SIGNED_REGDB
+	bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS
+	default y
+	select SYSTEM_DATA_VERIFICATION
+	help
+	  Require that in addition to the "regulatory.db" file a
+	  "regulatory.db.p7s" can be loaded with a valid PKCS#7
+	  signature for the regulatory.db file made by one of the
+	  keys in the certs/ directory.
+
+config CFG80211_USE_KERNEL_REGDB_KEYS
+	bool "allow regdb keys shipped with the kernel" if CFG80211_CERTIFICATION_ONUS
+	default y
+	depends on CFG80211_REQUIRE_SIGNED_REGDB
+	help
+	  Allow the regulatory database to be signed by one of the keys for
+	  which certificates are part of the kernel sources
+	  (in net/wireless/certs/).
+
+	  This is currently only Seth Forshee's key, who is the regulatory
+	  database maintainer.
+
+config CFG80211_EXTRA_REGDB_KEYDIR
+	string "additional regdb key directory" if CFG80211_CERTIFICATION_ONUS
+	depends on CFG80211_REQUIRE_SIGNED_REGDB
+	help
+	  If selected, point to a directory with DER-encoded X.509
+	  certificates like in the kernel sources (net/wireless/certs/)
+	  that shall be accepted for a signed regulatory database.
+
 config CFG80211_REG_CELLULAR_HINTS
 	bool "cfg80211 regulatory support for cellular base station hints"
 	depends on CFG80211_CERTIFICATION_ONUS
@@ -139,35 +170,14 @@ config CFG80211_DEBUGFS
 
 	  If unsure, say N.
 
-config CFG80211_INTERNAL_REGDB
-	bool "use statically compiled regulatory rules database" if EXPERT
-	default n
-	depends on CFG80211
-	---help---
-	  This option generates an internal data structure representing
-	  the wireless regulatory rules described in net/wireless/db.txt
-	  and includes code to query that database. This is an alternative
-	  to using CRDA for defining regulatory rules for the kernel.
-
-	  Using this option requires some parsing of the db.txt at build time,
-	  the parser will be upkept with the latest wireless-regdb updates but
-	  older wireless-regdb formats will be ignored. The parser may later
-	  be replaced to avoid issues with conflicts on versions of
-	  wireless-regdb.
-
-	  For details see:
-
-	  http://wireless.kernel.org/en/developers/Regulatory
-
-	  Most distributions have a CRDA package. So if unsure, say N.
-
 config CFG80211_CRDA_SUPPORT
-	bool "support CRDA" if CFG80211_INTERNAL_REGDB
+	bool "support CRDA" if EXPERT
 	default y
 	depends on CFG80211
 	help
 	  You should enable this option unless you know for sure you have no
-	  need for it, for example when using internal regdb (above.)
+	  need for it, for example when using internal regdb (above) or the
+	  database loaded as a firmware file.
 
 	  If unsure, say Y.
 
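
The two signed-regdb options describe a load-and-verify flow implemented in net/wireless/reg.c. A hedged sketch of that flow under stated assumptions (load_and_verify_regdb is an illustrative name, and passing NULL selects a built-in trusted keyring; the real code checks against a keyring built from the certificates configured above):

#include <linux/firmware.h>
#include <linux/verification.h>

/* Sketch only: fetch regulatory.db plus its detached PKCS#7 signature
 * and check the signature before the database is used. */
static int load_and_verify_regdb(struct device *dev)
{
	const struct firmware *db, *sig;
	int err;

	err = request_firmware(&db, "regulatory.db", dev);
	if (err)
		return err;

	err = request_firmware(&sig, "regulatory.db.p7s", dev);
	if (err)
		goto out_db;

	err = verify_pkcs7_signature(db->data, db->size,
				     sig->data, sig->size,
				     NULL,	/* assumption: trusted keys */
				     VERIFYING_UNSPECIFIED_SIGNATURE,
				     NULL, NULL);

	release_firmware(sig);
out_db:
	release_firmware(db);
	return err;
}
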
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d06e5015751a..278d979c211a 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_CFG80211) += cfg80211.o
 obj-$(CONFIG_LIB80211) += lib80211.o
 obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
@@ -14,11 +15,27 @@ cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o
 cfg80211-$(CONFIG_OF) += of.o
 cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
 cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
-cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
 
 CFLAGS_trace.o := -I$(src)
 
-$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk
-	@$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@
+cfg80211-$(CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS) += shipped-certs.o
+ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
+cfg80211-y += extra-certs.o
+endif
 
-clean-files := regdb.c
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+	@$(kecho) "  GEN     $@"
+	@echo '#include "reg.h"' > $@
+	@echo 'const u8 shipped_regdb_certs[] = {' >> $@
+	@for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
+	@echo '};' >> $@
+	@echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
+		      $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
	@$(kecho) "  GEN     $@"
+	@echo '#include "reg.h"' > $@
+	@echo 'const u8 extra_regdb_certs[] = {' >> $@
+	@for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
+	@echo '};' >> $@
+	@echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
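
For reference, the shipped-certs.c file the rule above generates is nothing more than the DER bytes of every certs/*.x509 file wrapped in an array. A sketch of its shape (the byte values below are placeholders, not the actual certificate):

#include "reg.h"

const u8 shipped_regdb_certs[] = {
	0x30, 0x82, 0x02, 0x5c,	/* ...placeholder DER-encoded bytes... */
};
unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);
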
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 25666d3009be..63682176c96c 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ieee80211.h>
 #include <linux/export.h>
 #include <net/cfg80211.h>
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
new file mode 100644
index 000000000000..c6f8f9d6b988
--- /dev/null
+++ b/net/wireless/certs/sforshee.x509
Binary files differ
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index b8aa5a7d5c77..a48859982a32 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * This file contains helper code to handle channel
  * settings and keeping track of what is possible at
@@ -464,7 +465,7 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
 			  struct ieee80211_channel *chan)
 {
 	int width;
-	u32 cf_offset, freq;
+	u32 freq;
 
 	if (chandef->chan->center_freq == chan->center_freq)
 		return true;
@@ -473,8 +474,6 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
 	if (width <= 20)
 		return false;
 
-	cf_offset = width / 2 - 10;
-
 	for (freq = chandef->center_freq1 - width / 2 + 10;
 	     freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) {
 		if (chan->center_freq == freq)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7b33e8c366bc..fdde0d98fde1 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1384,7 +1384,7 @@ out_fail_sysfs:
 out_fail_pernet:
 	return err;
 }
-subsys_initcall(cfg80211_init);
+fs_initcall(cfg80211_init);
 
 static void __exit cfg80211_exit(void)
 {
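
The one-line core.c change only moves cfg80211_init one initcall level later in boot; a plausible motive, given the rest of this series, is ordering it after subsys-level infrastructure such as key and certificate setup. For reference, the levels run in this order (sketch in comment form; see include/linux/init.h):

/* Boot-time initcall levels, earliest first:
 *   pure_initcall       (0)
 *   core_initcall       (1)
 *   postcore_initcall   (2)
 *   arch_initcall       (3)
 *   subsys_initcall     (4)  <- cfg80211_init's old slot
 *   fs_initcall         (5)  <- cfg80211_init's new slot
 *   device_initcall     (6)  == module_init() when built in
 *   late_initcall       (7)
 */
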
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 6e809325af3b..d2f7e8b8a097 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Wireless configuration interface internals.
  *
@@ -216,6 +217,7 @@ enum cfg80211_event_type {
 	EVENT_DISCONNECTED,
 	EVENT_IBSS_JOINED,
 	EVENT_STOPPED,
+	EVENT_PORT_AUTHORIZED,
 };
 
 struct cfg80211_event {
@@ -235,6 +237,9 @@ struct cfg80211_event {
 		u8 bssid[ETH_ALEN];
 		struct ieee80211_channel *channel;
 	} ij;
+	struct {
+		u8 bssid[ETH_ALEN];
+	} pa;
 	};
 };
 
@@ -385,6 +390,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
 			bool wextev);
 void __cfg80211_roamed(struct wireless_dev *wdev,
 		       struct cfg80211_roam_info *info);
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid);
 int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
 			      struct wireless_dev *wdev);
 void cfg80211_autodisconnect_wk(struct work_struct *work);
diff --git a/net/wireless/db.txt b/net/wireless/db.txt
deleted file mode 100644
index a2fc3a09ccdc..000000000000
--- a/net/wireless/db.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# This file is a placeholder to prevent accidental build breakage if someone
-# enables CONFIG_CFG80211_INTERNAL_REGDB. Almost no one actually needs to
-# enable that build option.
-#
-# You should be using CRDA instead. It is even better if you use the CRDA
-# package provided by your distribution, since they will probably keep it
-# up-to-date on your behalf.
-#
-# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will
-# need to replace this file with one containing appropriately formatted
-# regulatory rules that cover the regulatory domains you will be using. Your
-# best option is to extract the db.txt file from the wireless-regdb git
-# repository:
-#
-#   git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git
-#
diff --git a/net/wireless/debugfs.h b/net/wireless/debugfs.h
index 74fdd3811427..a8a135d94ab5 100644
--- a/net/wireless/debugfs.h
+++ b/net/wireless/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __CFG80211_DEBUGFS_H
 #define __CFG80211_DEBUGFS_H
 
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index e9e91298c70d..a9c0f368db5d 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/utsname.h>
 #include <net/cfg80211.h>
 #include "core.h"
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
deleted file mode 100644
index baf2426b555a..000000000000
--- a/net/wireless/genregdb.awk
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/awk -f
-#
-# genregdb.awk -- generate regdb.c from db.txt
-#
-# Actually, it reads from stdin (presumed to be db.txt) and writes
-# to stdout (presumed to be regdb.c), but close enough...
-#
-# Copyright 2009 John W. Linville <linville@tuxdriver.com>
-#
-# Permission to use, copy, modify, and/or distribute this software for any
-# purpose with or without fee is hereby granted, provided that the above
-# copyright notice and this permission notice appear in all copies.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-BEGIN {
-	active = 0
-	rules = 0;
-	print "/*"
-	print " * DO NOT EDIT -- file generated from data in db.txt"
-	print " */"
-	print ""
-	print "#include <linux/nl80211.h>"
-	print "#include <net/cfg80211.h>"
-	print "#include \"regdb.h\""
-	print ""
-	regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n"
-}
-
-function parse_country_head() {
-	country=$2
-	sub(/:/, "", country)
-	printf "static const struct ieee80211_regdomain regdom_%s = {\n", country
-	printf "\t.alpha2 = \"%s\",\n", country
-	if ($NF ~ /DFS-ETSI/)
-		printf "\t.dfs_region = NL80211_DFS_ETSI,\n"
-	else if ($NF ~ /DFS-FCC/)
-		printf "\t.dfs_region = NL80211_DFS_FCC,\n"
-	else if ($NF ~ /DFS-JP/)
-		printf "\t.dfs_region = NL80211_DFS_JP,\n"
-	printf "\t.reg_rules = {\n"
-	active = 1
-	regdb = regdb "\t&regdom_" country ",\n"
-}
-
-function parse_reg_rule()
-{
-	flag_starts_at = 7
-
-	start = $1
-	sub(/\(/, "", start)
-	end = $3
-	bw = $5
-	sub(/\),/, "", bw)
-	gain = 0
-	power = $6
-	# power might be in mW...
-	units = $7
-	dfs_cac = 0
-
-	sub(/\(/, "", power)
-	sub(/\),/, "", power)
-	sub(/\),/, "", units)
-	sub(/\)/, "", units)
-
-	if (units == "mW") {
-		flag_starts_at = 8
-		power = 10 * log(power)/log(10)
-		if ($8 ~ /[[:digit:]]/) {
-			flag_starts_at = 9
-			dfs_cac = $8
-		}
-	} else {
-		if ($7 ~ /[[:digit:]]/) {
-			flag_starts_at = 8
-			dfs_cac = $7
-		}
-	}
-	sub(/\(/, "", dfs_cac)
-	sub(/\),/, "", dfs_cac)
-	flagstr = ""
-	for (i=flag_starts_at; i<=NF; i++)
-		flagstr = flagstr $i
-	split(flagstr, flagarray, ",")
-	flags = ""
-	for (arg in flagarray) {
-		if (flagarray[arg] == "NO-OFDM") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | "
-		} else if (flagarray[arg] == "NO-CCK") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | "
-		} else if (flagarray[arg] == "NO-INDOOR") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | "
-		} else if (flagarray[arg] == "NO-OUTDOOR") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | "
-		} else if (flagarray[arg] == "DFS") {
-			flags = flags "\n\t\t\tNL80211_RRF_DFS | "
-		} else if (flagarray[arg] == "PTP-ONLY") {
-			flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | "
-		} else if (flagarray[arg] == "PTMP-ONLY") {
-			flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | "
-		} else if (flagarray[arg] == "PASSIVE-SCAN") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
-		} else if (flagarray[arg] == "NO-IBSS") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
-		} else if (flagarray[arg] == "NO-IR") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
-		} else if (flagarray[arg] == "AUTO-BW") {
-			flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | "
-		}
-
-	}
-	flags = flags "0"
-	printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
-	rules++
-}
-
-function print_tail_country()
-{
-	active = 0
-	printf "\t},\n"
-	printf "\t.n_reg_rules = %d\n", rules
-	printf "};\n\n"
-	rules = 0;
-}
-
-/^[ \t]*#/ {
-	# Ignore
-}
-
-!active && /^[ \t]*$/ {
-	# Ignore
-}
-
-!active && /country/ {
-	parse_country_head()
-}
-
-active && /^[ \t]*\(/ {
-	parse_reg_rule()
-}
-
-active && /^[ \t]*$/ {
-	print_tail_country()
-}
-
-END {
-	if (active)
-		print_tail_country()
-	print regdb "};"
-	print ""
-	print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);"
-}
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 10bf040a0982..413d4f4e6334 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Some IBSS support code for cfg80211.
  *
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index 459611577d3d..801d4781a73b 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -44,7 +44,7 @@ static DEFINE_SPINLOCK(lib80211_crypto_lock);
 static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info,
 					  int force);
 static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info);
-static void lib80211_crypt_deinit_handler(unsigned long data);
+static void lib80211_crypt_deinit_handler(struct timer_list *t);
 
 int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
 			     spinlock_t *lock)
@@ -55,8 +55,8 @@ int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
 	info->lock = lock;
 
 	INIT_LIST_HEAD(&info->crypt_deinit_list);
-	setup_timer(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler,
-		    (unsigned long)info);
+	timer_setup(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler,
+		    0);
 
 	return 0;
 }
@@ -116,9 +116,10 @@ static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info)
 	spin_unlock_irqrestore(info->lock, flags);
 }
 
-static void lib80211_crypt_deinit_handler(unsigned long data)
+static void lib80211_crypt_deinit_handler(struct timer_list *t)
 {
-	struct lib80211_crypt_info *info = (struct lib80211_crypt_info *)data;
+	struct lib80211_crypt_info *info = from_timer(info, t,
+						      crypt_deinit_timer);
 	unsigned long flags;
 
 	lib80211_crypt_deinit_entries(info, 0);
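
The lib80211 hunks are a mechanical instance of the 4.15-era timer API conversion: the callback now receives the timer_list pointer and recovers its container with from_timer() instead of casting an unsigned long. A minimal sketch of the pattern (my_state and my_timer_fn are hypothetical names):

#include <linux/timer.h>

struct my_state {
	struct timer_list my_timer;
};

static void my_timer_fn(struct timer_list *t)
{
	/* from_timer() computes the containing struct from the embedded
	 * timer_list, replacing the old (unsigned long)data argument. */
	struct my_state *s = from_timer(s, t, my_timer);

	/* ... use s ... */
}

static void my_init(struct my_state *s)
{
	/* Old API: setup_timer(&s->my_timer, fn, (unsigned long)s); */
	timer_setup(&s->my_timer, my_timer_fn, 0);
}
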
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 421a6b80ec62..51aa55618ef7 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ieee80211.h>
 #include <linux/export.h>
 #include <net/cfg80211.h>
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d8df7a5180a0..e7c64a8dce54 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * cfg80211 MLME SAP interface
  *
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0df8023f480b..b1ac23ca20c8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -549,6 +549,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
 	[NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
 };
 
+/* policy for packet pattern attributes */
+static const struct nla_policy
+nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
+	[NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, },
+	[NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, },
+	[NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
+};
+
 static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 				     struct netlink_callback *cb,
 				     struct cfg80211_registered_device **rdev,
@@ -2122,6 +2130,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
 	case NL80211_CHAN_HT40MINUS:
 		cfg80211_chandef_create(chandef, chandef->chan,
 					chantype);
+		/* user input for center_freq is incorrect */
+		if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
+		    chandef->center_freq1 != nla_get_u32(
+				info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+			return -EINVAL;
+		/* center_freq2 must be zero */
+		if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
+		    nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+			return -EINVAL;
 		break;
 	default:
 		return -EINVAL;
@@ -2588,10 +2605,32 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 		goto nla_put_failure;
 	}
 
-	if (wdev->ssid_len) {
-		if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
+	wdev_lock(wdev);
+	switch (wdev->iftype) {
+	case NL80211_IFTYPE_AP:
+		if (wdev->ssid_len &&
+		    nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
 			goto nla_put_failure;
+		break;
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_P2P_CLIENT:
+	case NL80211_IFTYPE_ADHOC: {
+		const u8 *ssid_ie;
+		if (!wdev->current_bss)
+			break;
+		ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub,
+					       WLAN_EID_SSID);
+		if (!ssid_ie)
+			break;
+		if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
+			goto nla_put_failure;
+		break;
+	}
+	default:
+		/* nothing */
+		break;
 	}
+	wdev_unlock(wdev);
 
 	genlmsg_end(msg, hdr);
 	return 0;
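
The STATION/P2P_CLIENT/ADHOC branch above reads the SSID out of the current BSS entry rather than wdev->ssid; ieee80211_bss_get_ie() returns a pointer to a raw 802.11 information element, so length and payload are picked out of the standard TLV layout. A sketch of that layout (copy_ssid is a hypothetical helper and assumes the element was already length-checked):

#include <linux/types.h>
#include <linux/string.h>

/* An information element is <1-byte ID><1-byte length><payload>; for
 * WLAN_EID_SSID the payload is the SSID itself. */
static void copy_ssid(const u8 *ssid_ie, u8 *out, size_t outlen)
{
	u8 len = ssid_ie[1];		/* ssid_ie[0] is the element ID */

	if (len > outlen)
		len = outlen;
	memcpy(out, ssid_ie + 2, len);	/* payload starts at offset 2 */
}
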
@@ -5669,6 +5708,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
5669 } 5708 }
5670} 5709}
5671 5710
5711static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
5712{
5713 return reg_reload_regdb();
5714}
5715
5672static int nl80211_get_mesh_config(struct sk_buff *skb, 5716static int nl80211_get_mesh_config(struct sk_buff *skb,
5673 struct genl_info *info) 5717 struct genl_info *info)
5674{ 5718{
@@ -6269,7 +6313,7 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
 	if (!hdr)
 		return -1;
 
-	genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+	genl_dump_check_consistent(cb, hdr);
 
 	if (nl80211_put_regdom(regdom, msg))
 		goto nla_put_failure;
@@ -6610,6 +6654,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
 	return regulatory_pre_cac_allowed(wdev->wiphy);
 }
 
+static int
+nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
+			 void *request, struct nlattr **attrs,
+			 bool is_sched_scan)
+{
+	u8 *mac_addr, *mac_addr_mask;
+	u32 *flags;
+	enum nl80211_feature_flags randomness_flag;
+
+	if (!attrs[NL80211_ATTR_SCAN_FLAGS])
+		return 0;
+
+	if (is_sched_scan) {
+		struct cfg80211_sched_scan_request *req = request;
+
+		randomness_flag = wdev ?
+				  NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+				  NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+		flags = &req->flags;
+		mac_addr = req->mac_addr;
+		mac_addr_mask = req->mac_addr_mask;
+	} else {
+		struct cfg80211_scan_request *req = request;
+
+		randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+		flags = &req->flags;
+		mac_addr = req->mac_addr;
+		mac_addr_mask = req->mac_addr_mask;
+	}
+
+	*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
+
+	if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+	    !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+		return -EOPNOTSUPP;
+
+	if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+		int err;
+
+		if (!(wiphy->features & randomness_flag) ||
+		    (wdev && wdev->current_bss))
+			return -EOPNOTSUPP;
+
+		err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
+		if (err)
+			return err;
+	}
+
+	if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
+	    !wiphy_ext_feature_isset(wiphy,
+				     NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
+		return -EOPNOTSUPP;
+
+	if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
+	    !wiphy_ext_feature_isset(wiphy,
+				     NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
+		return -EOPNOTSUPP;
+
+	if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+	    !wiphy_ext_feature_isset(wiphy,
+				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
+		return -EOPNOTSUPP;
+
+	if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
+	    !wiphy_ext_feature_isset(wiphy,
+				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
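nl80211_check_scan_flags() folds the duplicated flag validation from the triggered and scheduled scan paths into one helper, and every flag stays gated on a capability the driver opted into. A sketch of the driver side of that contract, with a hypothetical setup function — wiphy->features and wiphy_ext_feature_set() are the real mechanisms:

	static void hypothetical_setup_scan_features(struct wiphy *wiphy)
	{
		/* older capabilities are plain bits in wiphy->features ... */
		wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
				   NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;

		/* ... newer ones live in the extended feature bitmap */
		wiphy_ext_feature_set(wiphy,
				      NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME);
		wiphy_ext_feature_set(wiphy,
				      NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP);
	}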
@@ -6815,34 +6930,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 		nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
 	}
 
-	if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
-		request->flags = nla_get_u32(
-			info->attrs[NL80211_ATTR_SCAN_FLAGS]);
-		if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-		    !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
-			err = -EOPNOTSUPP;
-			goto out_free;
-		}
-
-		if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-			if (!(wiphy->features &
-			      NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
-				err = -EOPNOTSUPP;
-				goto out_free;
-			}
-
-			if (wdev->current_bss) {
-				err = -EOPNOTSUPP;
-				goto out_free;
-			}
-
-			err = nl80211_parse_random_mac(info->attrs,
-						       request->mac_addr,
-						       request->mac_addr_mask);
-			if (err)
-				goto out_free;
-		}
-	}
+	err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
+				       false);
+	if (err)
+		goto out_free;
 
 	request->no_cck =
 		nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
@@ -7290,37 +7381,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
 				request->ie_len);
 	}
 
-	if (attrs[NL80211_ATTR_SCAN_FLAGS]) {
-		request->flags = nla_get_u32(
-			attrs[NL80211_ATTR_SCAN_FLAGS]);
-		if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-		    !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
-			err = -EOPNOTSUPP;
-			goto out_free;
-		}
-
-		if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-			u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
-
-			if (!wdev) /* must be net-detect */
-				flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
-
-			if (!(wiphy->features & flg)) {
-				err = -EOPNOTSUPP;
-				goto out_free;
-			}
-
-			if (wdev && wdev->current_bss) {
-				err = -EOPNOTSUPP;
-				goto out_free;
-			}
-
-			err = nl80211_parse_random_mac(attrs, request->mac_addr,
-						       request->mac_addr_mask);
-			if (err)
-				goto out_free;
-		}
-	}
+	err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+	if (err)
+		goto out_free;
 
 	if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
 		request->delay =
@@ -7681,7 +7744,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
 	if (!hdr)
 		return -1;
 
-	genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+	genl_dump_check_consistent(cb, hdr);
 
 	if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
 		goto nla_put_failure;
@@ -8924,8 +8987,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 
 	if (info->attrs[NL80211_ATTR_USE_MFP]) {
 		connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+		if (connect.mfp == NL80211_MFP_OPTIONAL &&
+		    !wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_MFP_OPTIONAL))
+			return -EOPNOTSUPP;
+
 		if (connect.mfp != NL80211_MFP_REQUIRED &&
-		    connect.mfp != NL80211_MFP_NO)
+		    connect.mfp != NL80211_MFP_NO &&
+		    connect.mfp != NL80211_MFP_OPTIONAL)
 			return -EINVAL;
 	} else {
 		connect.mfp = NL80211_MFP_NO;
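With NL80211_EXT_FEATURE_MFP_OPTIONAL advertised, userspace may request protected management frames without making them a hard requirement. A hedged userspace sketch using libnl-3 — the NL80211_CMD_CONNECT message construction around it is assumed to exist elsewhere, only the attribute is shown:

	/* add to an NL80211_CMD_CONNECT message; error handling omitted.
	 * NL80211_MFP_OPTIONAL is only accepted when the wiphy sets
	 * NL80211_EXT_FEATURE_MFP_OPTIONAL, per the check above. */
	nla_put_u32(msg, NL80211_ATTR_USE_MFP, NL80211_MFP_OPTIONAL);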
@@ -9987,6 +10056,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 
+	if (!setup.chandef.chan)
+		return -EINVAL;
+
 	err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
 				      &setup.beacon_rate);
 	if (err)
@@ -10529,7 +10601,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 			u8 *mask_pat;
 
 			nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
-					 NULL, info->extack);
+					 nl80211_packet_pattern_policy,
+					 info->extack);
 			err = -EINVAL;
 			if (!pat_tb[NL80211_PKTPAT_MASK] ||
 			    !pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10778,7 +10851,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
 			    rem) {
 		u8 *mask_pat;
 
-		nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL);
+		nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+				 nl80211_packet_pattern_policy, NULL);
 		if (!pat_tb[NL80211_PKTPAT_MASK] ||
 		    !pat_tb[NL80211_PKTPAT_PATTERN])
 			return -EINVAL;
@@ -10903,6 +10977,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 
+	if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
+	    !tb[NL80211_REKEY_DATA_KCK])
+		return -EINVAL;
 	if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
 		return -ERANGE;
 	if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
@@ -12669,6 +12746,12 @@ static const struct genl_ops nl80211_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
+		.cmd = NL80211_CMD_RELOAD_REGDB,
+		.doit = nl80211_reload_regdb,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
 		.cmd = NL80211_CMD_GET_MESH_CONFIG,
 		.doit = nl80211_get_mesh_config,
 		.policy = nl80211_policy,
@@ -13796,9 +13879,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 			     info->req_ie)) ||
 	    (info->resp_ie &&
 	     nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
-		     info->resp_ie)) ||
-	    (info->authorized &&
-	     nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
+		     info->resp_ie)))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
@@ -13812,6 +13893,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev, const u8 *bssid)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+				NL80211_MCGRP_MLME, GFP_KERNEL);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev, u16 reason,
 			       const u8 *ie, size_t ie_len, bool from_ap)
@@ -14185,7 +14296,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
-	u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
+	u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);
 
 	if (!nlportid)
 		return false;
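The ACCESS_ONCE() to READ_ONCE() change is part of the tree-wide retirement of ACCESS_ONCE(). The semantics, reduced to a sketch (the function name is hypothetical):

	static u32 hypothetical_snapshot_portid(struct wireless_dev *wdev)
	{
		/* READ_ONCE() forces a single, untorn load and forbids the
		 * compiler from caching or re-reading the field, which
		 * matters because the portid is cleared concurrently when
		 * the listening socket goes away */
		return READ_ONCE(wdev->ap_unexpected_nlportid);
	}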
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b96933322077..79e47fe60c35 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __NET_WIRELESS_NL80211_H
 #define __NET_WIRELESS_NL80211_H
 
@@ -58,6 +59,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
 void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 			 struct net_device *netdev,
 			 struct cfg80211_roam_info *info, gfp_t gfp);
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev, const u8 *bssid);
 void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev, u16 reason,
 			       const u8 *ie, size_t ie_len, bool from_ap);
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index ce23d7d49960..0c06240d25af 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __CFG80211_RDEV_OPS
 #define __CFG80211_RDEV_OPS
 
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 6e94f6934a0e..78e71b0390be 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -53,12 +53,13 @@
 #include <linux/ctype.h>
 #include <linux/nl80211.h>
 #include <linux/platform_device.h>
+#include <linux/verification.h>
 #include <linux/moduleparam.h>
+#include <linux/firmware.h>
 #include <net/cfg80211.h>
 #include "core.h"
 #include "reg.h"
 #include "rdev-ops.h"
-#include "regdb.h"
 #include "nl80211.h"
 
 /*
@@ -100,7 +101,7 @@ static struct regulatory_request core_request_world = {
 static struct regulatory_request __rcu *last_request =
 	(void __force __rcu *)&core_request_world;
 
-/* To trigger userspace events */
+/* To trigger userspace events and load firmware */
 static struct platform_device *reg_pdev;
 
 /*
@@ -443,7 +444,6 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 	return regd;
 }
 
-#ifdef CONFIG_CFG80211_INTERNAL_REGDB
 struct reg_regdb_apply_request {
 	struct list_head list;
 	const struct ieee80211_regdomain *regdom;
@@ -475,55 +475,26 @@ static void reg_regdb_apply(struct work_struct *work)
 
 static DECLARE_WORK(reg_regdb_work, reg_regdb_apply);
 
-static int reg_query_builtin(const char *alpha2)
+static int reg_schedule_apply(const struct ieee80211_regdomain *regdom)
 {
-	const struct ieee80211_regdomain *regdom = NULL;
 	struct reg_regdb_apply_request *request;
-	unsigned int i;
-
-	for (i = 0; i < reg_regdb_size; i++) {
-		if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) {
-			regdom = reg_regdb[i];
-			break;
-		}
-	}
-
-	if (!regdom)
-		return -ENODATA;
 
 	request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL);
-	if (!request)
-		return -ENOMEM;
-
-	request->regdom = reg_copy_regd(regdom);
-	if (IS_ERR_OR_NULL(request->regdom)) {
-		kfree(request);
+	if (!request) {
+		kfree(regdom);
 		return -ENOMEM;
 	}
 
+	request->regdom = regdom;
+
 	mutex_lock(&reg_regdb_apply_mutex);
 	list_add_tail(&request->list, &reg_regdb_apply_list);
 	mutex_unlock(&reg_regdb_apply_mutex);
 
 	schedule_work(&reg_regdb_work);
-
 	return 0;
 }
 
-/* Feel free to add any other sanity checks here */
-static void reg_regdb_size_check(void)
-{
-	/* We should ideally BUILD_BUG_ON() but then random builds would fail */
-	WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it...");
-}
-#else
-static inline void reg_regdb_size_check(void) {}
-static inline int reg_query_builtin(const char *alpha2)
-{
-	return -ENODATA;
-}
-#endif /* CONFIG_CFG80211_INTERNAL_REGDB */
-
 #ifdef CONFIG_CFG80211_CRDA_SUPPORT
 /* Max number of consecutive attempts to communicate with CRDA */
 #define REG_MAX_CRDA_TIMEOUTS 10
@@ -599,10 +570,402 @@ static inline int call_crda(const char *alpha2)
 }
 #endif /* CONFIG_CFG80211_CRDA_SUPPORT */
 
+/* code to directly load a firmware database through request_firmware */
+static const struct fwdb_header *regdb;
+
+struct fwdb_country {
+	u8 alpha2[2];
+	__be16 coll_ptr;
+	/* this struct cannot be extended */
+} __packed __aligned(4);
+
+struct fwdb_collection {
+	u8 len;
+	u8 n_rules;
+	u8 dfs_region;
+	/* no optional data yet */
+	/* aligned to 2, then followed by __be16 array of rule pointers */
+} __packed __aligned(4);
+
+enum fwdb_flags {
+	FWDB_FLAG_NO_OFDM	= BIT(0),
+	FWDB_FLAG_NO_OUTDOOR	= BIT(1),
+	FWDB_FLAG_DFS		= BIT(2),
+	FWDB_FLAG_NO_IR		= BIT(3),
+	FWDB_FLAG_AUTO_BW	= BIT(4),
+};
+
+struct fwdb_rule {
+	u8 len;
+	u8 flags;
+	__be16 max_eirp;
+	__be32 start, end, max_bw;
+	/* start of optional data */
+	__be16 cac_timeout;
+} __packed __aligned(4);
+
+#define FWDB_MAGIC 0x52474442
+#define FWDB_VERSION 20
+
+struct fwdb_header {
+	__be32 magic;
+	__be32 version;
+	struct fwdb_country country[];
+} __packed __aligned(4);
+
+static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
+{
+	struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2));
+
+	if ((u8 *)rule + sizeof(rule->len) > data + size)
+		return false;
+
+	/* mandatory fields */
+	if (rule->len < offsetofend(struct fwdb_rule, max_bw))
+		return false;
+
+	return true;
+}
+
+static bool valid_country(const u8 *data, unsigned int size,
+			  const struct fwdb_country *country)
+{
+	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+	struct fwdb_collection *coll = (void *)(data + ptr);
+	__be16 *rules_ptr;
+	unsigned int i;
+
+	/* make sure we can read len/n_rules */
+	if ((u8 *)coll + offsetofend(typeof(*coll), n_rules) > data + size)
+		return false;
+
+	/* make sure base struct and all rules fit */
+	if ((u8 *)coll + ALIGN(coll->len, 2) +
+	    (coll->n_rules * 2) > data + size)
+		return false;
+
+	/* mandatory fields must exist */
+	if (coll->len < offsetofend(struct fwdb_collection, dfs_region))
+		return false;
+
+	rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+
+	for (i = 0; i < coll->n_rules; i++) {
+		u16 rule_ptr = be16_to_cpu(rules_ptr[i]);
+
+		if (!valid_rule(data, size, rule_ptr))
+			return false;
+	}
+
+	return true;
+}
+
+#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB
+static struct key *builtin_regdb_keys;
+
+static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
+{
+	const u8 *end = p + buflen;
+	size_t plen;
+	key_ref_t key;
+
+	while (p < end) {
+		/* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+		 * than 256 bytes in size.
+		 */
+		if (end - p < 4)
+			goto dodgy_cert;
+		if (p[0] != 0x30 &&
+		    p[1] != 0x82)
+			goto dodgy_cert;
+		plen = (p[2] << 8) | p[3];
+		plen += 4;
+		if (plen > end - p)
+			goto dodgy_cert;
+
+		key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
+					   "asymmetric", NULL, p, plen,
+					   ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+					    KEY_USR_VIEW | KEY_USR_READ),
+					   KEY_ALLOC_NOT_IN_QUOTA |
+					   KEY_ALLOC_BUILT_IN |
+					   KEY_ALLOC_BYPASS_RESTRICTION);
+		if (IS_ERR(key)) {
+			pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+			       PTR_ERR(key));
+		} else {
+			pr_notice("Loaded X.509 cert '%s'\n",
+				  key_ref_to_ptr(key)->description);
+			key_ref_put(key);
+		}
+		p += plen;
+	}
+
+	return;
+
+dodgy_cert:
+	pr_err("Problem parsing in-kernel X.509 certificate list\n");
+}
+
+static int __init load_builtin_regdb_keys(void)
+{
+	builtin_regdb_keys =
+		keyring_alloc(".builtin_regdb_keys",
+			      KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+			      ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+			       KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+			      KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+	if (IS_ERR(builtin_regdb_keys))
+		return PTR_ERR(builtin_regdb_keys);
+
+	pr_notice("Loading compiled-in X.509 certificates for regulatory database\n");
+
+#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS
+	load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len);
+#endif
+#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR
+	if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0')
+		load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len);
+#endif
+
+	return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+	const struct firmware *sig;
+	bool result;
+
+	if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev))
+		return false;
+
+	result = verify_pkcs7_signature(data, size, sig->data, sig->size,
+					builtin_regdb_keys,
+					VERIFYING_UNSPECIFIED_SIGNATURE,
+					NULL, NULL) == 0;
+
+	release_firmware(sig);
+
+	return result;
+}
+
+static void free_regdb_keyring(void)
+{
+	key_put(builtin_regdb_keys);
+}
+#else
+static int load_builtin_regdb_keys(void)
+{
+	return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+	return true;
+}
+
+static void free_regdb_keyring(void)
+{
+}
+#endif /* CONFIG_CFG80211_REQUIRE_SIGNED_REGDB */
+
+static bool valid_regdb(const u8 *data, unsigned int size)
+{
+	const struct fwdb_header *hdr = (void *)data;
+	const struct fwdb_country *country;
+
+	if (size < sizeof(*hdr))
+		return false;
+
+	if (hdr->magic != cpu_to_be32(FWDB_MAGIC))
+		return false;
+
+	if (hdr->version != cpu_to_be32(FWDB_VERSION))
+		return false;
+
+	if (!regdb_has_valid_signature(data, size))
+		return false;
+
+	country = &hdr->country[0];
+	while ((u8 *)(country + 1) <= data + size) {
+		if (!country->coll_ptr)
+			break;
+		if (!valid_country(data, size, country))
+			return false;
+		country++;
+	}
+
+	return true;
+}
+
+static int regdb_query_country(const struct fwdb_header *db,
+			       const struct fwdb_country *country)
+{
+	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
+	struct ieee80211_regdomain *regdom;
+	unsigned int size_of_regd;
+	unsigned int i;
+
+	size_of_regd =
+		sizeof(struct ieee80211_regdomain) +
+		coll->n_rules * sizeof(struct ieee80211_reg_rule);
+
+	regdom = kzalloc(size_of_regd, GFP_KERNEL);
+	if (!regdom)
+		return -ENOMEM;
+
+	regdom->n_reg_rules = coll->n_rules;
+	regdom->alpha2[0] = country->alpha2[0];
+	regdom->alpha2[1] = country->alpha2[1];
+	regdom->dfs_region = coll->dfs_region;
+
+	for (i = 0; i < regdom->n_reg_rules; i++) {
+		__be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+		unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
+		struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr);
+		struct ieee80211_reg_rule *rrule = &regdom->reg_rules[i];
+
+		rrule->freq_range.start_freq_khz = be32_to_cpu(rule->start);
+		rrule->freq_range.end_freq_khz = be32_to_cpu(rule->end);
+		rrule->freq_range.max_bandwidth_khz = be32_to_cpu(rule->max_bw);
+
+		rrule->power_rule.max_antenna_gain = 0;
+		rrule->power_rule.max_eirp = be16_to_cpu(rule->max_eirp);
+
+		rrule->flags = 0;
+		if (rule->flags & FWDB_FLAG_NO_OFDM)
+			rrule->flags |= NL80211_RRF_NO_OFDM;
+		if (rule->flags & FWDB_FLAG_NO_OUTDOOR)
+			rrule->flags |= NL80211_RRF_NO_OUTDOOR;
+		if (rule->flags & FWDB_FLAG_DFS)
+			rrule->flags |= NL80211_RRF_DFS;
+		if (rule->flags & FWDB_FLAG_NO_IR)
+			rrule->flags |= NL80211_RRF_NO_IR;
+		if (rule->flags & FWDB_FLAG_AUTO_BW)
+			rrule->flags |= NL80211_RRF_AUTO_BW;
+
+		rrule->dfs_cac_ms = 0;
+
+		/* handle optional data */
+		if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
+			rrule->dfs_cac_ms =
+				1000 * be16_to_cpu(rule->cac_timeout);
+	}
+
+	return reg_schedule_apply(regdom);
+}
+
+static int query_regdb(const char *alpha2)
+{
+	const struct fwdb_header *hdr = regdb;
+	const struct fwdb_country *country;
+
+	ASSERT_RTNL();
+
+	if (IS_ERR(regdb))
+		return PTR_ERR(regdb);
+
+	country = &hdr->country[0];
+	while (country->coll_ptr) {
+		if (alpha2_equal(alpha2, country->alpha2))
+			return regdb_query_country(regdb, country);
+		country++;
+	}
+
+	return -ENODATA;
+}
+
+static void regdb_fw_cb(const struct firmware *fw, void *context)
+{
+	int set_error = 0;
+	bool restore = true;
+	void *db;
+
+	if (!fw) {
+		pr_info("failed to load regulatory.db\n");
+		set_error = -ENODATA;
+	} else if (!valid_regdb(fw->data, fw->size)) {
+		pr_info("loaded regulatory.db is malformed or signature is missing/invalid\n");
+		set_error = -EINVAL;
+	}
+
+	rtnl_lock();
+	if (WARN_ON(regdb && !IS_ERR(regdb))) {
+		/* just restore and free new db */
+	} else if (set_error) {
+		regdb = ERR_PTR(set_error);
+	} else if (fw) {
+		db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+		if (db) {
+			regdb = db;
+			restore = context && query_regdb(context);
+		} else {
+			restore = true;
+		}
+	}
+
+	if (restore)
+		restore_regulatory_settings(true);
+
+	rtnl_unlock();
+
+	kfree(context);
+
+	release_firmware(fw);
+}
+
+static int query_regdb_file(const char *alpha2)
+{
+	ASSERT_RTNL();
+
+	if (regdb)
+		return query_regdb(alpha2);
+
+	alpha2 = kmemdup(alpha2, 2, GFP_KERNEL);
+	if (!alpha2)
+		return -ENOMEM;
+
+	return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
+				       &reg_pdev->dev, GFP_KERNEL,
+				       (void *)alpha2, regdb_fw_cb);
+}
+
+int reg_reload_regdb(void)
+{
+	const struct firmware *fw;
+	void *db;
+	int err;
+
+	err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev);
+	if (err)
+		return err;
+
+	if (!valid_regdb(fw->data, fw->size)) {
+		err = -ENODATA;
+		goto out;
+	}
+
+	db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	if (!db) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	rtnl_lock();
+	if (!IS_ERR_OR_NULL(regdb))
+		kfree(regdb);
+	regdb = db;
+	rtnl_unlock();
+
+ out:
+	release_firmware(fw);
+	return err;
+}
+
 static bool reg_query_database(struct regulatory_request *request)
 {
-	/* query internal regulatory database (if it exists) */
-	if (reg_query_builtin(request->alpha2) == 0)
+	if (query_regdb_file(request->alpha2) == 0)
 		return true;
 
 	if (call_crda(request->alpha2) == 0)
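The regulatory.db firmware file parsed above is a packed big-endian blob: an 8-byte header (magic 0x52474442, i.e. "RGDB", version 20) followed by 4-byte country slots whose coll_ptr values are offsets counted in 4-byte words, terminated by a zero coll_ptr. A standalone userspace sketch that lists the countries in such a file — the struct definitions are re-declared locally to mirror the kernel's, and the program itself is purely illustrative:

	#include <stdio.h>
	#include <stdint.h>
	#include <arpa/inet.h>	/* ntohl()/ntohs() for the big-endian fields */

	struct fwdb_country {
		uint8_t alpha2[2];
		uint16_t coll_ptr;	/* big-endian, unit: 4-byte words */
	} __attribute__((packed));

	struct fwdb_header {
		uint32_t magic;		/* big-endian 0x52474442 ("RGDB") */
		uint32_t version;	/* big-endian 20 */
		struct fwdb_country country[];
	} __attribute__((packed));

	int main(int argc, char **argv)
	{
		static uint8_t buf[1 << 20];
		const struct fwdb_header *hdr = (const void *)buf;
		const struct fwdb_country *c;
		FILE *f = fopen(argc > 1 ? argv[1] : "regulatory.db", "rb");
		size_t size;

		if (!f)
			return 1;
		size = fread(buf, 1, sizeof(buf), f);
		fclose(f);

		if (size < sizeof(*hdr) || ntohl(hdr->magic) != 0x52474442 ||
		    ntohl(hdr->version) != 20) {
			fprintf(stderr, "not a version-20 regulatory.db\n");
			return 1;
		}

		/* the country table ends at the first zero coll_ptr */
		for (c = hdr->country;
		     (const uint8_t *)(c + 1) <= buf + size && c->coll_ptr;
		     c++)
			printf("%c%c: collection at byte offset %u\n",
			       c->alpha2[0], c->alpha2[1],
			       ntohs(c->coll_ptr) << 2);
		return 0;
	}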
@@ -3281,24 +3644,13 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
 	}
 }
 
-int __init regulatory_init(void)
+static int __init regulatory_init_db(void)
 {
-	int err = 0;
-
-	reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
-	if (IS_ERR(reg_pdev))
-		return PTR_ERR(reg_pdev);
+	int err;
 
-	spin_lock_init(&reg_requests_lock);
-	spin_lock_init(&reg_pending_beacons_lock);
-	spin_lock_init(&reg_indoor_lock);
-
-	reg_regdb_size_check();
-
-	rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
-
-	user_alpha2[0] = '9';
-	user_alpha2[1] = '7';
+	err = load_builtin_regdb_keys();
+	if (err)
+		return err;
 
 	/* We always try to get an update for the static regdomain */
 	err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
@@ -3327,6 +3679,31 @@ int __init regulatory_init(void)
 
 	return 0;
 }
+#ifndef MODULE
+late_initcall(regulatory_init_db);
+#endif
+
+int __init regulatory_init(void)
+{
+	reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
+	if (IS_ERR(reg_pdev))
+		return PTR_ERR(reg_pdev);
+
+	spin_lock_init(&reg_requests_lock);
+	spin_lock_init(&reg_pending_beacons_lock);
+	spin_lock_init(&reg_indoor_lock);
+
+	rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
+
+	user_alpha2[0] = '9';
+	user_alpha2[1] = '7';
+
+#ifdef MODULE
+	return regulatory_init_db();
+#else
+	return 0;
+#endif
+}
 
 void regulatory_exit(void)
 {
@@ -3360,4 +3737,9 @@ void regulatory_exit(void)
 		list_del(&reg_request->list);
 		kfree(reg_request);
 	}
+
+	if (!IS_ERR_OR_NULL(regdb))
+		kfree(regdb);
+
+	free_regdb_keyring();
 }
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index ca7fedf2e7a1..9ceeb5f3a7cb 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,5 +1,8 @@
 #ifndef __NET_WIRELESS_REG_H
 #define __NET_WIRELESS_REG_H
+
+#include <net/cfg80211.h>
+
 /*
  * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
  *
@@ -179,4 +182,15 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
  * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
  */
 bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
+
+/**
+ * reg_reload_regdb - reload the regulatory.db firmware file
+ */
+int reg_reload_regdb(void);
+
+extern const u8 shipped_regdb_certs[];
+extern unsigned int shipped_regdb_certs_len;
+extern const u8 extra_regdb_certs[];
+extern unsigned int extra_regdb_certs_len;
+
 #endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h
deleted file mode 100644
index 3279cfcefb0c..000000000000
--- a/net/wireless/regdb.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __REGDB_H__
-#define __REGDB_H__
-
-/*
- * Copyright 2009 John W. Linville <linville@tuxdriver.com>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-extern const struct ieee80211_regdomain *reg_regdb[];
-extern int reg_regdb_size;
-
-#endif /* __REGDB_H__ */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 9f0901f3e42b..f6c5fe482506 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * cfg80211 scan result handling
  *
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 0a49b88070d0..fdb3646274a5 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SME code for cfg80211
  * both driver SME event handling and the SME implementation
@@ -522,11 +523,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
 		return -EOPNOTSUPP;
 
 	if (wdev->current_bss) {
-		if (!prev_bssid)
-			return -EALREADY;
-		if (prev_bssid &&
-		    !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
-			return -ENOTCONN;
 		cfg80211_unhold_bss(wdev->current_bss);
 		cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
 		wdev->current_bss = NULL;
@@ -960,7 +956,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
 	ev->rm.resp_ie_len = info->resp_ie_len;
 	memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
 	ev->rm.bss = info->bss;
-	ev->rm.authorized = info->authorized;
 
 	spin_lock_irqsave(&wdev->event_lock, flags);
 	list_add_tail(&ev->list, &wdev->event_list);
@@ -969,6 +964,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
 }
 EXPORT_SYMBOL(cfg80211_roamed);
 
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid)
+{
+	ASSERT_WDEV_LOCK(wdev);
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+		return;
+
+	if (WARN_ON(!wdev->current_bss) ||
+	    WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+		return;
+
+	nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev,
+				     bssid);
+}
+
+void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+			      gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct cfg80211_event *ev;
+	unsigned long flags;
+
+	if (WARN_ON(!bssid))
+		return;
+
+	ev = kzalloc(sizeof(*ev), gfp);
+	if (!ev)
+		return;
+
+	ev->type = EVENT_PORT_AUTHORIZED;
+	memcpy(ev->pa.bssid, bssid, ETH_ALEN);
+
+	/*
+	 * Use the wdev event list so that if there are pending
+	 * connected/roamed events, they will be reported first.
+	 */
+	spin_lock_irqsave(&wdev->event_lock, flags);
+	list_add_tail(&ev->list, &wdev->event_list);
+	spin_unlock_irqrestore(&wdev->event_lock, flags);
+	queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_port_authorized);
+
 void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 			     size_t ie_len, u16 reason, bool from_ap)
 {
@@ -1063,11 +1102,35 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	if (WARN_ON(wdev->connect_keys)) {
-		kzfree(wdev->connect_keys);
-		wdev->connect_keys = NULL;
+	/*
+	 * If we have an ssid_len, we're trying to connect or are
+	 * already connected, so reject a new SSID unless it's the
+	 * same (which is the case for re-association.)
+	 */
+	if (wdev->ssid_len &&
+	    (wdev->ssid_len != connect->ssid_len ||
+	     memcmp(wdev->ssid, connect->ssid, wdev->ssid_len)))
+		return -EALREADY;
+
+	/*
+	 * If connected, reject (re-)association unless prev_bssid
+	 * matches the current BSSID.
+	 */
+	if (wdev->current_bss) {
+		if (!prev_bssid)
+			return -EALREADY;
+		if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
+			return -ENOTCONN;
 	}
 
+	/*
+	 * Reject if we're in the process of connecting with WEP,
+	 * this case isn't very interesting and trying to handle
+	 * it would make the code much more complex.
+	 */
+	if (wdev->connect_keys)
+		return -EINPROGRESS;
+
 	cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
 				  rdev->wiphy.ht_capa_mod_mask);
 
@@ -1118,7 +1181,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
 
 	if (err) {
 		wdev->connect_keys = NULL;
-		wdev->ssid_len = 0;
+		/*
+		 * This could be reassoc getting refused, don't clear
+		 * ssid_len in that case.
+		 */
+		if (!wdev->current_bss)
+			wdev->ssid_len = 0;
 		return err;
 	}
 
@@ -1145,6 +1213,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
 	else if (wdev->ssid_len)
 		err = rdev_disconnect(rdev, dev, reason);
 
+	/*
+	 * Clear ssid_len unless we actually were fully connected,
+	 * in which case cfg80211_disconnected() will take care of
+	 * this later.
+	 */
+	if (!wdev->current_bss)
+		wdev->ssid_len = 0;
+
 	return err;
 }
 
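cfg80211_port_authorized() exists because the authorized flag was dropped from the roamed event: a driver or firmware that offloads the 802.1X/4-way handshake now reports the controlled port opening as its own, strictly-ordered event. A sketch of the call site in a hypothetical driver's firmware-event glue:

	static void hypothetical_fw_handshake_done(struct net_device *dev,
						   const u8 *bssid)
	{
		/* queued on the wdev event list, so any pending connect or
		 * roam event reaches userspace before the port-authorized
		 * notification */
		cfg80211_port_authorized(dev, bssid, GFP_KERNEL);
	}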
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
index b533ed71daff..7b454c2de9b7 100644
--- a/net/wireless/sysfs.h
+++ b/net/wireless/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __WIRELESS_SYSFS_H
 #define __WIRELESS_SYSFS_H
 
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 0f8db41eaddb..f3353fe5b35b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM cfg80211
 
diff --git a/net/wireless/util.c b/net/wireless/util.c
index bcb1284c3415..c69160694b6c 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Wireless utility functions
  *
@@ -157,32 +158,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
 	case NL80211_BAND_2GHZ:
 		want = 7;
 		for (i = 0; i < sband->n_bitrates; i++) {
-			if (sband->bitrates[i].bitrate == 10) {
+			switch (sband->bitrates[i].bitrate) {
+			case 10:
+			case 20:
+			case 55:
+			case 110:
 				sband->bitrates[i].flags |=
 					IEEE80211_RATE_MANDATORY_B |
 					IEEE80211_RATE_MANDATORY_G;
 				want--;
-			}
-
-			if (sband->bitrates[i].bitrate == 20 ||
-			    sband->bitrates[i].bitrate == 55 ||
-			    sband->bitrates[i].bitrate == 110 ||
-			    sband->bitrates[i].bitrate == 60 ||
-			    sband->bitrates[i].bitrate == 120 ||
-			    sband->bitrates[i].bitrate == 240) {
+				break;
+			case 60:
+			case 120:
+			case 240:
 				sband->bitrates[i].flags |=
 					IEEE80211_RATE_MANDATORY_G;
 				want--;
-			}
-
-			if (sband->bitrates[i].bitrate != 10 &&
-			    sband->bitrates[i].bitrate != 20 &&
-			    sband->bitrates[i].bitrate != 55 &&
-			    sband->bitrates[i].bitrate != 110)
+				/* fall through */
+			default:
 				sband->bitrates[i].flags |=
 					IEEE80211_RATE_ERP_G;
+				break;
+			}
 		}
-		WARN_ON(want != 0 && want != 3 && want != 6);
+		WARN_ON(want != 0 && want != 3);
 		break;
 	case NL80211_BAND_60GHZ:
 		/* check for mandatory HT MCS 1..4 */
@@ -529,121 +528,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 }
 EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr);
 
-int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
-			     enum nl80211_iftype iftype,
-			     const u8 *bssid, bool qos)
-{
-	struct ieee80211_hdr hdr;
-	u16 hdrlen, ethertype;
-	__le16 fc;
-	const u8 *encaps_data;
-	int encaps_len, skip_header_bytes;
-	int nh_pos, h_pos;
-	int head_need;
-
-	if (unlikely(skb->len < ETH_HLEN))
-		return -EINVAL;
-
-	nh_pos = skb_network_header(skb) - skb->data;
-	h_pos = skb_transport_header(skb) - skb->data;
-
-	/* convert Ethernet header to proper 802.11 header (based on
-	 * operation mode) */
-	ethertype = (skb->data[12] << 8) | skb->data[13];
-	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
-
-	switch (iftype) {
-	case NL80211_IFTYPE_AP:
-	case NL80211_IFTYPE_AP_VLAN:
-	case NL80211_IFTYPE_P2P_GO:
-		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
-		/* DA BSSID SA */
-		memcpy(hdr.addr1, skb->data, ETH_ALEN);
-		memcpy(hdr.addr2, addr, ETH_ALEN);
-		memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
-		hdrlen = 24;
-		break;
-	case NL80211_IFTYPE_STATION:
-	case NL80211_IFTYPE_P2P_CLIENT:
-		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
-		/* BSSID SA DA */
-		memcpy(hdr.addr1, bssid, ETH_ALEN);
-		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
-		memcpy(hdr.addr3, skb->data, ETH_ALEN);
-		hdrlen = 24;
-		break;
-	case NL80211_IFTYPE_OCB:
-	case NL80211_IFTYPE_ADHOC:
-		/* DA SA BSSID */
-		memcpy(hdr.addr1, skb->data, ETH_ALEN);
-		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
-		memcpy(hdr.addr3, bssid, ETH_ALEN);
-		hdrlen = 24;
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	if (qos) {
-		fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
-		hdrlen += 2;
-	}
-
-	hdr.frame_control = fc;
-	hdr.duration_id = 0;
-	hdr.seq_ctrl = 0;
-
-	skip_header_bytes = ETH_HLEN;
-	if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) {
-		encaps_data = bridge_tunnel_header;
-		encaps_len = sizeof(bridge_tunnel_header);
-		skip_header_bytes -= 2;
-	} else if (ethertype >= ETH_P_802_3_MIN) {
-		encaps_data = rfc1042_header;
-		encaps_len = sizeof(rfc1042_header);
-		skip_header_bytes -= 2;
-	} else {
-		encaps_data = NULL;
-		encaps_len = 0;
-	}
-
-	skb_pull(skb, skip_header_bytes);
-	nh_pos -= skip_header_bytes;
-	h_pos -= skip_header_bytes;
-
-	head_need = hdrlen + encaps_len - skb_headroom(skb);
-
-	if (head_need > 0 || skb_cloned(skb)) {
-		head_need = max(head_need, 0);
-		if (head_need)
-			skb_orphan(skb);
-
-		if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC))
-			return -ENOMEM;
-	}
-
-	if (encaps_data) {
-		memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
-		nh_pos += encaps_len;
-		h_pos += encaps_len;
-	}
-
-	memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
-
-	nh_pos += hdrlen;
-	h_pos += hdrlen;
-
-	/* Update skb pointers to various headers since this modified frame
-	 * is going to go through Linux networking code that may potentially
-	 * need things like pointer to IP header. */
-	skb_reset_mac_header(skb);
-	skb_set_network_header(skb, nh_pos);
-	skb_set_transport_header(skb, h_pos);
-
-	return 0;
-}
-EXPORT_SYMBOL(ieee80211_data_from_8023);
-
 static void
 __frame_add_frag(struct sk_buff *skb, struct page *page,
 		 void *ptr, int len, int size)
@@ -963,6 +847,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
 		case EVENT_STOPPED:
 			__cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
 			break;
+		case EVENT_PORT_AUTHORIZED:
+			__cfg80211_port_authorized(wdev, ev->pa.bssid);
+			break;
 		}
 		wdev_unlock(wdev);
 
@@ -1367,13 +1254,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
 }
 EXPORT_SYMBOL(cfg80211_get_p2p_attr);
 
-static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
+static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext)
 {
 	int i;
 
-	for (i = 0; i < n_ids; i++)
-		if (ids[i] == id)
+	/* Make sure array values are legal */
+	if (WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION))
+		return false;
+
+	i = 0;
+	while (i < n_ids) {
+		if (ids[i] == WLAN_EID_EXTENSION) {
+			if (id_ext && (ids[i + 1] == id))
+				return true;
+
+			i += 2;
+			continue;
+		}
+
+		if (ids[i] == id && !id_ext)
 			return true;
+
+		i++;
+	}
 	return false;
 }
 
@@ -1403,14 +1306,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
 {
 	size_t pos = offset;
 
-	while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
+	while (pos < ielen) {
+		u8 ext = 0;
+
+		if (ies[pos] == WLAN_EID_EXTENSION)
+			ext = 2;
+		if ((pos + ext) >= ielen)
+			break;
+
+		if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext],
+					  ies[pos] == WLAN_EID_EXTENSION))
+			break;
+
 		if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
 			pos = skip_ie(ies, ielen, pos);
 
-			while (pos < ielen &&
-			       !ieee80211_id_in_list(after_ric, n_after_ric,
-						     ies[pos]))
-				pos = skip_ie(ies, ielen, pos);
+			while (pos < ielen) {
+				if (ies[pos] == WLAN_EID_EXTENSION)
+					ext = 2;
+				else
+					ext = 0;
+
+				if ((pos + ext) >= ielen)
+					break;
+
+				if (!ieee80211_id_in_list(after_ric,
+							  n_after_ric,
+							  ies[pos + ext],
+							  ext == 2))
+					pos = skip_ie(ies, ielen, pos);
+			}
 		} else {
 			pos = skip_ie(ies, ielen, pos);
 		}
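Both helpers above had to learn that an element tagged WLAN_EID_EXTENSION (255) is really identified by the byte after the length octet. The same convention in isolation, as a hypothetical finder for one extended element ID:

	/* layout of an extended element: | 255 | len | ext_id | data... |,
	 * where len covers ext_id plus the payload */
	static const u8 *hypothetical_find_ext_ie(const u8 *ies, size_t ielen,
						  u8 ext_id)
	{
		size_t pos = 0;

		while (pos + 2 < ielen && pos + 2 + ies[pos + 1] <= ielen) {
			if (ies[pos] == WLAN_EID_EXTENSION &&
			    ies[pos + 1] >= 1 && ies[pos + 2] == ext_id)
				return &ies[pos];
			pos += 2 + ies[pos + 1];
		}
		return NULL;
	}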
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 5d4a02c7979b..7ca04a7de85a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * cfg80211 - wext compat code
  *
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index c434f193f39a..c67d7a82ab13 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * cfg80211 wext compat for managed mode.
  *
diff --git a/net/x25/Makefile b/net/x25/Makefile
index a2c34ab6f194..5dd544a231f2 100644
--- a/net/x25/Makefile
+++ b/net/x25/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux X.25 Packet layer.
 #
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ac095936552d..562cc11131f6 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -374,9 +374,11 @@ static void __x25_destroy_socket(struct sock *);
 /*
  * handler for deferred kills.
  */
-static void x25_destroy_timer(unsigned long data)
+static void x25_destroy_timer(struct timer_list *t)
 {
-	x25_destroy_socket_from_timer((struct sock *)data);
+	struct sock *sk = from_timer(sk, t, sk_timer);
+
+	x25_destroy_socket_from_timer(sk);
 }
 
 /*
@@ -414,7 +416,6 @@ static void __x25_destroy_socket(struct sock *sk)
 		/* Defer: outstanding buffers */
 		sk->sk_timer.expires = jiffies + 10 * HZ;
 		sk->sk_timer.function = x25_destroy_timer;
-		sk->sk_timer.data = (unsigned long)sk;
 		add_timer(&sk->sk_timer);
 	} else {
 		/* drop last reference so sock_put will free */
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index ba078c85f0a1..e9802afa43d0 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* -*- linux-c -*-
  * sysctl_net_x25.c: sysctl interface to net X.25 subsystem.
  *
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 997ff7b2509b..ad1734d36ed7 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -103,7 +103,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			*vc_fac_mask |= X25_MASK_REVERSE;
 			break;
 		}
-
+		/*fall through */
 	case X25_FAC_THROUGHPUT:
 		facilities->throughput = p[1];
 		*vc_fac_mask |= X25_MASK_THROUGHPUT;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 7ac50098a375..3c12cae32001 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -345,6 +345,7 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 
 		case X25_RESET_REQUEST:
 			x25_write_internal(sk, X25_RESET_CONFIRMATION);
+			/* fall through */
 		case X25_RESET_CONFIRMATION: {
 			x25_stop_timer(sk);
 			x25->condition = 0x00;
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index e0cd04d28352..a6a8ab09b914 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -36,7 +36,7 @@
 LIST_HEAD(x25_neigh_list);
 DEFINE_RWLOCK(x25_neigh_list_lock);
 
-static void x25_t20timer_expiry(unsigned long);
+static void x25_t20timer_expiry(struct timer_list *);
 
 static void x25_transmit_restart_confirmation(struct x25_neigh *nb);
 static void x25_transmit_restart_request(struct x25_neigh *nb);
@@ -49,9 +49,9 @@ static inline void x25_start_t20timer(struct x25_neigh *nb)
 	mod_timer(&nb->t20timer, jiffies + nb->t20);
 }
 
-static void x25_t20timer_expiry(unsigned long param)
+static void x25_t20timer_expiry(struct timer_list *t)
 {
-	struct x25_neigh *nb = (struct x25_neigh *)param;
+	struct x25_neigh *nb = from_timer(nb, t, t20timer);
 
 	x25_transmit_restart_request(nb);
 
@@ -252,7 +252,7 @@ void x25_link_device_up(struct net_device *dev)
 		return;
 
 	skb_queue_head_init(&nb->queue);
-	setup_timer(&nb->t20timer, x25_t20timer_expiry, (unsigned long)nb);
+	timer_setup(&nb->t20timer, x25_t20timer_expiry, 0);
 
 	dev_hold(dev);
 	nb->dev = dev;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 5c5db1a36399..fa3461002b3e 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -26,18 +26,17 @@
 #include <net/tcp_states.h>
 #include <net/x25.h>
 
-static void x25_heartbeat_expiry(unsigned long);
-static void x25_timer_expiry(unsigned long);
+static void x25_heartbeat_expiry(struct timer_list *t);
+static void x25_timer_expiry(struct timer_list *t);
 
 void x25_init_timers(struct sock *sk)
 {
 	struct x25_sock *x25 = x25_sk(sk);
 
-	setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
+	timer_setup(&x25->timer, x25_timer_expiry, 0);
 
 	/* initialized by sock_init_data */
-	sk->sk_timer.data = (unsigned long)sk;
-	sk->sk_timer.function = &x25_heartbeat_expiry;
+	sk->sk_timer.function = x25_heartbeat_expiry;
 }
 
 void x25_start_heartbeat(struct sock *sk)
@@ -93,9 +92,9 @@ unsigned long x25_display_timer(struct sock *sk)
 	return x25->timer.expires - jiffies;
 }
 
-static void x25_heartbeat_expiry(unsigned long param)
+static void x25_heartbeat_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */
@@ -160,9 +159,10 @@ static inline void x25_do_timer_expiry(struct sock * sk)
 	}
 }
 
-static void x25_timer_expiry(unsigned long param)
+static void x25_timer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct x25_sock *x25 = from_timer(x25, t, timer);
+	struct sock *sk = &x25->sk;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */
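All of the x25 timer changes follow the same tree-wide conversion recipe: timer_setup() replaces setup_timer() plus the cast-to-unsigned-long data field, and the callback recovers its object with from_timer(), which is container_of() keyed on the timer member. The pattern in miniature (struct and names hypothetical):

	#include <linux/timer.h>

	struct hypothetical_conn {
		int retries;
		struct timer_list retry_timer;
	};

	static void hypothetical_retry_expiry(struct timer_list *t)
	{
		/* no more blind cast of a data field; the compiler checks
		 * that retry_timer really is a member of hypothetical_conn */
		struct hypothetical_conn *conn =
			from_timer(conn, t, retry_timer);

		conn->retries++;
	}

	static void hypothetical_conn_init(struct hypothetical_conn *conn)
	{
		/* was: setup_timer(&conn->retry_timer, fn, (unsigned long)conn) */
		timer_setup(&conn->retry_timer, hypothetical_retry_expiry, 0);
	}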
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 55b2ac300995..0bd2465a8c5a 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the XFRM subsystem.
 #
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index acf00104ef31..30e5746085b8 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -91,6 +91,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 	}
 
 	if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
+		xso->dev = NULL;
 		dev_put(dev);
 		return 0;
 	}
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 1e98bc0fe0a5..2ad33ce1ea17 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* xfrm_hash.c: Common hash table code.
  *
  * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index eaea9c4fb3b0..61be810389d8 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _XFRM_HASH_H
 #define _XFRM_HASH_H
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 2515cd2bc5db..347ab31574d5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * xfrm_input.c
  *
@@ -265,8 +266,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		goto lock;
 	}
 
-	daddr = (xfrm_address_t *)(skb_network_header(skb) +
-				   XFRM_SPI_SKB_CB(skb)->daddroff);
 	family = XFRM_SPI_SKB_CB(skb)->family;
 
 	/* if tunnel is present override skb->mark value with tunnel i_key */
@@ -293,6 +292,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
293 goto drop; 292 goto drop;
294 } 293 }
295 294
295 daddr = (xfrm_address_t *)(skb_network_header(skb) +
296 XFRM_SPI_SKB_CB(skb)->daddroff);
296 do { 297 do {
297 if (skb->sp->len == XFRM_MAX_DEPTH) { 298 if (skb->sp->len == XFRM_MAX_DEPTH) {
298 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); 299 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
@@ -429,7 +430,8 @@ resume:
429 nf_reset(skb); 430 nf_reset(skb);
430 431
431 if (decaps) { 432 if (decaps) {
432 skb->sp->olen = 0; 433 if (skb->sp)
434 skb->sp->olen = 0;
433 skb_dst_drop(skb); 435 skb_dst_drop(skb);
434 gro_cells_receive(&gro_cells, skb); 436 gro_cells_receive(&gro_cells, skb);
435 return 0; 437 return 0;
@@ -440,7 +442,8 @@ resume:
440 442
441 err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); 443 err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
442 if (xfrm_gro) { 444 if (xfrm_gro) {
443 skb->sp->olen = 0; 445 if (skb->sp)
446 skb->sp->olen = 0;
444 skb_dst_drop(skb); 447 skb_dst_drop(skb);
445 gro_cells_receive(&gro_cells, skb); 448 gro_cells_receive(&gro_cells, skb);
446 return err; 449 return err;
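Both resume paths above now tolerate a missing secpath: a packet reaching these branches is not guaranteed to carry skb->sp, so the offload-length reset is guarded rather than dereferenced unconditionally. If the guard were factored out, it would look like this hypothetical helper (the diff open-codes it at both call sites):

/* sketch: treat the secpath as optional on the decap/GRO resume paths */
static inline void secpath_clear_offload_len(struct sk_buff *skb)
{
	if (skb->sp)		/* packets without a secpath carry no olen */
		skb->sp->olen = 0;
}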
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 31a2e6d34dba..73ad8c8ef344 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -105,6 +105,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 	if (xfrm_offload(skb)) {
 		x->type_offload->encap(x, skb);
 	} else {
+		/* Inner headers are invalid now. */
+		skb->encapsulation = 0;
+
 		err = x->type->output(x, skb);
 		if (err == -EINPROGRESS)
 			goto out;
@@ -208,7 +211,6 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
 	int err;

 	secpath_reset(skb);
-	skb->encapsulation = 0;

 	if (xfrm_dev_offload_ok(skb, x)) {
 		struct sec_path *sp;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f06253969972..9542975eb2f9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -57,7 +57,7 @@ static __read_mostly seqcount_t xfrm_policy_hash_generation;
 static void xfrm_init_pmtu(struct dst_entry *dst);
 static int stale_bundle(struct dst_entry *dst);
 static int xfrm_bundle_ok(struct xfrm_dst *xdst);
-static void xfrm_policy_queue_process(unsigned long arg);
+static void xfrm_policy_queue_process(struct timer_list *t);

 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -179,9 +179,9 @@ static inline unsigned long make_jiffies(long secs)
 	return secs*HZ;
 }

-static void xfrm_policy_timer(unsigned long data)
+static void xfrm_policy_timer(struct timer_list *t)
 {
-	struct xfrm_policy *xp = (struct xfrm_policy *)data;
+	struct xfrm_policy *xp = from_timer(xp, t, timer);
 	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
@@ -267,10 +267,9 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 		rwlock_init(&policy->lock);
 		refcount_set(&policy->refcnt, 1);
 		skb_queue_head_init(&policy->polq.hold_queue);
-		setup_timer(&policy->timer, xfrm_policy_timer,
-			    (unsigned long)policy);
-		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
-			    (unsigned long)policy);
+		timer_setup(&policy->timer, xfrm_policy_timer, 0);
+		timer_setup(&policy->polq.hold_timer,
+			    xfrm_policy_queue_process, 0);
 	}
 	return policy;
 }
@@ -1306,6 +1305,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
 	newp->xfrm_nr = old->xfrm_nr;
 	newp->index = old->index;
 	newp->type = old->type;
+	newp->family = old->family;
 	memcpy(newp->xfrm_vec, old->xfrm_vec,
 	       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1573,6 +1573,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 			goto put_states;
 		}

+		if (!dst_prev)
+			dst0 = dst1;
+		else
+			/* Ref count is taken during xfrm_alloc_dst()
+			 * No need to do dst_clone() on dst1
+			 */
+			dst_prev->child = dst1;
+
 		if (xfrm[i]->sel.family == AF_UNSPEC) {
 			inner_mode = xfrm_ip2inner_mode(xfrm[i],
 							xfrm_af2proto(family));
@@ -1584,14 +1592,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		} else
 			inner_mode = xfrm[i]->inner_mode;

-		if (!dst_prev)
-			dst0 = dst1;
-		else
-			/* Ref count is taken during xfrm_alloc_dst()
-			 * No need to do dst_clone() on dst1
-			 */
-			dst_prev->child = dst1;
-
 		xdst->route = dst;
 		dst_copy_metrics(dst1, dst);
@@ -1787,19 +1787,23 @@ void xfrm_policy_cache_flush(void)
 	put_online_cpus();
 }

-static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
+				struct xfrm_state * const xfrm[],
+				int num)
 {
-	unsigned int num_pols = xdst->num_pols;
-	unsigned int pol_dead = 0, i;
+	const struct dst_entry *dst = &xdst->u.dst;
+	int i;

-	for (i = 0; i < num_pols; i++)
-		pol_dead |= xdst->pols[i]->walk.dead;
+	if (xdst->num_xfrms != num)
+		return false;

-	/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-	if (pol_dead)
-		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+	for (i = 0; i < num; i++) {
+		if (!dst || dst->xfrm != xfrm[i])
+			return false;
+		dst = dst->child;
+	}

-	return pol_dead;
+	return xfrm_bundle_ok(xdst);
 }

 static struct xfrm_dst *
@@ -1813,26 +1817,28 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	struct dst_entry *dst;
 	int err;

+	/* Try to instantiate a bundle */
+	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
+	if (err <= 0) {
+		if (err != 0 && err != -EAGAIN)
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		return ERR_PTR(err);
+	}
+
 	xdst = this_cpu_read(xfrm_last_dst);
 	if (xdst &&
 	    xdst->u.dst.dev == dst_orig->dev &&
 	    xdst->num_pols == num_pols &&
-	    !xfrm_pol_dead(xdst) &&
 	    memcmp(xdst->pols, pols,
 		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
-	    xfrm_bundle_ok(xdst)) {
+	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
 		dst_hold(&xdst->u.dst);
+		while (err > 0)
+			xfrm_state_put(xfrm[--err]);
 		return xdst;
 	}

 	old = xdst;
-	/* Try to instantiate a bundle */
-	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
-	if (err <= 0) {
-		if (err != 0 && err != -EAGAIN)
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
-		return ERR_PTR(err);
-	}

 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
 	if (IS_ERR(dst)) {
@@ -1852,12 +1858,12 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	return xdst;
 }

-static void xfrm_policy_queue_process(unsigned long arg)
+static void xfrm_policy_queue_process(struct timer_list *t)
 {
 	struct sk_buff *skb;
 	struct sock *sk;
 	struct dst_entry *dst;
-	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+	struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
 	struct net *net = xp_net(pol);
 	struct xfrm_policy_queue *pq = &pol->polq;
 	struct flowi fl;
@@ -2076,7 +2082,6 @@ make_dummy_bundle:
 	xdst->num_xfrms = num_xfrms;
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);

-	dst_hold(&xdst->u.dst);
 	return xdst;

inc_error:
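The reordering above is the heart of the per-cpu bundle cache fix: template resolution now runs first, so the cached xfrm_last_dst can be validated against the freshly resolved states rather than against policy liveness alone. xfrm_xdst_can_reuse() walks the cached dst chain and demands an exact, in-order state match; on a cache hit the references taken by xfrm_tmpl_resolve() are dropped again. The chain walk reduces to a pattern like this (hypothetical name, same logic as the hunk):

/* sketch: accept a cached chain only if it matches node-for-node */
static bool chain_matches(const struct dst_entry *dst,
			  struct xfrm_state *const states[], int num)
{
	int i;

	for (i = 0; i < num; i++) {
		if (!dst || dst->xfrm != states[i])
			return false;	/* shorter chain or different SA */
		dst = dst->child;
	}
	return true;
}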
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0dab1cd79ce4..065d89606888 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -556,7 +556,7 @@ out:
 	return HRTIMER_NORESTART;
 }

-static void xfrm_replay_timer_handler(unsigned long data);
+static void xfrm_replay_timer_handler(struct timer_list *t);

 struct xfrm_state *xfrm_state_alloc(struct net *net)
 {
@@ -574,8 +574,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 	INIT_HLIST_NODE(&x->byspi);
 	tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
 			     CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
-	setup_timer(&x->rtimer, xfrm_replay_timer_handler,
-		    (unsigned long)x);
+	timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
 	x->curlft.add_time = get_seconds();
 	x->lft.soft_byte_limit = XFRM_INF;
 	x->lft.soft_packet_limit = XFRM_INF;
@@ -732,12 +731,12 @@ restart:
 			}
 		}
 	}
+out:
+	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	if (cnt) {
 		err = 0;
 		xfrm_policy_cache_flush();
 	}
-out:
-	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);
@@ -1879,9 +1878,9 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
 }
 EXPORT_SYMBOL(xfrm_state_walk_done);

-static void xfrm_replay_timer_handler(unsigned long data)
+static void xfrm_replay_timer_handler(struct timer_list *t)
 {
-	struct xfrm_state *x = (struct xfrm_state *)data;
+	struct xfrm_state *x = from_timer(x, t, rtimer);

 	spin_lock(&x->lock);

@@ -2069,6 +2068,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
 	if (err >= 0) {
 		xfrm_sk_policy_insert(sk, err, pol);
 		xfrm_pol_put(pol);
+		__sk_dst_reset(sk);
 		err = 0;
 	}

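Two separate fixes hide in the xfrm_state.c hunks. Moving the out: label hoists the spin_unlock_bh() above xfrm_policy_cache_flush(), which can block (it pins CPU hotplug, as the put_online_cpus() context in the xfrm_policy.c hunk above shows) and therefore must not run under the state spinlock. And __sk_dst_reset() after inserting a socket policy discards any route the socket cached before the policy existed. The restored lock scope, condensed into a sketch with a hypothetical permission check:

/* condensed sketch of the flush flow after the fix */
static int state_flush_sketch(struct net *net)
{
	int err = -ESRCH, cnt = 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	if (!flush_allowed(net))	/* hypothetical, stands in for the real checks */
		goto out;
	/* ... delete matching states, incrementing cnt ... */
out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	if (cnt) {
		err = 0;
		xfrm_policy_cache_flush();	/* may sleep; must run unlocked */
	}
	return err;
}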
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 35a7e794ad04..0c6c5ef65f9d 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/sysctl.h>
 #include <linux/slab.h>
 #include <net/net_namespace.h>
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2bfbd9121e3b..983b0233767b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -42,7 +42,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
 		return 0;

 	algp = nla_data(rt);
-	if (nla_len(rt) < xfrm_alg_len(algp))
+	if (nla_len(rt) < (int)xfrm_alg_len(algp))
 		return -EINVAL;

 	switch (type) {
@@ -68,7 +68,7 @@ static int verify_auth_trunc(struct nlattr **attrs)
 		return 0;

 	algp = nla_data(rt);
-	if (nla_len(rt) < xfrm_alg_auth_len(algp))
+	if (nla_len(rt) < (int)xfrm_alg_auth_len(algp))
 		return -EINVAL;

 	algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -84,7 +84,7 @@ static int verify_aead(struct nlattr **attrs)
 		return 0;

 	algp = nla_data(rt);
-	if (nla_len(rt) < aead_len(algp))
+	if (nla_len(rt) < (int)aead_len(algp))
 		return -EINVAL;

 	algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -130,7 +130,7 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
 		if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
 			return -EINVAL;

-		if (nla_len(rt) < xfrm_replay_state_esn_len(rs) &&
+		if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
 		    nla_len(rt) != sizeof(*rs))
 			return -EINVAL;
 	}
@@ -404,7 +404,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
 					 struct nlattr *rp)
 {
 	struct xfrm_replay_state_esn *up;
-	int ulen;
+	unsigned int ulen;

 	if (!replay_esn || !rp)
 		return 0;
@@ -414,7 +414,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es

 	/* Check the overall length and the internal bitmap length to avoid
 	 * potential overflow. */
-	if (nla_len(rp) < ulen ||
+	if (nla_len(rp) < (int)ulen ||
 	    xfrm_replay_state_esn_len(replay_esn) != ulen ||
 	    replay_esn->bmp_len != up->bmp_len)
 		return -EINVAL;
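The sign fixes above all follow one rule: attribute and message sizes become unsigned int, while comparisons against nla_len(), which returns int, cast the unsigned operand back to int so the comparison stays in signed space instead of silently promoting both sides to unsigned. A small standalone illustration of the promotion trap being avoided (the values are made up):

#include <stdio.h>

int main(void)
{
	int nlen = -4;			/* imagine a corrupted length */
	unsigned int need = 16;		/* computed attribute size */

	if (nlen < (int)need)		/* signed compare: -4 < 16, rejected */
		printf("rejected by the (int) cast form\n");
	if ((unsigned int)nlen < need)	/* unsigned compare: -4 becomes huge */
		printf("never printed: promotion hides the bad length\n");
	return 0;
}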
@@ -430,14 +430,14 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
 					       struct nlattr *rta)
 {
 	struct xfrm_replay_state_esn *p, *pp, *up;
-	int klen, ulen;
+	unsigned int klen, ulen;

 	if (!rta)
 		return 0;

 	up = nla_data(rta);
 	klen = xfrm_replay_state_esn_len(up);
-	ulen = nla_len(rta) >= klen ? klen : sizeof(*up);
+	ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up);

 	p = kzalloc(klen, GFP_KERNEL);
 	if (!p)
@@ -458,9 +458,9 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
 	return 0;
 }

-static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
+static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
 {
-	int len = 0;
+	unsigned int len = 0;

 	if (xfrm_ctx) {
 		len += sizeof(struct xfrm_user_sec_ctx);
@@ -657,6 +657,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,

 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
+		xfrm_dev_state_delete(x);
 		__xfrm_state_put(x);
 		goto out;
 	}
@@ -1031,7 +1032,7 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
 	return -1;
 }

-static inline size_t xfrm_spdinfo_msgsize(void)
+static inline unsigned int xfrm_spdinfo_msgsize(void)
 {
 	return NLMSG_ALIGN(4)
 	       + nla_total_size(sizeof(struct xfrmu_spdinfo))
@@ -1146,18 +1147,19 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	u32 *flags = nlmsg_data(nlh);
 	u32 sportid = NETLINK_CB(skb).portid;
 	u32 seq = nlh->nlmsg_seq;
+	int err;

 	r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC);
 	if (r_skb == NULL)
 		return -ENOMEM;

-	if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0)
-		BUG();
+	err = build_spdinfo(r_skb, net, sportid, seq, *flags);
+	BUG_ON(err < 0);

 	return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
 }

-static inline size_t xfrm_sadinfo_msgsize(void)
+static inline unsigned int xfrm_sadinfo_msgsize(void)
 {
 	return NLMSG_ALIGN(4)
 	       + nla_total_size(sizeof(struct xfrmu_sadhinfo))
@@ -1204,13 +1206,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	u32 *flags = nlmsg_data(nlh);
 	u32 sportid = NETLINK_CB(skb).portid;
 	u32 seq = nlh->nlmsg_seq;
+	int err;

 	r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC);
 	if (r_skb == NULL)
 		return -ENOMEM;

-	if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0)
-		BUG();
+	err = build_sadinfo(r_skb, net, sportid, seq, *flags);
+	BUG_ON(err < 0);

 	return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
 }
@@ -1633,7 +1636,7 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s
 		return copy_sec_ctx(xp->security, skb);
 	return 0;
 }
-static inline size_t userpolicy_type_attrsize(void)
+static inline unsigned int userpolicy_type_attrsize(void)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	return nla_total_size(sizeof(struct xfrm_userpolicy_type));
@@ -1692,32 +1695,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr

 static int xfrm_dump_policy_done(struct netlink_callback *cb)
 {
-	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
 	struct net *net = sock_net(cb->skb->sk);

 	xfrm_policy_walk_done(walk, net);
 	return 0;
 }

+static int xfrm_dump_policy_start(struct netlink_callback *cb)
+{
+	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
+
+	BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args));
+
+	xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
+	return 0;
+}
+
 static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
 	struct xfrm_dump_info info;

-	BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) >
-		     sizeof(cb->args) - sizeof(cb->args[0]));
-
 	info.in_skb = cb->skb;
 	info.out_skb = skb;
 	info.nlmsg_seq = cb->nlh->nlmsg_seq;
 	info.nlmsg_flags = NLM_F_MULTI;

-	if (!cb->args[0]) {
-		cb->args[0] = 1;
-		xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
-	}
-
 	(void) xfrm_policy_walk(net, walk, dump_one_policy, &info);

 	return skb->len;
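The dump rework above moves walker initialization from the first ->dump call into a dedicated netlink .start callback (wired up further down through struct xfrm_link and netlink_dump_control). The practical difference: ->start runs exactly once, before any dump output, so ->done can no longer be invoked on a walker that was never initialized, which is what a dump torn down before its first ->dump call could previously trigger; the walker also now occupies all of cb->args, guarded by the BUILD_BUG_ON. A sketch of the same trio for a hypothetical object table (foo_* names are illustrative):

/* hypothetical walker kept in the netlink callback scratch space */
struct foo_walk { struct list_head entry; int seq; };

static int foo_dump_start(struct netlink_callback *cb)
{
	struct foo_walk *walk = (struct foo_walk *)cb->args;

	BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args));
	INIT_LIST_HEAD(&walk->entry);	/* leaves ->done safe even if no dump ran */
	return 0;
}

static int foo_dump_done(struct netlink_callback *cb)
{
	struct foo_walk *walk = (struct foo_walk *)cb->args;

	list_del(&walk->entry);		/* valid: ->start always ran first */
	return 0;
}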
@@ -1850,9 +1855,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 }

-static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x)
+static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
 {
-	size_t replay_size = x->replay_esn ?
+	unsigned int replay_size = x->replay_esn ?
 			      xfrm_replay_state_esn_len(x->replay_esn) :
 			      sizeof(struct xfrm_replay_state);

@@ -1957,8 +1962,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	c.seq = nlh->nlmsg_seq;
 	c.portid = nlh->nlmsg_pid;

-	if (build_aevent(r_skb, x, &c) < 0)
-		BUG();
+	err = build_aevent(r_skb, x, &c);
+	BUG_ON(err < 0);
+
 	err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
 	spin_unlock_bh(&x->lock);
 	xfrm_state_put(x);
@@ -2321,8 +2327,8 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff
 	return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk);
 }

-static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma,
-					  int with_encp)
+static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma,
+						int with_encp)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id))
 	      + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0)
@@ -2385,6 +2391,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 {
 	struct net *net = &init_net;
 	struct sk_buff *skb;
+	int err;

 	skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap),
 			GFP_ATOMIC);
@@ -2392,8 +2399,8 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 		return -ENOMEM;

 	/* build migrate */
-	if (build_migrate(skb, m, num_migrate, k, sel, encap, dir, type) < 0)
-		BUG();
+	err = build_migrate(skb, m, num_migrate, k, sel, encap, dir, type);
+	BUG_ON(err < 0);

 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
 }
@@ -2473,6 +2480,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {

 static const struct xfrm_link {
 	int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
+	int (*start)(struct netlink_callback *);
 	int (*dump)(struct sk_buff *, struct netlink_callback *);
 	int (*done)(struct netlink_callback *);
 	const struct nla_policy *nla_pol;
@@ -2486,6 +2494,7 @@ static const struct xfrm_link {
 	[XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy },
 	[XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy },
 	[XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy,
+						 .start = xfrm_dump_policy_start,
 						 .dump = xfrm_dump_policy,
 						 .done = xfrm_dump_policy_done },
 	[XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi },
@@ -2538,6 +2547,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,

 	{
 		struct netlink_dump_control c = {
+			.start = link->start,
 			.dump = link->dump,
 			.done = link->done,
 		};
@@ -2566,7 +2576,7 @@ static void xfrm_netlink_rcv(struct sk_buff *skb)
 	mutex_unlock(&net->xfrm.xfrm_cfg_mutex);
 }

-static inline size_t xfrm_expire_msgsize(void)
+static inline unsigned int xfrm_expire_msgsize(void)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_expire))
 	       + nla_total_size(sizeof(struct xfrm_mark));
@@ -2617,13 +2627,14 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
+	int err;

 	skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;

-	if (build_aevent(skb, x, c) < 0)
-		BUG();
+	err = build_aevent(skb, x, c);
+	BUG_ON(err < 0);

 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
 }
@@ -2654,9 +2665,9 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
 }

-static inline size_t xfrm_sa_len(struct xfrm_state *x)
+static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
 {
-	size_t l = 0;
+	unsigned int l = 0;
 	if (x->aead)
 		l += nla_total_size(aead_len(x->aead));
 	if (x->aalg) {
@@ -2701,8 +2712,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
 	struct xfrm_usersa_id *id;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	int len = xfrm_sa_len(x);
-	int headlen, err;
+	unsigned int len = xfrm_sa_len(x);
+	unsigned int headlen;
+	int err;

 	headlen = sizeof(*p);
 	if (c->event == XFRM_MSG_DELSA) {
@@ -2776,8 +2788,8 @@ static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c

 }

-static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,
-					  struct xfrm_policy *xp)
+static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
+						struct xfrm_policy *xp)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire))
 	       + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2829,13 +2841,14 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
+	int err;

 	skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;

-	if (build_acquire(skb, x, xt, xp) < 0)
-		BUG();
+	err = build_acquire(skb, x, xt, xp);
+	BUG_ON(err < 0);

 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
 }
@@ -2900,7 +2913,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 	return xp;
 }

-static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)
+static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire))
 	       + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2944,26 +2957,28 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
 {
 	struct net *net = xp_net(xp);
 	struct sk_buff *skb;
+	int err;

 	skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;

-	if (build_polexpire(skb, xp, dir, c) < 0)
-		BUG();
+	err = build_polexpire(skb, xp, dir, c);
+	BUG_ON(err < 0);

 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
 }

 static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
-	int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+	unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	struct net *net = xp_net(xp);
 	struct xfrm_userpolicy_info *p;
 	struct xfrm_userpolicy_id *id;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	int headlen, err;
+	unsigned int headlen;
+	int err;

 	headlen = sizeof(*p);
 	if (c->event == XFRM_MSG_DELPOLICY) {
@@ -3070,7 +3085,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct

 }

-static inline size_t xfrm_report_msgsize(void)
+static inline unsigned int xfrm_report_msgsize(void)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_report));
 }
@@ -3104,18 +3119,19 @@ static int xfrm_send_report(struct net *net, u8 proto,
 			    struct xfrm_selector *sel, xfrm_address_t *addr)
 {
 	struct sk_buff *skb;
+	int err;

 	skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;

-	if (build_report(skb, proto, sel, addr) < 0)
-		BUG();
+	err = build_report(skb, proto, sel, addr);
+	BUG_ON(err < 0);

 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
 }

-static inline size_t xfrm_mapping_msgsize(void)
+static inline unsigned int xfrm_mapping_msgsize(void)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping));
 }
@@ -3151,6 +3167,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
+	int err;

 	if (x->id.proto != IPPROTO_ESP)
 		return -EINVAL;
@@ -3162,8 +3179,8 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	if (skb == NULL)
 		return -ENOMEM;

-	if (build_mapping(skb, x, ipaddr, sport) < 0)
-		BUG();
+	err = build_mapping(skb, x, ipaddr, sport);
+	BUG_ON(err < 0);

 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
 }